; /* sp_x86_64_asm.asm */
; /*
;  * Copyright (C) 2006-2023 wolfSSL Inc.
;  *
;  * This file is part of wolfSSL.
;  *
;  * wolfSSL is free software; you can redistribute it and/or modify
;  * it under the terms of the GNU General Public License as published by
;  * the Free Software Foundation; either version 2 of the License, or
;  * (at your option) any later version.
;  *
;  * wolfSSL is distributed in the hope that it will be useful,
;  * but WITHOUT ANY WARRANTY; without even the implied warranty of
;  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;  * GNU General Public License for more details.
;  *
;  * You should have received a copy of the GNU General Public License
;  * along with this program; if not, write to the Free Software
;  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
;  */
; Assembly-time feature configuration.
; Each symbol below is only assigned when it has not already been defined,
; so a definition supplied before this point takes precedence.
;
; When the assembler's built-in @Version value is below 1200, mark AVX2 and
; MOVBE as unsupported so code guarded by those symbols is not assembled.
IF @Version LT 1200
; AVX2 instructions not recognized by old versions of MASM
IFNDEF NO_AVX2_SUPPORT
NO_AVX2_SUPPORT = 1
ENDIF
; MOVBE instruction not recognized by old versions of MASM
IFNDEF NO_MOVBE_SUPPORT
NO_MOVBE_SUPPORT = 1
ENDIF
ENDIF

; AVX1 is enabled unless HAVE_INTEL_AVX1 was already defined.
IFNDEF HAVE_INTEL_AVX1
HAVE_INTEL_AVX1 = 1
ENDIF
; AVX2 is enabled only when NO_AVX2_SUPPORT has not been set (either by the
; version check above or by an earlier definition).
IFNDEF NO_AVX2_SUPPORT
HAVE_INTEL_AVX2 = 1
ENDIF

; Make sure _WIN64 is defined for any later conditional that tests it.
IFNDEF _WIN64
_WIN64 = 1
ENDIF

IFNDEF WOLFSSL_SP_NO_2048
IFNDEF WOLFSSL_SP_NO_2048
; /* Read big endian unsigned byte array into r.
;  * Uses the bswap instruction.
;  *
;  * Register arguments as used by the code below:
;  *   rcx  r     Output number: 256 bytes of 64-bit words, least significant
;  *              word first. All 256 bytes are always written; words not
;  *              covered by the input are set to zero.
;  *   rdx  size  Not read by this routine. Reused as scratch to hold the
;  *              end-of-output pointer (r + 256).
;  *   r8   a     Big endian byte array.
;  *   r9   n     Number of bytes in array to read.
;  *
;  * Only volatile registers (rax, rcx, rdx, r8-r11) are modified and rsp is
;  * never changed, so this is a leaf function that needs no unwind data.
;  *
;  * NOTE(review): n is consumed as a full 64-bit signed value in r9. If the
;  * C prototype declares it as a 32-bit int, confirm callers leave the upper
;  * half of r9 properly extended.
;  */
_text SEGMENT READONLY PARA
sp_2048_from_bin_bswap PROC
        ; rdx = one past the end of the 256-byte output
        lea	rdx, [rcx+256]
        ; r11 = one past the last input byte; input is consumed backwards
        lea	r11, [r8+r9]
        jmp	L_2048_from_bin_bswap_64_end
        ; Convert 64 input bytes (8 words) per iteration.
L_2048_from_bin_bswap_64_start:
        sub	r11, 64
        mov	rax, QWORD PTR [r11+56]
        mov	r10, QWORD PTR [r11+48]
        bswap	rax
        bswap	r10
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], r10
        mov	rax, QWORD PTR [r11+40]
        mov	r10, QWORD PTR [r11+32]
        bswap	rax
        bswap	r10
        mov	QWORD PTR [rcx+16], rax
        mov	QWORD PTR [rcx+24], r10
        mov	rax, QWORD PTR [r11+24]
        mov	r10, QWORD PTR [r11+16]
        bswap	rax
        bswap	r10
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], r10
        mov	rax, QWORD PTR [r11+8]
        mov	r10, QWORD PTR [r11]
        bswap	rax
        bswap	r10
        mov	QWORD PTR [rcx+48], rax
        mov	QWORD PTR [rcx+56], r10
        add	rcx, 64
        sub	r9, 64
L_2048_from_bin_bswap_64_end:
        ; n is a signed count: keep looping while at least 64 bytes remain
        cmp	r9, 63
        jg	L_2048_from_bin_bswap_64_start
        jmp	L_2048_from_bin_bswap_8_end
        ; Convert 8 input bytes (1 word) per iteration.
L_2048_from_bin_bswap_8_start:
        sub	r11, 8
        mov	rax, QWORD PTR [r11]
        bswap	rax
        mov	QWORD PTR [rcx], rax
        add	rcx, 8
        sub	r9, 8
L_2048_from_bin_bswap_8_end:
        cmp	r9, 7
        jg	L_2048_from_bin_bswap_8_start
        ; Any bytes left over are the most significant ones, at the
        ; start of the array (r8 still points at a[0]).
        test	r9, r9
        je	L_2048_from_bin_bswap_hi_end
        xor	r10d, r10d
L_2048_from_bin_bswap_hi_start:
        ; r10 = (r10 << 8) + next byte, most significant byte first
        movzx	eax, BYTE PTR [r8]
        shl	r10, 8
        inc	r8
        add	r10, rax
        dec	r9
        jg	L_2048_from_bin_bswap_hi_start
        mov	QWORD PTR [rcx], r10
        add	rcx, 8
L_2048_from_bin_bswap_hi_end:
        ; Zero the remaining words up to r + 256 (unsigned pointer compare).
        cmp	rcx, rdx
        jae	L_2048_from_bin_bswap_zero_end
L_2048_from_bin_bswap_zero_start:
        mov	QWORD PTR [rcx], 0
        add	rcx, 8
        cmp	rcx, rdx
        jb	L_2048_from_bin_bswap_zero_start
L_2048_from_bin_bswap_zero_end:
        ret
sp_2048_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * Register arguments as used by the code below:
;  *   rcx  r     Output number: 256 bytes of 64-bit words, least significant
;  *              word first. All 256 bytes are always written; words not
;  *              covered by the input are set to zero.
;  *   rdx  size  Not read by this routine. Reused as scratch to hold the
;  *              end-of-output pointer (r + 256).
;  *   r8   a     Big endian byte array.
;  *   r9   n     Number of bytes in array to read.
;  *
;  * Only volatile registers (rax, rcx, rdx, r8-r11) are modified and rsp is
;  * never changed, so this is a leaf function that needs no unwind data.
;  *
;  * NOTE(review): n is consumed as a full 64-bit signed value in r9. If the
;  * C prototype declares it as a 32-bit int, confirm callers leave the upper
;  * half of r9 properly extended.
;  */
_text SEGMENT READONLY PARA
sp_2048_from_bin_movbe PROC
        ; rdx = one past the end of the 256-byte output
        lea	rdx, [rcx+256]
        ; r11 = one past the last input byte; input is consumed backwards
        lea	r11, [r8+r9]
        jmp	L_2048_from_bin_movbe_64_end
        ; Convert 64 input bytes (8 words) per iteration.
L_2048_from_bin_movbe_64_start:
        sub	r11, 64
        movbe	rax, QWORD PTR [r11+56]
        movbe	r10, QWORD PTR [r11+48]
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], r10
        movbe	rax, QWORD PTR [r11+40]
        movbe	r10, QWORD PTR [r11+32]
        mov	QWORD PTR [rcx+16], rax
        mov	QWORD PTR [rcx+24], r10
        movbe	rax, QWORD PTR [r11+24]
        movbe	r10, QWORD PTR [r11+16]
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], r10
        movbe	rax, QWORD PTR [r11+8]
        movbe	r10, QWORD PTR [r11]
        mov	QWORD PTR [rcx+48], rax
        mov	QWORD PTR [rcx+56], r10
        add	rcx, 64
        sub	r9, 64
L_2048_from_bin_movbe_64_end:
        ; n is a signed count: keep looping while at least 64 bytes remain
        cmp	r9, 63
        jg	L_2048_from_bin_movbe_64_start
        jmp	L_2048_from_bin_movbe_8_end
        ; Convert 8 input bytes (1 word) per iteration.
L_2048_from_bin_movbe_8_start:
        sub	r11, 8
        movbe	rax, QWORD PTR [r11]
        mov	QWORD PTR [rcx], rax
        add	rcx, 8
        sub	r9, 8
L_2048_from_bin_movbe_8_end:
        cmp	r9, 7
        jg	L_2048_from_bin_movbe_8_start
        ; Any bytes left over are the most significant ones, at the
        ; start of the array (r8 still points at a[0]).
        test	r9, r9
        je	L_2048_from_bin_movbe_hi_end
        xor	r10d, r10d
L_2048_from_bin_movbe_hi_start:
        ; r10 = (r10 << 8) + next byte, most significant byte first
        movzx	eax, BYTE PTR [r8]
        shl	r10, 8
        inc	r8
        add	r10, rax
        dec	r9
        jg	L_2048_from_bin_movbe_hi_start
        mov	QWORD PTR [rcx], r10
        add	rcx, 8
L_2048_from_bin_movbe_hi_end:
        ; Zero the remaining words up to r + 256 (unsigned pointer compare).
        cmp	rcx, rdx
        jae	L_2048_from_bin_movbe_zero_end
L_2048_from_bin_movbe_zero_start:
        mov	QWORD PTR [rcx], 0
        add	rcx, 8
        cmp	rcx, rdx
        jb	L_2048_from_bin_movbe_zero_start
L_2048_from_bin_movbe_zero_end:
        ret
sp_2048_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 256
;  * Uses the bswap instruction.
;  *
;  * r  A single precision integer (read through rcx, 32 64-bit words).
;  * a  Byte array (written through rdx, 256 bytes).
;  *
;  * The body is 16 identical groups, generated with REPT. Each group loads
;  * two words from the high end of r, byte-swaps them and stores them at
;  * the low end of a, so the most significant word is emitted first.
;  * Scratch registers: rax, r8.
;  */
_text SEGMENT READONLY PARA
; Assembly-time cursors: byte offset of the upper source word of the current
; pair, and byte offset of the first destination word of the current pair.
sp_2048_to_bin_bswap_32_src = 248
sp_2048_to_bin_bswap_32_dst = 0
sp_2048_to_bin_bswap_32 PROC
        REPT 16
        mov	rax, QWORD PTR [rcx+sp_2048_to_bin_bswap_32_src]
        mov	r8, QWORD PTR [rcx+sp_2048_to_bin_bswap_32_src-8]
        bswap	rax
        bswap	r8
        mov	QWORD PTR [rdx+sp_2048_to_bin_bswap_32_dst], rax
        mov	QWORD PTR [rdx+sp_2048_to_bin_bswap_32_dst+8], r8
sp_2048_to_bin_bswap_32_src = sp_2048_to_bin_bswap_32_src - 16
sp_2048_to_bin_bswap_32_dst = sp_2048_to_bin_bswap_32_dst + 16
        ENDM
        ret
sp_2048_to_bin_bswap_32 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 256
;  * Uses the movbe instruction which is optional.
;  *
;  * r  A single precision integer (read through rcx, 32 64-bit words).
;  * a  Byte array (written through rdx, 256 bytes).
;  *
;  * The body is 16 identical groups, generated with REPT. Each group loads
;  * two byte-reversed words from the high end of r and stores them at the
;  * low end of a, so the most significant word is emitted first.
;  * Scratch registers: rax, r8.
;  */
_text SEGMENT READONLY PARA
; Assembly-time cursors: byte offset of the upper source word of the current
; pair, and byte offset of the first destination word of the current pair.
sp_2048_to_bin_movbe_32_src = 248
sp_2048_to_bin_movbe_32_dst = 0
sp_2048_to_bin_movbe_32 PROC
        REPT 16
        movbe	rax, QWORD PTR [rcx+sp_2048_to_bin_movbe_32_src]
        movbe	r8, QWORD PTR [rcx+sp_2048_to_bin_movbe_32_src-8]
        mov	QWORD PTR [rdx+sp_2048_to_bin_movbe_32_dst], rax
        mov	QWORD PTR [rdx+sp_2048_to_bin_movbe_32_dst+8], r8
sp_2048_to_bin_movbe_32_src = sp_2048_to_bin_movbe_32_src - 16
sp_2048_to_bin_movbe_32_dst = sp_2048_to_bin_movbe_32_dst + 16
        ENDM
        ret
sp_2048_to_bin_movbe_32 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_16 PROC
        push	r12
        mov	r9, rdx
        sub	rsp, 128
        ; A[0] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9]
        xor	r12, r12
        mov	QWORD PTR [rsp], rax
        mov	r11, rdx
        ; A[0] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+24], r10
        ; A[0] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+32], r11
        ; A[0] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+40], r12
        ; A[0] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+48], r10
        ; A[0] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+56], r11
        ; A[0] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+64], r12
        ; A[0] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+72], r10
        ; A[0] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+80], r11
        ; A[0] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+88], r12
        ; A[0] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[12] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+96], r10
        ; A[0] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[12] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+96]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[13] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+104], r11
        ; A[0] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[12] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+96]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+104]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[14] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+112], r12
        ; A[0] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[12] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[13] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+104]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[14] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+112]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[15] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+120], r10
        ; A[1] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+8]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[12] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+96]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[13] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[14] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+112]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[15] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+120]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+128], r11
        ; A[2] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+16]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[12] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+96]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+104]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[14] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[15] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+120]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+136], r12
        ; A[3] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+24]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[12] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[13] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+104]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[14] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+112]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[15] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+144], r10
        ; A[4] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+32]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[12] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+96]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[13] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[14] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+112]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[15] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+120]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+152], r11
        ; A[5] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+40]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[12] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+96]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+104]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[14] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[15] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+120]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+160], r12
        ; A[6] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+48]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[12] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[13] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+104]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[14] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+112]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[15] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+168], r10
        ; A[7] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+56]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[12] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+96]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[13] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[14] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+112]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[15] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+120]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+176], r11
        ; A[8] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+64]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[12] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+96]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+104]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[14] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[15] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+120]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+184], r12
        ; A[9] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+72]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[12] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[13] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+104]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[14] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+112]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[15] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+192], r10
        ; A[10] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+80]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[12] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+96]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[13] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[14] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+112]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[15] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+120]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+200], r11
        ; A[11] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+88]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[12] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+96]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+104]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[14] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[15] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+120]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+208], r12
        ; A[12] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+96]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[13] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+104]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[14] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+112]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[15] * B[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+216], r10
        ; A[13] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+104]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[14] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+112]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[15] * B[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r9+120]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+224], r11
        ; A[14] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+112]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[15] * B[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r9+120]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+232], r12
        ; A[15] * B[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        mov	QWORD PTR [rcx+240], r10
        mov	QWORD PTR [rcx+248], r11
        mov	rax, QWORD PTR [rsp]
        mov	rdx, QWORD PTR [rsp+8]
        mov	r10, QWORD PTR [rsp+16]
        mov	r11, QWORD PTR [rsp+24]
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], rdx
        mov	QWORD PTR [rcx+16], r10
        mov	QWORD PTR [rcx+24], r11
        mov	rax, QWORD PTR [rsp+32]
        mov	rdx, QWORD PTR [rsp+40]
        mov	r10, QWORD PTR [rsp+48]
        mov	r11, QWORD PTR [rsp+56]
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], rdx
        mov	QWORD PTR [rcx+48], r10
        mov	QWORD PTR [rcx+56], r11
        mov	rax, QWORD PTR [rsp+64]
        mov	rdx, QWORD PTR [rsp+72]
        mov	r10, QWORD PTR [rsp+80]
        mov	r11, QWORD PTR [rsp+88]
        mov	QWORD PTR [rcx+64], rax
        mov	QWORD PTR [rcx+72], rdx
        mov	QWORD PTR [rcx+80], r10
        mov	QWORD PTR [rcx+88], r11
        mov	rax, QWORD PTR [rsp+96]
        mov	rdx, QWORD PTR [rsp+104]
        mov	r10, QWORD PTR [rsp+112]
        mov	r11, QWORD PTR [rsp+120]
        mov	QWORD PTR [rcx+96], rax
        mov	QWORD PTR [rcx+104], rdx
        mov	QWORD PTR [rcx+112], r10
        mov	QWORD PTR [rcx+120], r11
        add	rsp, 128
        pop	r12
        ret
sp_2048_mul_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  * Uses the mulx, adcx and adox instructions.
;  *
;  * r   Result of multiplication.
;  * a   First number to multiply.
;  * b   Second number to multiply.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_16 PROC
        push	rbx
        push	rbp
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        mov	rbp, r8
        mov	r8, rcx
        mov	r9, rdx
        sub	rsp, 128
        cmp	r9, r8
        mov	rbx, rsp
        cmovne	rbx, r8
        cmp	rbp, r8
        cmove	rbx, rsp
        add	r8, 128
        xor	rdi, rdi
        mov	rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx	r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx	r12, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx], r10
        adcx	r11, rax
        ; A[0] * B[2]
        mulx	r13, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+8], r11
        adcx	r12, rax
        ; A[0] * B[3]
        mulx	r14, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+16], r12
        adcx	r13, rax
        mov	QWORD PTR [rbx+24], r13
        ; A[0] * B[4]
        mulx	r10, rax, QWORD PTR [rbp+32]
        adcx	r14, rax
        ; A[0] * B[5]
        mulx	r11, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+32], r14
        adcx	r10, rax
        ; A[0] * B[6]
        mulx	r12, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+40], r10
        adcx	r11, rax
        ; A[0] * B[7]
        mulx	r13, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        mov	QWORD PTR [rbx+56], r12
        ; A[0] * B[8]
        mulx	r14, rax, QWORD PTR [rbp+64]
        adcx	r13, rax
        ; A[0] * B[9]
        mulx	r10, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        ; A[0] * B[10]
        mulx	r11, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        ; A[0] * B[11]
        mulx	r12, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        mov	QWORD PTR [rbx+88], r11
        ; A[0] * B[12]
        mulx	r13, rax, QWORD PTR [rbp+96]
        adcx	r12, rax
        ; A[0] * B[13]
        mulx	r14, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        ; A[0] * B[14]
        mulx	r10, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        ; A[0] * B[15]
        mulx	r11, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adcx	r11, rdi
        mov	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [rbx+120], r10
        mov	QWORD PTR [r8], r11
        mov	rdx, QWORD PTR [r9+8]
        mov	r11, QWORD PTR [rbx+8]
        mov	r12, QWORD PTR [rbx+16]
        mov	r13, QWORD PTR [rbx+24]
        mov	r14, QWORD PTR [rbx+32]
        mov	r10, QWORD PTR [rbx+40]
        ; A[1] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[1] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[1] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+16], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[1] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+24], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+32], r14
        mov	r11, QWORD PTR [rbx+48]
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        ; A[1] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[1] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+40], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[1] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[1] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [rbx+64], r13
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        ; A[1] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r14, rax
        adox	r10, rcx
        ; A[1] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[1] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[1] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [rbx+96], r12
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        ; A[1] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r13, rax
        adox	r14, rcx
        ; A[1] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[1] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[1] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [rbx+120], r10
        mov	r12, rdi
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8], r11
        mov	QWORD PTR [r8+8], r12
        mov	rdx, QWORD PTR [r9+16]
        mov	r12, QWORD PTR [rbx+16]
        mov	r13, QWORD PTR [rbx+24]
        mov	r14, QWORD PTR [rbx+32]
        mov	r10, QWORD PTR [rbx+40]
        mov	r11, QWORD PTR [rbx+48]
        ; A[2] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r13, rcx
        ; A[2] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+16], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[2] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+24], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[2] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+32], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+40], r10
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        ; A[2] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[2] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[2] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[2] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+72], r14
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        ; A[2] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[2] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[2] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[2] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [rbx+104], r13
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[2] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r14, rax
        adox	r10, rcx
        ; A[2] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[2] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[2] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8], r11
        mov	r13, rdi
        adcx	r12, rax
        adox	r13, rcx
        adcx	r13, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+8], r12
        mov	QWORD PTR [r8+16], r13
        mov	rdx, QWORD PTR [r9+24]
        mov	r13, QWORD PTR [rbx+24]
        mov	r14, QWORD PTR [rbx+32]
        mov	r10, QWORD PTR [rbx+40]
        mov	r11, QWORD PTR [rbx+48]
        mov	r12, QWORD PTR [rbx+56]
        ; A[3] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r13, rax
        adox	r14, rcx
        ; A[3] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+24], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[3] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+32], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[3] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+40], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+48], r11
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        ; A[3] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r13, rcx
        ; A[3] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[3] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[3] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+80], r10
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        ; A[3] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[3] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[3] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[3] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+112], r14
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        ; A[3] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r10, rax
        adox	r11, rcx
        ; A[3] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[3] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[3] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+8], r12
        mov	r14, rdi
        adcx	r13, rax
        adox	r14, rcx
        adcx	r14, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+16], r13
        mov	QWORD PTR [r8+24], r14
        mov	rdx, QWORD PTR [r9+32]
        mov	r14, QWORD PTR [rbx+32]
        mov	r10, QWORD PTR [rbx+40]
        mov	r11, QWORD PTR [rbx+48]
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        ; A[4] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r14, rax
        adox	r10, rcx
        ; A[4] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+32], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[4] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+40], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[4] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [rbx+56], r12
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        ; A[4] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r13, rax
        adox	r14, rcx
        ; A[4] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[4] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[4] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+88], r11
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        ; A[4] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r13, rcx
        ; A[4] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[4] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[4] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+120], r10
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        ; A[4] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r11, rax
        adox	r12, rcx
        ; A[4] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[4] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[4] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+16], r13
        mov	r10, rdi
        adcx	r14, rax
        adox	r10, rcx
        adcx	r10, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+24], r14
        mov	QWORD PTR [r8+32], r10
        mov	rdx, QWORD PTR [r9+40]
        mov	r10, QWORD PTR [rbx+40]
        mov	r11, QWORD PTR [rbx+48]
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        ; A[5] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+40], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[5] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[5] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [rbx+64], r13
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        ; A[5] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r14, rax
        adox	r10, rcx
        ; A[5] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[5] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [rbx+96], r12
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[5] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r13, rax
        adox	r14, rcx
        ; A[5] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[5] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8], r11
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        ; A[5] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r12, rax
        adox	r13, rcx
        ; A[5] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[5] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[5] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+24], r14
        mov	r11, rdi
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+32], r10
        mov	QWORD PTR [r8+40], r11
        mov	rdx, QWORD PTR [r9+48]
        mov	r11, QWORD PTR [rbx+48]
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        ; A[6] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[6] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+48], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[6] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[6] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+72], r14
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        ; A[6] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[6] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[6] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [rbx+104], r13
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        ; A[6] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r14, rax
        adox	r10, rcx
        ; A[6] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[6] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r8+8], r12
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[6] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r13, rax
        adox	r14, rcx
        ; A[6] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[6] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+32], r10
        mov	r12, rdi
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+40], r11
        mov	QWORD PTR [r8+48], r12
        mov	rdx, QWORD PTR [r9+56]
        mov	r12, QWORD PTR [rbx+56]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        ; A[7] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r13, rcx
        ; A[7] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+56], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[7] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[7] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+80], r10
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        ; A[7] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[7] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[7] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+112], r14
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        ; A[7] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[7] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[7] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [r8+16], r13
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        ; A[7] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r14, rax
        adox	r10, rcx
        ; A[7] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[7] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+40], r11
        mov	r13, rdi
        adcx	r12, rax
        adox	r13, rcx
        adcx	r13, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+48], r12
        mov	QWORD PTR [r8+56], r13
        mov	rdx, QWORD PTR [r9+64]
        mov	r13, QWORD PTR [rbx+64]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        ; A[8] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r13, rax
        adox	r14, rcx
        ; A[8] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+64], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[8] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[8] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+88], r11
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        ; A[8] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r13, rcx
        ; A[8] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[8] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[8] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+120], r10
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        ; A[8] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[8] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[8] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[8] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r14
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        ; A[8] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r10, rax
        adox	r11, rcx
        ; A[8] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[8] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[8] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+48], r12
        mov	r14, rdi
        adcx	r13, rax
        adox	r14, rcx
        adcx	r14, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+56], r13
        mov	QWORD PTR [r8+64], r14
        mov	rdx, QWORD PTR [r9+72]
        mov	r14, QWORD PTR [rbx+72]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        ; A[9] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r14, rax
        adox	r10, rcx
        ; A[9] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+72], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[9] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[9] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [rbx+96], r12
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[9] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r13, rax
        adox	r14, rcx
        ; A[9] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[9] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[9] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8], r11
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[9] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r13, rcx
        ; A[9] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[9] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[9] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+32], r10
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        mov	r14, QWORD PTR [r8+64]
        ; A[9] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r11, rax
        adox	r12, rcx
        ; A[9] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[9] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+48], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[9] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+56], r13
        mov	r10, rdi
        adcx	r14, rax
        adox	r10, rcx
        adcx	r10, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+64], r14
        mov	QWORD PTR [r8+72], r10
        mov	rdx, QWORD PTR [r9+80]
        mov	r10, QWORD PTR [rbx+80]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        ; A[10] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[10] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+80], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[10] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[10] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [rbx+104], r13
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        ; A[10] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r14, rax
        adox	r10, rcx
        ; A[10] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[10] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[10] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r8+8], r12
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        ; A[10] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r13, rax
        adox	r14, rcx
        ; A[10] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[10] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[10] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+40], r11
        mov	r13, QWORD PTR [r8+56]
        mov	r14, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        ; A[10] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r12, rax
        adox	r13, rcx
        ; A[10] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+48], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[10] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+56], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[10] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+64], r14
        mov	r11, rdi
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+72], r10
        mov	QWORD PTR [r8+80], r11
        mov	rdx, QWORD PTR [r9+88]
        mov	r11, QWORD PTR [rbx+88]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        ; A[11] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[11] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+88], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[11] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[11] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+112], r14
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        ; A[11] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[11] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[11] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [r8+16], r13
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        ; A[11] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r14, rax
        adox	r10, rcx
        ; A[11] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[11] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r8+48], r12
        mov	r14, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        mov	r11, QWORD PTR [r8+80]
        ; A[11] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r13, rax
        adox	r14, rcx
        ; A[11] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+56], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[11] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+64], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+72], r10
        mov	r12, rdi
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+80], r11
        mov	QWORD PTR [r8+88], r12
        mov	rdx, QWORD PTR [r9+96]
        mov	r12, QWORD PTR [rbx+96]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        ; A[12] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r13, rcx
        ; A[12] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+96], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[12] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[12] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+120], r10
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        ; A[12] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[12] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[12] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[12] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r14
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        mov	r14, QWORD PTR [r8+64]
        ; A[12] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[12] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[12] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[12] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+48], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [r8+56], r13
        mov	r10, QWORD PTR [r8+72]
        mov	r11, QWORD PTR [r8+80]
        mov	r12, QWORD PTR [r8+88]
        ; A[12] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r14, rax
        adox	r10, rcx
        ; A[12] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+64], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[12] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+72], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[12] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+80], r11
        mov	r13, rdi
        adcx	r12, rax
        adox	r13, rcx
        adcx	r13, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+88], r12
        mov	QWORD PTR [r8+96], r13
        mov	rdx, QWORD PTR [r9+104]
        mov	r13, QWORD PTR [rbx+104]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[13] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r13, rax
        adox	r14, rcx
        ; A[13] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+104], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[13] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[13] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8], r11
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[13] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r13, rcx
        ; A[13] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[13] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[13] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+32], r10
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        mov	r14, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        ; A[13] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[13] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[13] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+48], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[13] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+56], r13
        adcx	r14, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+64], r14
        mov	r11, QWORD PTR [r8+80]
        mov	r12, QWORD PTR [r8+88]
        mov	r13, QWORD PTR [r8+96]
        ; A[13] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r10, rax
        adox	r11, rcx
        ; A[13] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+72], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[13] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+80], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[13] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+88], r12
        mov	r14, rdi
        adcx	r13, rax
        adox	r14, rcx
        adcx	r14, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+96], r13
        mov	QWORD PTR [r8+104], r14
        mov	rdx, QWORD PTR [r9+112]
        mov	r14, QWORD PTR [rbx+112]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        ; A[14] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r14, rax
        adox	r10, rcx
        ; A[14] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+112], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[14] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[14] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r8+8], r12
        mov	r14, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        ; A[14] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r13, rax
        adox	r14, rcx
        ; A[14] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+16], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[14] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[14] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+40], r11
        mov	r13, QWORD PTR [r8+56]
        mov	r14, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        mov	r11, QWORD PTR [r8+80]
        ; A[14] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r13, rcx
        ; A[14] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+48], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[14] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+56], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[14] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+64], r14
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+72], r10
        mov	r12, QWORD PTR [r8+88]
        mov	r13, QWORD PTR [r8+96]
        mov	r14, QWORD PTR [r8+104]
        ; A[14] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r11, rax
        adox	r12, rcx
        ; A[14] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+80], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[14] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+88], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[14] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+96], r13
        mov	r10, rdi
        adcx	r14, rax
        adox	r10, rcx
        adcx	r10, r15
        mov	r15, rdi
        adox	r15, rdi
        adcx	r15, rdi
        mov	QWORD PTR [r8+104], r14
        mov	QWORD PTR [r8+112], r10
        mov	rdx, QWORD PTR [r9+120]
        mov	r10, QWORD PTR [rbx+120]
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        mov	r13, QWORD PTR [r8+16]
        mov	r14, QWORD PTR [r8+24]
        ; A[15] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[15] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+120], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[15] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        mov	QWORD PTR [r8], r11
        adcx	r12, rax
        adox	r13, rcx
        ; A[15] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [r8+8], r12
        adcx	r13, rax
        adox	r14, rcx
        mov	QWORD PTR [r8+16], r13
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [r8+48]
        mov	r13, QWORD PTR [r8+56]
        ; A[15] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r14, rax
        adox	r10, rcx
        ; A[15] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+24], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[15] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8+32], r10
        adcx	r11, rax
        adox	r12, rcx
        ; A[15] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+40], r11
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r8+48], r12
        mov	r14, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        mov	r11, QWORD PTR [r8+80]
        mov	r12, QWORD PTR [r8+88]
        ; A[15] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r13, rax
        adox	r14, rcx
        ; A[15] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+56], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[15] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        mov	QWORD PTR [r8+64], r14
        adcx	r10, rax
        adox	r11, rcx
        ; A[15] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+80], r11
        mov	r13, QWORD PTR [r8+96]
        mov	r14, QWORD PTR [r8+104]
        mov	r10, QWORD PTR [r8+112]
        ; A[15] * B[12]
        mulx	rcx, rax, QWORD PTR [rbp+96]
        adcx	r12, rax
        adox	r13, rcx
        ; A[15] * B[13]
        mulx	rcx, rax, QWORD PTR [rbp+104]
        mov	QWORD PTR [r8+88], r12
        adcx	r13, rax
        adox	r14, rcx
        ; A[15] * B[14]
        mulx	rcx, rax, QWORD PTR [rbp+112]
        mov	QWORD PTR [r8+96], r13
        adcx	r14, rax
        adox	r10, rcx
        ; A[15] * B[15]
        mulx	rcx, rax, QWORD PTR [rbp+120]
        mov	QWORD PTR [r8+104], r14
        mov	r11, rdi
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r15
        mov	QWORD PTR [r8+112], r10
        mov	QWORD PTR [r8+120], r11
        sub	r8, 128
        cmp	r9, r8
        je	L_start_2048_mul_avx2_16
        cmp	rbp, r8
        jne	L_end_2048_mul_avx2_16
L_start_2048_mul_avx2_16:
        vmovdqu	xmm0, OWORD PTR [rbx]
        vmovups	OWORD PTR [r8], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+16]
        vmovups	OWORD PTR [r8+16], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+32]
        vmovups	OWORD PTR [r8+32], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+48]
        vmovups	OWORD PTR [r8+48], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+64]
        vmovups	OWORD PTR [r8+64], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+80]
        vmovups	OWORD PTR [r8+80], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+96]
        vmovups	OWORD PTR [r8+96], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+112]
        vmovups	OWORD PTR [r8+112], xmm0
L_end_2048_mul_avx2_16:
        add	rsp, 128
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        pop	rbp
        pop	rbx
        ret
sp_2048_mul_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add two 16-word (1024-bit) numbers. (r = a + b)
;  *
;  * r  Destination, 16 64-bit words (passed in rcx).
;  * a  First addend, 16 64-bit words (passed in rdx).
;  * b  Second addend, 16 64-bit words (passed in r8).
;  *
;  * Returns the carry out of the top word (0 or 1) in rax.
;  * Scratch registers used: r9, r10.
;  */
_text SEGMENT READONLY PARA
sp_2048_add_16 PROC
        ; Word 0 opens the carry chain. rax is zeroed before the add
        ; because xor would destroy the carry flag if done later.
        mov	r9, QWORD PTR [rdx]
        xor	rax, rax
        add	r9, QWORD PTR [r8]
        ; Words 1..14, two words per repetition. r9 and r10 alternate so
        ; the next word of a is loaded before the previous sum is stored;
        ; mov leaves the carry flag untouched between the adc steps.
sp_2048_add_16_off = 8
        REPT 7
        mov	r10, QWORD PTR [rdx+sp_2048_add_16_off]
        mov	QWORD PTR [rcx+sp_2048_add_16_off-8], r9
        adc	r10, QWORD PTR [r8+sp_2048_add_16_off]
        mov	r9, QWORD PTR [rdx+sp_2048_add_16_off+8]
        mov	QWORD PTR [rcx+sp_2048_add_16_off], r10
        adc	r9, QWORD PTR [r8+sp_2048_add_16_off+8]
sp_2048_add_16_off = sp_2048_add_16_off + 16
        ENDM
        ; Word 15, then flush the last two sums.
        mov	r10, QWORD PTR [rdx+120]
        mov	QWORD PTR [rcx+112], r9
        adc	r10, QWORD PTR [r8+120]
        mov	QWORD PTR [rcx+120], r10
        ; Fold the final carry into the return value.
        adc	rax, 0
        ret
sp_2048_add_16 ENDP
_text ENDS
; /* Subtract one 32-word (2048-bit) number from another in place. (a -= b)
;  *
;  * a  Minuend and destination, 32 64-bit words (passed in rcx).
;  * b  Subtrahend, 32 64-bit words (passed in rdx).
;  *
;  * Returns 0 in rax when there is no borrow out of the top word and
;  * all ones (-1) when there is.
;  * Scratch registers used: r8, r9.
;  */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_32 PROC
        ; Word 0 opens the borrow chain.
        mov	r8, QWORD PTR [rcx]
        sub	r8, QWORD PTR [rdx]
        ; Words 1..30, two words per repetition. r8 and r9 alternate so
        ; the next word of a is loaded before the previous difference is
        ; written back; mov leaves the carry flag untouched between the
        ; sbb steps.
sp_2048_sub_in_place_32_off = 8
        REPT 15
        mov	r9, QWORD PTR [rcx+sp_2048_sub_in_place_32_off]
        mov	QWORD PTR [rcx+sp_2048_sub_in_place_32_off-8], r8
        sbb	r9, QWORD PTR [rdx+sp_2048_sub_in_place_32_off]
        mov	r8, QWORD PTR [rcx+sp_2048_sub_in_place_32_off+8]
        mov	QWORD PTR [rcx+sp_2048_sub_in_place_32_off], r9
        sbb	r8, QWORD PTR [rdx+sp_2048_sub_in_place_32_off+8]
sp_2048_sub_in_place_32_off = sp_2048_sub_in_place_32_off + 16
        ENDM
        ; Word 31, then flush the last two differences.
        mov	r9, QWORD PTR [rcx+248]
        mov	QWORD PTR [rcx+240], r8
        sbb	r9, QWORD PTR [rdx+248]
        mov	QWORD PTR [rcx+248], r9
        ; rax = rax - rax - CF, i.e. 0 or -1 depending on the final borrow.
        sbb	rax, rax
        ret
sp_2048_sub_in_place_32 ENDP
_text ENDS
; /* Add two 32-word (2048-bit) numbers. (r = a + b)
;  *
;  * r  Destination, 32 64-bit words (passed in rcx).
;  * a  First addend, 32 64-bit words (passed in rdx).
;  * b  Second addend, 32 64-bit words (passed in r8).
;  *
;  * Returns the carry out of the top word (0 or 1) in rax.
;  * Scratch registers used: r9, r10.
;  */
_text SEGMENT READONLY PARA
sp_2048_add_32 PROC
        ; Word 0 opens the carry chain. rax is zeroed before the add
        ; because xor would destroy the carry flag if done later.
        mov	r9, QWORD PTR [rdx]
        xor	rax, rax
        add	r9, QWORD PTR [r8]
        ; Words 1..30, two words per repetition. r9 and r10 alternate so
        ; the next word of a is loaded before the previous sum is stored;
        ; mov leaves the carry flag untouched between the adc steps.
sp_2048_add_32_off = 8
        REPT 15
        mov	r10, QWORD PTR [rdx+sp_2048_add_32_off]
        mov	QWORD PTR [rcx+sp_2048_add_32_off-8], r9
        adc	r10, QWORD PTR [r8+sp_2048_add_32_off]
        mov	r9, QWORD PTR [rdx+sp_2048_add_32_off+8]
        mov	QWORD PTR [rcx+sp_2048_add_32_off], r10
        adc	r9, QWORD PTR [r8+sp_2048_add_32_off+8]
sp_2048_add_32_off = sp_2048_add_32_off + 16
        ENDM
        ; Word 31, then flush the last two sums.
        mov	r10, QWORD PTR [rdx+248]
        mov	QWORD PTR [rcx+240], r9
        adc	r10, QWORD PTR [r8+248]
        mov	QWORD PTR [rcx+248], r10
        ; Fold the final carry into the return value.
        adc	rax, 0
        ret
sp_2048_add_32 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_32 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 808
        mov	QWORD PTR [rsp+768], rcx
        mov	QWORD PTR [rsp+776], rdx
        mov	QWORD PTR [rsp+784], r8
        lea	r12, QWORD PTR [rsp+512]
        lea	r14, QWORD PTR [rdx+128]
        ; Add
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r12+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [r12+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [rdx+112]
        mov	QWORD PTR [r12+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r12+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	QWORD PTR [r12+120], rax
        adc	r15, 0
        mov	QWORD PTR [rsp+792], r15
        lea	r13, QWORD PTR [rsp+640]
        lea	r14, QWORD PTR [r8+128]
        ; Add
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [r8+96]
        mov	QWORD PTR [r13+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [r8+104]
        mov	QWORD PTR [r13+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [r8+112]
        mov	QWORD PTR [r13+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [r8+120]
        mov	QWORD PTR [r13+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	QWORD PTR [r13+120], rax
        adc	rdi, 0
        mov	QWORD PTR [rsp+800], rdi
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_2048_mul_16
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        lea	rcx, QWORD PTR [rsp+256]
        add	r8, 128
        add	rdx, 128
        call	sp_2048_mul_16
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        mov	rcx, QWORD PTR [rsp+768]
        call	sp_2048_mul_16
IFDEF _WIN64
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        mov	rcx, QWORD PTR [rsp+768]
ENDIF
        mov	r15, QWORD PTR [rsp+792]
        mov	rdi, QWORD PTR [rsp+800]
        mov	rsi, QWORD PTR [rsp+768]
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+512]
        lea	r13, QWORD PTR [rsp+640]
        and	r11, rdi
        neg	r15
        neg	rdi
        add	rsi, 256
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12], rax
        mov	QWORD PTR [r13], r9
        mov	rax, QWORD PTR [r12+8]
        mov	r9, QWORD PTR [r13+8]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+8], rax
        mov	QWORD PTR [r13+8], r9
        mov	rax, QWORD PTR [r12+16]
        mov	r9, QWORD PTR [r13+16]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+16], rax
        mov	QWORD PTR [r13+16], r9
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+24], rax
        mov	QWORD PTR [r13+24], r9
        mov	rax, QWORD PTR [r12+32]
        mov	r9, QWORD PTR [r13+32]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+32], rax
        mov	QWORD PTR [r13+32], r9
        mov	rax, QWORD PTR [r12+40]
        mov	r9, QWORD PTR [r13+40]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+40], rax
        mov	QWORD PTR [r13+40], r9
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+48], rax
        mov	QWORD PTR [r13+48], r9
        mov	rax, QWORD PTR [r12+56]
        mov	r9, QWORD PTR [r13+56]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+56], rax
        mov	QWORD PTR [r13+56], r9
        mov	rax, QWORD PTR [r12+64]
        mov	r9, QWORD PTR [r13+64]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+64], rax
        mov	QWORD PTR [r13+64], r9
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+72], rax
        mov	QWORD PTR [r13+72], r9
        mov	rax, QWORD PTR [r12+80]
        mov	r9, QWORD PTR [r13+80]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+80], rax
        mov	QWORD PTR [r13+80], r9
        mov	rax, QWORD PTR [r12+88]
        mov	r9, QWORD PTR [r13+88]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+88], rax
        mov	QWORD PTR [r13+88], r9
        mov	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [r13+96]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+96], rax
        mov	QWORD PTR [r13+96], r9
        mov	rax, QWORD PTR [r12+104]
        mov	r9, QWORD PTR [r13+104]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+104], rax
        mov	QWORD PTR [r13+104], r9
        mov	rax, QWORD PTR [r12+112]
        mov	r9, QWORD PTR [r13+112]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+112], rax
        mov	QWORD PTR [r13+112], r9
        mov	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [r13+120]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+120], rax
        mov	QWORD PTR [r13+120], r9
        mov	rax, QWORD PTR [r12]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	QWORD PTR [rsi+120], rax
        adc	r11, 0
        lea	r13, QWORD PTR [rsp+256]
        mov	r12, rsp
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [r13+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [r13+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [r13+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [r13+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [r13+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [r13+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [r13+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [r13+248]
        mov	QWORD PTR [r12+248], r9
        sbb	r11, 0
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [rcx+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [rcx+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [rcx+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [rcx+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [rcx+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [rcx+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [rcx+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [rcx+248]
        mov	QWORD PTR [r12+248], r9
        sbb	r11, 0
        sub	rsi, 128
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r12+192]
        mov	r9, QWORD PTR [rsi+200]
        mov	QWORD PTR [rsi+192], rax
        adc	r9, QWORD PTR [r12+200]
        mov	r10, QWORD PTR [rsi+208]
        mov	QWORD PTR [rsi+200], r9
        adc	r10, QWORD PTR [r12+208]
        mov	rax, QWORD PTR [rsi+216]
        mov	QWORD PTR [rsi+208], r10
        adc	rax, QWORD PTR [r12+216]
        mov	r9, QWORD PTR [rsi+224]
        mov	QWORD PTR [rsi+216], rax
        adc	r9, QWORD PTR [r12+224]
        mov	r10, QWORD PTR [rsi+232]
        mov	QWORD PTR [rsi+224], r9
        adc	r10, QWORD PTR [r12+232]
        mov	rax, QWORD PTR [rsi+240]
        mov	QWORD PTR [rsi+232], r10
        adc	rax, QWORD PTR [r12+240]
        mov	r9, QWORD PTR [rsi+248]
        mov	QWORD PTR [rsi+240], rax
        adc	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [rsi+248], r9
        adc	r11, 0
        mov	QWORD PTR [rcx+384], r11
        add	rsi, 128
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r13+128]
        mov	QWORD PTR [rsi+128], r9
        ; Add to zero
        mov	rax, QWORD PTR [r13+136]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+144]
        mov	QWORD PTR [rsi+136], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+152]
        mov	QWORD PTR [rsi+144], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+160]
        mov	QWORD PTR [rsi+152], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+168]
        mov	QWORD PTR [rsi+160], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+176]
        mov	QWORD PTR [rsi+168], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+184]
        mov	QWORD PTR [rsi+176], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+192]
        mov	QWORD PTR [rsi+184], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+200]
        mov	QWORD PTR [rsi+192], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+208]
        mov	QWORD PTR [rsi+200], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+216]
        mov	QWORD PTR [rsi+208], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+224]
        mov	QWORD PTR [rsi+216], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+232]
        mov	QWORD PTR [rsi+224], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+240]
        mov	QWORD PTR [rsi+232], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+248]
        mov	QWORD PTR [rsi+240], r9
        adc	r10, 0
        mov	QWORD PTR [rsi+248], r10
        add	rsp, 808
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mul_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  *
;  * The operands are split into a low and a high 128-byte (16-word) half and
;  * the result is assembled from three calls to sp_2048_mul_avx2_16
;  * (Karatsuba-style):
;  *   z1 = (a_lo + a_hi) * (b_lo + b_hi)   -> [rsp+0   .. rsp+255]
;  *   z2 = a_hi * b_hi                     -> [rsp+256 .. rsp+511]
;  *   z0 = a_lo * b_lo                     -> r[0 .. 255]
;  *   r  = z0 + ((z1 - z2 - z0) << 1024) + (z2 << 2048)
;  * The half sums are kept to 16 words; their carry bits are saved separately
;  * and the terms they contribute are added back with masks (pext), without
;  * branching on the carry values.
;  *
;  * r  A single precision integer. (rcx; bytes 128..511 are written here,
;  *    bytes 0..255 are the output buffer of the third multiply call)
;  * a  A single precision integer. (rdx; bytes 0..255 are read)
;  * b  A single precision integer. (r8; bytes 0..255 are read)
;  *
;  * Stack frame (808 bytes):
;  *   [rsp+0]    z1 (256 bytes)          [rsp+256]  z2 (256 bytes)
;  *   [rsp+512]  a_lo + a_hi (128 bytes) [rsp+640]  b_lo + b_hi (128 bytes)
;  *   [rsp+768]  saved r                 [rsp+776]  saved a
;  *   [rsp+784]  saved b
;  *   [rsp+792]  carry of a_lo + a_hi    [rsp+800]  carry of b_lo + b_hi
;  *
;  * r12-r15, rdi and rsi are saved on entry and restored before returning.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_32 PROC
        ; Save the registers that are restored before returning.
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 808
        ; Keep the three arguments so they can be reloaded after the calls.
        mov	QWORD PTR [rsp+768], rcx
        mov	QWORD PTR [rsp+776], rdx
        mov	QWORD PTR [rsp+784], r8
        ; r12 = buffer for a_lo + a_hi, r14 = a_hi (a + 128 bytes)
        lea	r12, QWORD PTR [rsp+512]
        lea	r14, QWORD PTR [rdx+128]
        ; Add: [r12] = a_lo + a_hi (16 words), r15 = carry out (0 or 1)
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r12+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [r12+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [rdx+112]
        mov	QWORD PTR [r12+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r12+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	QWORD PTR [r12+120], rax
        ; Capture the carry of the a half sum and park it in the frame.
        adc	r15, 0
        mov	QWORD PTR [rsp+792], r15
        ; r13 = buffer for b_lo + b_hi, r14 = b_hi (b + 128 bytes)
        lea	r13, QWORD PTR [rsp+640]
        lea	r14, QWORD PTR [r8+128]
        ; Add: [r13] = b_lo + b_hi (16 words), rdi = carry out (0 or 1)
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [r8+96]
        mov	QWORD PTR [r13+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [r8+104]
        mov	QWORD PTR [r13+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [r8+112]
        mov	QWORD PTR [r13+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [r8+120]
        mov	QWORD PTR [r13+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	QWORD PTR [r13+120], rax
        ; Capture the carry of the b half sum and park it in the frame.
        adc	rdi, 0
        mov	QWORD PTR [rsp+800], rdi
        ; z1: call with r = [rsp], a = (a_lo + a_hi), b = (b_lo + b_hi).
        ; sp_2048_mul_avx2_16 is defined elsewhere in this file; presumably a
        ; 16-word multiply producing a 32-word result - TODO confirm.
        ; NOTE(review): the output buffer starts at rsp, so the 32 bytes the
        ; Microsoft x64 convention treats as the callee's home space overlap
        ; the z1 buffer - confirm the callee never uses its home space.
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_2048_mul_avx2_16
        ; z2: call with r = [rsp+256], a = a_hi, b = b_hi.
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        lea	rcx, QWORD PTR [rsp+256]
        add	r8, 128
        add	rdx, 128
        call	sp_2048_mul_avx2_16
        ; z0: call with the caller's r, a and b (low halves), result in r[0..255].
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        mov	rcx, QWORD PTR [rsp+768]
        call	sp_2048_mul_avx2_16
IFDEF _WIN64
        ; The argument registers are volatile across the call; reload them.
        ; rcx (r) is used below as the pointer to z0 and for the store at r+384.
        mov	r8, QWORD PTR [rsp+784]
        mov	rdx, QWORD PTR [rsp+776]
        mov	rcx, QWORD PTR [rsp+768]
ENDIF
        ; Carry correction for z1. With ca/cb the saved carries of the a/b half
        ; sums, the full product of the half sums also contains
        ;   (cb * (a_lo + a_hi) + ca * (b_lo + b_hi)) << 1024  and  (ca & cb) << 2048.
        ; r15 = ca, rdi = cb, r11 = ca & cb (top word of the correction).
        mov	r15, QWORD PTR [rsp+792]
        mov	rdi, QWORD PTR [rsp+800]
        mov	rsi, QWORD PTR [rsp+768]
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+512]
        lea	r13, QWORD PTR [rsp+640]
        and	r11, rdi
        ; neg turns a carry of 1 into an all-ones mask; 0 stays 0.
        neg	r15
        neg	rdi
        ; rsi = r + 256: the correction is written to r[256..383].
        add	rsi, 256
        ; pext with an all-ones mask returns the source unchanged and with a
        ; zero mask returns 0, so each word below is
        ;   (a-sum word if cb) + (b-sum word if ca), added with carry.
        ; mov and pext leave CF alone, so the adc chain stays intact.
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        add	rax, r9
        mov	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [r13+8]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [r13+16]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+8], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+16], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [r13+32]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+24], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [r13+40]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+32], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+40], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [r13+56]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+48], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [r13+64]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+56], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+64], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [r13+80]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+72], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [r13+88]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+80], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [r13+96]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+88], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [r13+104]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+96], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [r13+112]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+104], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [r13+120]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+112], r10
        adc	rax, r9
        mov	QWORD PTR [rsi+120], rax
        ; Fold the carry of the correction into its top word.
        adc	r11, 0
        ; r12 = z1 (at rsp), r13 = z2 (at rsp+256).
        lea	r13, QWORD PTR [rsp+256]
        mov	r12, rsp
        ; Sub: z1 -= z2 (32 words, in place); the borrow is taken from r11.
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [r13+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [r13+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [r13+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [r13+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [r13+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [r13+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [r13+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [r13+248]
        mov	QWORD PTR [r12+248], r9
        sbb	r11, 0
        ; Sub: z1 -= z0 (32 words, z0 read from r via rcx); borrow from r11.
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [rcx+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [rcx+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [rcx+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [rcx+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [rcx+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [rcx+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [rcx+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [rcx+248]
        mov	QWORD PTR [r12+248], r9
        sbb	r11, 0
        ; rsi = r + 128: the middle term is added in at a 128-byte offset.
        sub	rsi, 128
        ; Add: r[128..383] += (z1 - z2 - z0), 32 words; carry goes into r11.
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r12+192]
        mov	r9, QWORD PTR [rsi+200]
        mov	QWORD PTR [rsi+192], rax
        adc	r9, QWORD PTR [r12+200]
        mov	r10, QWORD PTR [rsi+208]
        mov	QWORD PTR [rsi+200], r9
        adc	r10, QWORD PTR [r12+208]
        mov	rax, QWORD PTR [rsi+216]
        mov	QWORD PTR [rsi+208], r10
        adc	rax, QWORD PTR [r12+216]
        mov	r9, QWORD PTR [rsi+224]
        mov	QWORD PTR [rsi+216], rax
        adc	r9, QWORD PTR [r12+224]
        mov	r10, QWORD PTR [rsi+232]
        mov	QWORD PTR [rsi+224], r9
        adc	r10, QWORD PTR [r12+232]
        mov	rax, QWORD PTR [rsi+240]
        mov	QWORD PTR [rsi+232], r10
        adc	rax, QWORD PTR [r12+240]
        mov	r9, QWORD PTR [rsi+248]
        mov	QWORD PTR [rsi+240], rax
        adc	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [rsi+248], r9
        ; r11 now holds the word above r[383]; store it at r[384].
        adc	r11, 0
        mov	QWORD PTR [rcx+384], r11
        ; rsi = r + 256: z2 is added in at a 256-byte offset.
        add	rsi, 128
        ; Add: r[256..391] += z2[0..135] (17 words, including the word at r[384]).
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r13+128]
        mov	QWORD PTR [rsi+128], r9
        ; Add to zero: r[392..511] has not been written yet, so the remaining
        ; 15 words are z2[136..255] plus the running carry.
        mov	rax, QWORD PTR [r13+136]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+144]
        mov	QWORD PTR [rsi+136], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+152]
        mov	QWORD PTR [rsi+144], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+160]
        mov	QWORD PTR [rsi+152], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+168]
        mov	QWORD PTR [rsi+160], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+176]
        mov	QWORD PTR [rsi+168], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+184]
        mov	QWORD PTR [rsi+176], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+192]
        mov	QWORD PTR [rsi+184], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+200]
        mov	QWORD PTR [rsi+192], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+208]
        mov	QWORD PTR [rsi+200], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+216]
        mov	QWORD PTR [rsi+208], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+224]
        mov	QWORD PTR [rsi+216], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+232]
        mov	QWORD PTR [rsi+224], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+240]
        mov	QWORD PTR [rsi+232], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+248]
        mov	QWORD PTR [rsi+240], r9
        adc	r10, 0
        mov	QWORD PTR [rsi+248], r10
        ; Release the frame and restore the saved registers in reverse order.
        add	rsp, 808
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mul_avx2_32 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_2048_sqr_16 PROC
        push	r12
        push	r13
        push	r14
        mov	r8, rdx
        sub	rsp, 128
        ; A[0] * A[0]
        mov	rax, QWORD PTR [r8]
        mul	rax
        xor	r11, r11
        mov	QWORD PTR [rsp], rax
        mov	r10, rdx
        ; A[0] * A[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r8]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rsp+8], r10
        ; A[0] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r8]
        xor	r10, r10
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        ; A[1] * A[1]
        mov	rax, QWORD PTR [r8+8]
        mul	rax
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+16], r11
        ; A[0] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8]
        xor	r11, r11
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r8+8]
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+24], r9
        ; A[0] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[1] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8+8]
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[2] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	rax
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rsp+32], r10
        ; A[0] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+40], r11
        ; A[0] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+48], r9
        ; A[0] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rsp+56], r10
        ; A[0] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+64], r11
        ; A[0] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+72], r9
        ; A[0] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rsp+80], r10
        ; A[0] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+88], r11
        ; A[0] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+96], r9
        ; A[0] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rsp+104], r10
        ; A[0] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+112], r11
        ; A[0] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+120], r9
        ; A[1] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[2] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+128], r10
        ; A[2] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+16]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[3] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rcx+136], r11
        ; A[3] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+24]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[4] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[9] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rcx+144], r9
        ; A[4] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+32]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[5] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[9] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+72]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+152], r10
        ; A[5] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+40]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[6] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[9] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+72]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[10] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rcx+160], r11
        ; A[6] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+48]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[7] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[9] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+72]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[10] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+80]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rcx+168], r9
        ; A[7] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+56]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[8] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[9] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+72]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[10] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+80]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[11] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+176], r10
        ; A[8] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+64]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[9] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+72]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[10] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+80]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[11] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	QWORD PTR [r8+88]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rcx+184], r11
        ; A[9] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+72]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[10] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+80]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[11] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+88]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[12] * A[12]
        mov	rax, QWORD PTR [r8+96]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rcx+192], r9
        ; A[10] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+80]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[11] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+88]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[12] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	QWORD PTR [r8+96]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+200], r10
        ; A[11] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+88]
        xor	r10, r10
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        ; A[12] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+96]
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        ; A[13] * A[13]
        mov	rax, QWORD PTR [r8+104]
        mul	rax
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+208], r11
        ; A[12] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+96]
        xor	r11, r11
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[13] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	QWORD PTR [r8+104]
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+216], r9
        ; A[13] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+104]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[14] * A[14]
        mov	rax, QWORD PTR [r8+112]
        mul	rax
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rcx+224], r10
        ; A[14] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	QWORD PTR [r8+112]
        xor	r10, r10
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+232], r11
        ; A[15] * A[15]
        mov	rax, QWORD PTR [r8+120]
        mul	rax
        add	r9, rax
        adc	r10, rdx
        mov	QWORD PTR [rcx+240], r9
        mov	QWORD PTR [rcx+248], r10
        mov	rax, QWORD PTR [rsp]
        mov	rdx, QWORD PTR [rsp+8]
        mov	r12, QWORD PTR [rsp+16]
        mov	r13, QWORD PTR [rsp+24]
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], rdx
        mov	QWORD PTR [rcx+16], r12
        mov	QWORD PTR [rcx+24], r13
        mov	rax, QWORD PTR [rsp+32]
        mov	rdx, QWORD PTR [rsp+40]
        mov	r12, QWORD PTR [rsp+48]
        mov	r13, QWORD PTR [rsp+56]
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], rdx
        mov	QWORD PTR [rcx+48], r12
        mov	QWORD PTR [rcx+56], r13
        mov	rax, QWORD PTR [rsp+64]
        mov	rdx, QWORD PTR [rsp+72]
        mov	r12, QWORD PTR [rsp+80]
        mov	r13, QWORD PTR [rsp+88]
        mov	QWORD PTR [rcx+64], rax
        mov	QWORD PTR [rcx+72], rdx
        mov	QWORD PTR [rcx+80], r12
        mov	QWORD PTR [rcx+88], r13
        mov	rax, QWORD PTR [rsp+96]
        mov	rdx, QWORD PTR [rsp+104]
        mov	r12, QWORD PTR [rsp+112]
        mov	r13, QWORD PTR [rsp+120]
        mov	QWORD PTR [rcx+96], rax
        mov	QWORD PTR [rcx+104], rdx
        mov	QWORD PTR [rcx+112], r12
        mov	QWORD PTR [rcx+120], r13
        add	rsp, 128
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
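; /* Square a and put result in r. (r = a * a)
;  * Uses the mulx, adcx and adox instructions.
;  *
;  * r  A single precision integer that receives the result. When r is the
;  *    same pointer as a, the low words are built in a stack buffer instead
;  *    of directly in r.
;  * a  A single precision integer to square, read as 16 64-bit words.
;  */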
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_16 PROC
        push	rbp
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        push	rbx
        mov	r8, rcx
        mov	r9, rdx
        sub	rsp, 128
        cmp	r9, r8
        mov	rbp, rsp
        cmovne	rbp, r8
        add	r8, 128
        xor	r13, r13
        ; Diagonal 1
        ; Zero into %r9
        ; Zero into %r10
        ; A[1] x A[0]
        mov	rdx, QWORD PTR [r9]
        mulx	r11, r10, QWORD PTR [r9+8]
        ; A[2] x A[0]
        mulx	r12, rax, QWORD PTR [r9+16]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [rbp+8], r10
        mov	QWORD PTR [rbp+16], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[3] x A[0]
        mulx	r10, rax, QWORD PTR [r9+24]
        adcx	r12, rax
        adox	r10, r13
        ; A[4] x A[0]
        mulx	r11, rax, QWORD PTR [r9+32]
        adcx	r10, rax
        adox	r11, r13
        mov	QWORD PTR [rbp+24], r12
        mov	QWORD PTR [rbp+32], r10
        ; Zero into %r10
        ; Zero into %r8
        ; A[5] x A[0]
        mulx	r12, rax, QWORD PTR [r9+40]
        adcx	r11, rax
        adox	r12, r13
        ; A[6] x A[0]
        mulx	r10, rax, QWORD PTR [r9+48]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [rbp+40], r11
        mov	QWORD PTR [rbp+48], r12
        ; Zero into %r9
        ; Zero into %r10
        ; A[7] x A[0]
        mulx	r11, rax, QWORD PTR [r9+56]
        adcx	r10, rax
        adox	r11, r13
        ; A[8] x A[0]
        mulx	r12, rax, QWORD PTR [r9+64]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [rbp+56], r10
        mov	QWORD PTR [rbp+64], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[9] x A[0]
        mulx	r10, rax, QWORD PTR [r9+72]
        adcx	r12, rax
        adox	r10, r13
        ; A[10] x A[0]
        mulx	r11, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, r13
        mov	QWORD PTR [rbp+72], r12
        mov	QWORD PTR [rbp+80], r10
        ; No load %r13 - %r10
        ; A[11] x A[0]
        mulx	r15, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r15, r13
        ; A[12] x A[0]
        mulx	rdi, rax, QWORD PTR [r9+96]
        adcx	r15, rax
        adox	rdi, r13
        mov	QWORD PTR [rbp+88], r11
        ; No store %r13 - %r10
        ; No load %r15 - %r9
        ; A[13] x A[0]
        mulx	rsi, rax, QWORD PTR [r9+104]
        adcx	rdi, rax
        adox	rsi, r13
        ; A[14] x A[0]
        mulx	rbx, rax, QWORD PTR [r9+112]
        adcx	rsi, rax
        adox	rbx, r13
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[0]
        mulx	r10, rax, QWORD PTR [r9+120]
        adcx	rbx, rax
        adox	r10, r13
        ; No store %rbx - %r10
        ;  Carry
        adcx	r10, r13
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8], r10
        ; Diagonal 2
        mov	r10, QWORD PTR [rbp+24]
        mov	r11, QWORD PTR [rbp+32]
        mov	r12, QWORD PTR [rbp+40]
        ; A[2] x A[1]
        mov	rdx, QWORD PTR [r9+8]
        mulx	rcx, rax, QWORD PTR [r9+16]
        adcx	r10, rax
        adox	r11, rcx
        ; A[3] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+24]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbp+24], r10
        mov	QWORD PTR [rbp+32], r11
        mov	r10, QWORD PTR [rbp+48]
        mov	r11, QWORD PTR [rbp+56]
        ; A[4] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r12, rax
        adox	r10, rcx
        ; A[5] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbp+40], r12
        mov	QWORD PTR [rbp+48], r10
        mov	r12, QWORD PTR [rbp+64]
        mov	r10, QWORD PTR [rbp+72]
        ; A[6] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbp+56], r11
        mov	QWORD PTR [rbp+64], r12
        mov	r11, QWORD PTR [rbp+80]
        mov	r12, QWORD PTR [rbp+88]
        ; A[8] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[9] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbp+72], r10
        mov	QWORD PTR [rbp+80], r11
        ; No load %r13 - %r8
        ; A[10] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r12, rax
        adox	r15, rcx
        ; A[11] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r15, rax
        adox	rdi, rcx
        mov	QWORD PTR [rbp+88], r12
        ; No store %r13 - %r8
        ; No load %r15 - %r10
        ; A[12] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[13] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r9
        ; No store %r15 - %r10
        mov	r11, QWORD PTR [r8]
        ; Zero into %r10
        ; A[14] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	rbx, rax
        adox	r11, rcx
        ; A[15] x A[1]
        mulx	r12, rax, QWORD PTR [r9+120]
        adcx	r11, rax
        adox	r12, r13
        ; No store %rbx - %r8
        mov	QWORD PTR [r8], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[2]
        mov	rdx, QWORD PTR [r9+16]
        mulx	r10, rax, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+8], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+16], r10
        ; Diagonal 3
        mov	r10, QWORD PTR [rbp+40]
        mov	r11, QWORD PTR [rbp+48]
        mov	r12, QWORD PTR [rbp+56]
        ; A[3] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+24]
        adcx	r10, rax
        adox	r11, rcx
        ; A[4] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbp+40], r10
        mov	QWORD PTR [rbp+48], r11
        mov	r10, QWORD PTR [rbp+64]
        mov	r11, QWORD PTR [rbp+72]
        ; A[5] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r12, rax
        adox	r10, rcx
        ; A[6] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbp+56], r12
        mov	QWORD PTR [rbp+64], r10
        mov	r12, QWORD PTR [rbp+80]
        mov	r10, QWORD PTR [rbp+88]
        ; A[7] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r11, rax
        adox	r12, rcx
        ; A[8] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbp+72], r11
        mov	QWORD PTR [rbp+80], r12
        ; No load %r13 - %r9
        ; A[9] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r10, rax
        adox	r15, rcx
        ; A[10] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r15, rax
        adox	rdi, rcx
        mov	QWORD PTR [rbp+88], r10
        ; No store %r13 - %r9
        ; No load %r15 - %r8
        ; A[11] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[12] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r10
        ; No store %r15 - %r8
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [r8+8]
        ; A[13] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	rbx, rax
        adox	r12, rcx
        ; A[14] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r12, rax
        adox	r10, rcx
        ; No store %rbx - %r9
        mov	QWORD PTR [r8], r12
        mov	r11, QWORD PTR [r8+16]
        ; Zero into %r10
        ; A[14] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r10, rax
        adox	r11, rcx
        ; A[14] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	r12, rax, QWORD PTR [r9+112]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+8], r10
        mov	QWORD PTR [r8+16], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[14] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	r10, rax, QWORD PTR [r9+112]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+24], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+32], r10
        ; Diagonal 4
        mov	r10, QWORD PTR [rbp+56]
        mov	r11, QWORD PTR [rbp+64]
        mov	r12, QWORD PTR [rbp+72]
        ; A[4] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbp+56], r10
        mov	QWORD PTR [rbp+64], r11
        mov	r10, QWORD PTR [rbp+80]
        mov	r11, QWORD PTR [rbp+88]
        ; A[6] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r12, rax
        adox	r10, rcx
        ; A[7] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbp+72], r12
        mov	QWORD PTR [rbp+80], r10
        ; No load %r13 - %r10
        ; A[8] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r11, rax
        adox	r15, rcx
        ; A[9] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r15, rax
        adox	rdi, rcx
        mov	QWORD PTR [rbp+88], r11
        ; No store %r13 - %r10
        ; No load %r15 - %r9
        ; A[10] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[11] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[12] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	rbx, rax
        adox	r10, rcx
        ; A[13] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r10, rax
        adox	r11, rcx
        ; No store %rbx - %r10
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[13] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r11, rax
        adox	r12, rcx
        ; A[13] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+8], r11
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        ; Zero into %r10
        ; A[13] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r10, rax
        adox	r11, rcx
        ; A[13] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	r12, rax, QWORD PTR [r9+104]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+24], r10
        mov	QWORD PTR [r8+32], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[13] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	r10, rax, QWORD PTR [r9+104]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+40], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+48], r10
        ; Diagonal 5
        mov	r10, QWORD PTR [rbp+72]
        mov	r11, QWORD PTR [rbp+80]
        mov	r12, QWORD PTR [rbp+88]
        ; A[5] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbp+72], r10
        mov	QWORD PTR [rbp+80], r11
        ; No load %r13 - %r8
        ; A[7] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r12, rax
        adox	r15, rcx
        ; A[8] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r15, rax
        adox	rdi, rcx
        mov	QWORD PTR [rbp+88], r12
        ; No store %r13 - %r8
        ; No load %r15 - %r10
        ; A[9] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[10] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r9
        ; No store %r15 - %r10
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[11] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	rbx, rax
        adox	r11, rcx
        ; A[12] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r11, rax
        adox	r12, rcx
        ; No store %rbx - %r8
        mov	QWORD PTR [r8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        ; A[12] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r12, rax
        adox	r10, rcx
        ; A[12] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+8], r12
        mov	QWORD PTR [r8+16], r10
        mov	r12, QWORD PTR [r8+32]
        mov	r10, QWORD PTR [r8+40]
        ; A[12] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r11, rax
        adox	r12, rcx
        ; A[12] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r11
        mov	QWORD PTR [r8+32], r12
        mov	r11, QWORD PTR [r8+48]
        ; Zero into %r10
        ; A[12] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+96]
        adcx	r10, rax
        adox	r11, rcx
        ; A[12] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	r12, rax, QWORD PTR [r9+96]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+40], r10
        mov	QWORD PTR [r8+48], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[12] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	r10, rax, QWORD PTR [r9+96]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+56], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+64], r10
        ; Diagonal 6
        mov	r10, QWORD PTR [rbp+88]
        ; No load %r13 - %r9
        ; A[6] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r10, rax
        adox	r15, rcx
        ; A[7] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r15, rax
        adox	rdi, rcx
        mov	QWORD PTR [rbp+88], r10
        ; No store %r13 - %r9
        ; No load %r15 - %r8
        ; A[8] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[9] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r10
        ; No store %r15 - %r8
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [r8+8]
        ; A[10] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	rbx, rax
        adox	r12, rcx
        ; A[11] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r12, rax
        adox	r10, rcx
        ; No store %rbx - %r9
        mov	QWORD PTR [r8], r12
        mov	r11, QWORD PTR [r8+16]
        mov	r12, QWORD PTR [r8+24]
        ; A[11] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+8], r10
        mov	QWORD PTR [r8+16], r11
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[11] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r12, rax
        adox	r10, rcx
        ; A[11] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+24], r12
        mov	QWORD PTR [r8+32], r10
        mov	r12, QWORD PTR [r8+48]
        mov	r10, QWORD PTR [r8+56]
        ; A[11] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r12, rcx
        ; A[13] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+40], r11
        mov	QWORD PTR [r8+48], r12
        mov	r11, QWORD PTR [r8+64]
        ; Zero into %r10
        ; A[13] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, QWORD PTR [r9+104]
        adcx	r10, rax
        adox	r11, rcx
        ; A[13] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	r12, rax, QWORD PTR [r9+104]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+56], r10
        mov	QWORD PTR [r8+64], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[13] x A[12]
        mov	rdx, QWORD PTR [r9+96]
        mulx	r10, rax, QWORD PTR [r9+104]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+72], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+80], r10
        ; Diagonal 7
        ; No load %r15 - %r9
        ; A[7] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	rdi, rax
        adox	rsi, rcx
        ; A[8] x A[6]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[9] x A[6]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	rbx, rax
        adox	r10, rcx
        ; A[10] x A[6]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, rcx
        ; No store %rbx - %r10
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[10] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r11, rax
        adox	r12, rcx
        ; A[10] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+8], r11
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        mov	r12, QWORD PTR [r8+40]
        ; A[10] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, rcx
        ; A[14] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+24], r10
        mov	QWORD PTR [r8+32], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        ; A[14] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r12, rax
        adox	r10, rcx
        ; A[14] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+40], r12
        mov	QWORD PTR [r8+48], r10
        mov	r12, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        ; A[14] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r11, rax
        adox	r12, rcx
        ; A[14] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+56], r11
        mov	QWORD PTR [r8+64], r12
        mov	r11, QWORD PTR [r8+80]
        ; Zero into %r10
        ; A[14] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	rcx, rax, QWORD PTR [r9+112]
        adcx	r10, rax
        adox	r11, rcx
        ; A[14] x A[12]
        mov	rdx, QWORD PTR [r9+96]
        mulx	r12, rax, QWORD PTR [r9+112]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+72], r10
        mov	QWORD PTR [r8+80], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[14] x A[13]
        mov	rdx, QWORD PTR [r9+104]
        mulx	r10, rax, QWORD PTR [r9+112]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+88], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+96], r10
        ; Diagonal 8
        mov	r11, QWORD PTR [r8]
        mov	r12, QWORD PTR [r8+8]
        ; A[8] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rbx, rax
        adox	r11, rcx
        ; A[9] x A[7]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r11, rax
        adox	r12, rcx
        ; No store %rbx - %r8
        mov	QWORD PTR [r8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        ; A[9] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r12, rax
        adox	r10, rcx
        ; A[15] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+8], r12
        mov	QWORD PTR [r8+16], r10
        mov	r12, QWORD PTR [r8+32]
        mov	r10, QWORD PTR [r8+40]
        ; A[15] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r11, rax
        adox	r12, rcx
        ; A[15] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r11
        mov	QWORD PTR [r8+32], r12
        mov	r11, QWORD PTR [r8+48]
        mov	r12, QWORD PTR [r8+56]
        ; A[15] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r10, rax
        adox	r11, rcx
        ; A[15] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+40], r10
        mov	QWORD PTR [r8+48], r11
        mov	r10, QWORD PTR [r8+64]
        mov	r11, QWORD PTR [r8+72]
        ; A[15] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r10, rcx
        ; A[15] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+56], r12
        mov	QWORD PTR [r8+64], r10
        mov	r12, QWORD PTR [r8+80]
        mov	r10, QWORD PTR [r8+88]
        ; A[15] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r11, rax
        adox	r12, rcx
        ; A[15] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+72], r11
        mov	QWORD PTR [r8+80], r12
        mov	r11, QWORD PTR [r8+96]
        ; Zero into %r10
        ; A[15] x A[12]
        mov	rdx, QWORD PTR [r9+96]
        mulx	rcx, rax, QWORD PTR [r9+120]
        adcx	r10, rax
        adox	r11, rcx
        ; A[15] x A[13]
        mov	rdx, QWORD PTR [r9+104]
        mulx	r12, rax, QWORD PTR [r9+120]
        adcx	r11, rax
        adox	r12, r13
        mov	QWORD PTR [r8+88], r10
        mov	QWORD PTR [r8+96], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[14]
        mov	rdx, QWORD PTR [r9+112]
        mulx	r10, rax, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r10, r13
        mov	QWORD PTR [r8+104], r12
        ;  Carry
        adcx	r10, r14
        mov	r14, r13
        adcx	r14, r13
        adox	r14, r13
        mov	QWORD PTR [r8+112], r10
        mov	QWORD PTR [r8+120], r14
        ; Double and Add in A[i] x A[i]
        mov	r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov	rdx, QWORD PTR [r9]
        mulx	rcx, rax, rdx
        mov	QWORD PTR [rbp], rax
        adox	r11, r11
        adcx	r11, rcx
        mov	QWORD PTR [rbp+8], r11
        mov	r10, QWORD PTR [rbp+16]
        mov	r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov	rdx, QWORD PTR [r9+8]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+16], r10
        mov	QWORD PTR [rbp+24], r11
        mov	r10, QWORD PTR [rbp+32]
        mov	r11, QWORD PTR [rbp+40]
        ; A[2] x A[2]
        mov	rdx, QWORD PTR [r9+16]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+32], r10
        mov	QWORD PTR [rbp+40], r11
        mov	r10, QWORD PTR [rbp+48]
        mov	r11, QWORD PTR [rbp+56]
        ; A[3] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+48], r10
        mov	QWORD PTR [rbp+56], r11
        mov	r10, QWORD PTR [rbp+64]
        mov	r11, QWORD PTR [rbp+72]
        ; A[4] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+64], r10
        mov	QWORD PTR [rbp+72], r11
        mov	r10, QWORD PTR [rbp+80]
        mov	r11, QWORD PTR [rbp+88]
        ; A[5] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+80], r10
        mov	QWORD PTR [rbp+88], r11
        ; A[6] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, rdx
        adox	r15, r15
        adox	rdi, rdi
        adcx	r15, rax
        adcx	rdi, rcx
        ; A[7] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, rdx
        adox	rsi, rsi
        adox	rbx, rbx
        adcx	rsi, rax
        adcx	rbx, rcx
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[8] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        ; A[9] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+16], r10
        mov	QWORD PTR [r8+24], r11
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[10] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+32], r10
        mov	QWORD PTR [r8+40], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        ; A[11] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+48], r10
        mov	QWORD PTR [r8+56], r11
        mov	r10, QWORD PTR [r8+64]
        mov	r11, QWORD PTR [r8+72]
        ; A[12] x A[12]
        mov	rdx, QWORD PTR [r9+96]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+64], r10
        mov	QWORD PTR [r8+72], r11
        mov	r10, QWORD PTR [r8+80]
        mov	r11, QWORD PTR [r8+88]
        ; A[13] x A[13]
        mov	rdx, QWORD PTR [r9+104]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+80], r10
        mov	QWORD PTR [r8+88], r11
        mov	r10, QWORD PTR [r8+96]
        mov	r11, QWORD PTR [r8+104]
        ; A[14] x A[14]
        mov	rdx, QWORD PTR [r9+112]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+96], r10
        mov	QWORD PTR [r8+104], r11
        mov	r10, QWORD PTR [r8+112]
        mov	r11, QWORD PTR [r8+120]
        ; A[15] x A[15]
        mov	rdx, QWORD PTR [r9+120]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+112], r10
        mov	QWORD PTR [r8+120], r11
        mov	QWORD PTR [r8+-32], r15
        mov	QWORD PTR [r8+-24], rdi
        mov	QWORD PTR [r8+-16], rsi
        mov	QWORD PTR [r8+-8], rbx
        sub	r8, 128
        cmp	r9, r8
        jne	L_end_2048_sqr_avx2_16
        vmovdqu	xmm0, OWORD PTR [rbp]
        vmovups	OWORD PTR [r8], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+16]
        vmovups	OWORD PTR [r8+16], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+32]
        vmovups	OWORD PTR [r8+32], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+48]
        vmovups	OWORD PTR [r8+48], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+64]
        vmovups	OWORD PTR [r8+64], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+80]
        vmovups	OWORD PTR [r8+80], xmm0
L_end_2048_sqr_avx2_16:
        add	rsp, 128
        pop	rbx
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        pop	rbp
        ret
sp_2048_sqr_avx2_16 ENDP
_text ENDS
ENDIF
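; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: a is split into a low half (al) and a high half (ah), and
;  * the result is assembled from three half-size squarings:
;  * ah^2, al^2, (al - ah)^2
;  *
;  * r  A single precision integer. Receives the result.
;  * a  A single precision integer. The number to square.
;  */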
_text SEGMENT READONLY PARA
sp_2048_sqr_32 PROC
        sub	rsp, 272
        mov	QWORD PTR [rsp+256], rcx
        mov	QWORD PTR [rsp+264], rdx
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+128]
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r10+88], r8
        sbb	rax, QWORD PTR [r11+96]
        mov	r8, QWORD PTR [rdx+104]
        mov	QWORD PTR [r10+96], rax
        sbb	r8, QWORD PTR [r11+104]
        mov	rax, QWORD PTR [rdx+112]
        mov	QWORD PTR [r10+104], r8
        sbb	rax, QWORD PTR [r11+112]
        mov	r8, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+112], rax
        sbb	r8, QWORD PTR [r11+120]
        mov	QWORD PTR [r10+120], r8
        sbb	r9, 0
        ; Cond Negate
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+96]
        setc	r11b
        mov	QWORD PTR [r10+88], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+104]
        setc	r11b
        mov	QWORD PTR [r10+96], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+112]
        setc	r11b
        mov	QWORD PTR [r10+104], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+120]
        setc	r11b
        mov	QWORD PTR [r10+112], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+120], r8
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_2048_sqr_16
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        add	rdx, 128
        add	rcx, 256
        call	sp_2048_sqr_16
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        call	sp_2048_sqr_16
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
ENDIF
        mov	rdx, QWORD PTR [rsp+256]
        lea	r10, QWORD PTR [rsp+128]
        add	rdx, 384
        mov	r9, 0
        mov	r8, QWORD PTR [r10+-128]
        sub	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+120], rax
        sbb	r9, 0
        sub	rdx, 256
        mov	r8, QWORD PTR [r10+-128]
        sub	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+120], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+256]
        neg	r9
        add	rcx, 256
        mov	r8, QWORD PTR [rcx+-128]
        sub	r8, QWORD PTR [r10+-128]
        mov	rax, QWORD PTR [rcx+-120]
        mov	QWORD PTR [rcx+-128], r8
        sbb	rax, QWORD PTR [r10+-120]
        mov	r8, QWORD PTR [rcx+-112]
        mov	QWORD PTR [rcx+-120], rax
        sbb	r8, QWORD PTR [r10+-112]
        mov	rax, QWORD PTR [rcx+-104]
        mov	QWORD PTR [rcx+-112], r8
        sbb	rax, QWORD PTR [r10+-104]
        mov	r8, QWORD PTR [rcx+-96]
        mov	QWORD PTR [rcx+-104], rax
        sbb	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        sbb	r8, QWORD PTR [r10+96]
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        sbb	rax, QWORD PTR [r10+104]
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        sbb	r8, QWORD PTR [r10+112]
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        sbb	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [rcx+120], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+256]
        add	rcx, 384
        ; Add in word
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+120], rax
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        add	rsp, 272
        ret
sp_2048_sqr_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * One Karatsuba level over 64-bit words: al is the 16 words at a, ah the
;  * 16 words at a+128. Three half-size squares are delegated to
;  * sp_2048_sqr_avx2_16 (defined outside this section; from its use here it
;  * presumably takes rcx = result, rdx = input and writes a 32-word square -
;  * TODO confirm) and are then combined as
;  *   tmp       = (al - ah)^2 - ah^2 - al^2
;  *   r[16..47] -= tmp
;  * with the resulting top word added into r[48..63].
;  *
;  * r  A single precision integer. Passed in rcx. This routine itself updates
;  *    bytes 128..511 of r; the rest is whatever the sub-calls stored.
;  * a  A single precision integer. Passed in rdx.
;  *
;  * Stack frame (272 bytes): [rsp+0..255] temporary tmp, [rsp+256] saved r,
;  * [rsp+264] saved a. rcx and rdx are reloaded with r and a before return.
;  *
;  * NOTE(review): the frame reserves no 32-byte home space for the callee and,
;  * assuming the usual rsp = 8 (mod 16) on entry, the calls are made with rsp
;  * not 16-byte aligned. This is only safe if sp_2048_sqr_avx2_16 relies on
;  * neither - confirm against its definition.
;  */
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_32 PROC
        ; Reserve the frame and save both arguments above the 256-byte temporary.
        sub	rsp, 272
        mov	QWORD PTR [rsp+256], rcx
        mov	QWORD PTR [rsp+264], rdx
        ; r9 = 0 (becomes the borrow mask), r10 = tmp, r11 = ah (a + 128 bytes).
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+128]
        ; tmp[0..15] = al - ah. One sub opens the borrow chain and sbb continues
        ; it; the interleaved mov loads/stores do not modify the flags.
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r10+88], r8
        sbb	rax, QWORD PTR [r11+96]
        mov	r8, QWORD PTR [rdx+104]
        mov	QWORD PTR [r10+96], rax
        sbb	r8, QWORD PTR [r11+104]
        mov	rax, QWORD PTR [rdx+112]
        mov	QWORD PTR [r10+104], r8
        sbb	rax, QWORD PTR [r11+112]
        mov	r8, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+112], rax
        sbb	r8, QWORD PTR [r11+120]
        mov	QWORD PTR [r10+120], r8
        ; r9 = 0 - borrow: all ones when al - ah borrowed, otherwise 0.
        sbb	r9, 0
        ; Cond Negate
        ; With r9 = -1 every word is inverted (xor) and 1 is added, i.e. tmp is
        ; replaced by its two's complement; with r9 = 0 each step is a no-op.
        ; Word 0: "sub rax, r9" adds 1 under the mask and "sbb r11, 0" turns the
        ; resulting borrow flag into r11 = carry out of that +1 (0 or 1).
        ; Words 1..15: add the carry held in r11, then setc re-captures the new
        ; carry in r11b (the upper bits of r11 are already zero).
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+96]
        setc	r11b
        mov	QWORD PTR [r10+88], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+104]
        setc	r11b
        mov	QWORD PTR [r10+96], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+112]
        setc	r11b
        mov	QWORD PTR [r10+104], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+120]
        setc	r11b
        mov	QWORD PTR [r10+112], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+120], r8
        ; Call 1: input and result both point at tmp (rsp), so the 16-word
        ; |al - ah| is squared in place into the 256-byte temporary.
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_2048_sqr_avx2_16
        ; Call 2: input ah (a + 128), result at r + 256.
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        add	rdx, 128
        add	rcx, 256
        call	sp_2048_sqr_avx2_16
        ; Call 3: input al (a), result at r.
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        call	sp_2048_sqr_avx2_16
        ; NOTE(review): the two reloads below are dead in this routine - rdx is
        ; overwritten by the very next instruction and rcx is reloaded from the
        ; frame before it is used again.
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
ENDIF
        ; rdx = r + 384 and r10 = tmp + 128: both pointers sit 128 bytes into a
        ; 256-byte operand, so its 32 words are addressed as [-128 .. +120].
        ; r9 restarts at 0 and collects the borrows of the next two passes.
        mov	rdx, QWORD PTR [rsp+256]
        lea	r10, QWORD PTR [rsp+128]
        add	rdx, 384
        mov	r9, 0
        ; tmp[0..31] -= r[32..63] (the result of call 2).
        mov	r8, QWORD PTR [r10+-128]
        sub	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+120], rax
        ; Record the borrow of this pass: r9 -= CF.
        sbb	r9, 0
        ; rdx = r + 128, i.e. the same biased addressing over r[0..31].
        sub	rdx, 256
        ; tmp[0..31] -= r[0..31] (the result of call 3).
        mov	r8, QWORD PTR [r10+-128]
        sub	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+120], rax
        ; r9 now holds minus the number of borrows from the two passes.
        sbb	r9, 0
        ; rcx = r + 256 (again biased by 128 bytes, so it covers r[16..47]);
        ; neg turns r9 into the positive borrow count, because subtracting tmp
        ; below also subtracts its negative top word.
        mov	rcx, QWORD PTR [rsp+256]
        neg	r9
        add	rcx, 256
        ; r[16..47] -= tmp[0..31].
        mov	r8, QWORD PTR [rcx+-128]
        sub	r8, QWORD PTR [r10+-128]
        mov	rax, QWORD PTR [rcx+-120]
        mov	QWORD PTR [rcx+-128], r8
        sbb	rax, QWORD PTR [r10+-120]
        mov	r8, QWORD PTR [rcx+-112]
        mov	QWORD PTR [rcx+-120], rax
        sbb	r8, QWORD PTR [r10+-112]
        mov	rax, QWORD PTR [rcx+-104]
        mov	QWORD PTR [rcx+-112], r8
        sbb	rax, QWORD PTR [r10+-104]
        mov	r8, QWORD PTR [rcx+-96]
        mov	QWORD PTR [rcx+-104], rax
        sbb	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        sbb	r8, QWORD PTR [r10+96]
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        sbb	rax, QWORD PTR [r10+104]
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        sbb	r8, QWORD PTR [r10+112]
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        sbb	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [rcx+120], rax
        ; Fold the borrow of this subtraction into the top word held in r9.
        sbb	r9, 0
        ; rcx = r + 384 = &r[48], the first word above the updated middle.
        mov	rcx, QWORD PTR [rsp+256]
        add	rcx, 384
        ; Add in word
        ; r[48] += r9, then the carry is rippled through r[49..63] with adc 0.
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+120], rax
        ; Reload a into rdx and r into rcx from the frame, then release it.
        mov	rdx, QWORD PTR [rsp+264]
        mov	rcx, QWORD PTR [rsp+256]
        add	rsp, 272
        ret
sp_2048_sqr_avx2_32 ENDP
_text ENDS
ENDIF
; /* In-place subtraction of two 16-word integers. (a -= b)
;  *
;  * a  (rcx) 16 x 64-bit words, least significant word first. Minuend,
;  *          overwritten with the difference.
;  * b  (rdx) 16 x 64-bit words. Subtrahend.
;  *
;  * Returns in rax: 0 when the top word did not borrow, -1 (all ones) when
;  * it did. r8 and r9 are used as scratch.
;  */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_16 PROC
        ; Word 0 opens the borrow chain with sub; words 1..15 continue it with
        ; sbb. Each word is handled as load / subtract / store; the mov
        ; instructions in between leave the carry flag untouched.
        ; a[0]
        mov	r8, QWORD PTR [rcx]
        sub	r8, QWORD PTR [rdx]
        mov	QWORD PTR [rcx], r8
        ; a[1]
        mov	r9, QWORD PTR [rcx+8]
        sbb	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [rcx+8], r9
        ; a[2]
        mov	r8, QWORD PTR [rcx+16]
        sbb	r8, QWORD PTR [rdx+16]
        mov	QWORD PTR [rcx+16], r8
        ; a[3]
        mov	r9, QWORD PTR [rcx+24]
        sbb	r9, QWORD PTR [rdx+24]
        mov	QWORD PTR [rcx+24], r9
        ; a[4]
        mov	r8, QWORD PTR [rcx+32]
        sbb	r8, QWORD PTR [rdx+32]
        mov	QWORD PTR [rcx+32], r8
        ; a[5]
        mov	r9, QWORD PTR [rcx+40]
        sbb	r9, QWORD PTR [rdx+40]
        mov	QWORD PTR [rcx+40], r9
        ; a[6]
        mov	r8, QWORD PTR [rcx+48]
        sbb	r8, QWORD PTR [rdx+48]
        mov	QWORD PTR [rcx+48], r8
        ; a[7]
        mov	r9, QWORD PTR [rcx+56]
        sbb	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [rcx+56], r9
        ; a[8]
        mov	r8, QWORD PTR [rcx+64]
        sbb	r8, QWORD PTR [rdx+64]
        mov	QWORD PTR [rcx+64], r8
        ; a[9]
        mov	r9, QWORD PTR [rcx+72]
        sbb	r9, QWORD PTR [rdx+72]
        mov	QWORD PTR [rcx+72], r9
        ; a[10]
        mov	r8, QWORD PTR [rcx+80]
        sbb	r8, QWORD PTR [rdx+80]
        mov	QWORD PTR [rcx+80], r8
        ; a[11]
        mov	r9, QWORD PTR [rcx+88]
        sbb	r9, QWORD PTR [rdx+88]
        mov	QWORD PTR [rcx+88], r9
        ; a[12]
        mov	r8, QWORD PTR [rcx+96]
        sbb	r8, QWORD PTR [rdx+96]
        mov	QWORD PTR [rcx+96], r8
        ; a[13]
        mov	r9, QWORD PTR [rcx+104]
        sbb	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [rcx+104], r9
        ; a[14]
        mov	r8, QWORD PTR [rcx+112]
        sbb	r8, QWORD PTR [rdx+112]
        mov	QWORD PTR [rcx+112], r8
        ; a[15]
        mov	r9, QWORD PTR [rcx+120]
        sbb	r9, QWORD PTR [rdx+120]
        mov	QWORD PTR [rcx+120], r9
        ; Turn the final borrow flag into the return value: rax = 0 or -1.
        sbb	rax, rax
        ret
sp_2048_sub_in_place_16 ENDP
_text ENDS
; /* Multiply a 32-word integer by one 64-bit digit. (r = a * b)
;  *
;  * r  (rcx) Result. 33 words are written, at byte offsets 0..256.
;  * a  (rdx) Multiplicand. 32 words are read, at byte offsets 0..248.
;  * b  (r8)  Single precision digit.
;  *
;  * r12 is saved and restored on the stack. rax, rdx, r9, r10 and r11 are
;  * overwritten.
;  *
;  * r10, r11 and r12 form a rotating accumulator. In each step one of them
;  * holds the word about to be stored, the next receives the high half of
;  * the product, and the third is zeroed to catch the carry.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_d_32 PROC
        ; mul writes rdx, so the pointer to a is moved to r9 first.
        mov	r9, rdx
        push	r12
        ; r[0] = low(a[0] * b); r11 = high half, r12 = 0
        mov	rax, r8
        mul	QWORD PTR [r9]
        xor	r12d, r12d
        mov	r11, rdx
        mov	QWORD PTR [rcx], rax
        ; r[1]: accumulate a[1] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+8], r11
        ; r[2]: accumulate a[2] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+16], r12
        ; r[3]: accumulate a[3] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+24], r10
        ; r[4]: accumulate a[4] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+32], r11
        ; r[5]: accumulate a[5] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+40], r12
        ; r[6]: accumulate a[6] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+48], r10
        ; r[7]: accumulate a[7] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+56], r11
        ; r[8]: accumulate a[8] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+64], r12
        ; r[9]: accumulate a[9] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+72], r10
        ; r[10]: accumulate a[10] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+80], r11
        ; r[11]: accumulate a[11] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+88], r12
        ; r[12]: accumulate a[12] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+96]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+96], r10
        ; r[13]: accumulate a[13] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+104]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+104], r11
        ; r[14]: accumulate a[14] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+112]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+112], r12
        ; r[15]: accumulate a[15] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+120], r10
        ; r[16]: accumulate a[16] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+128]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+128], r11
        ; r[17]: accumulate a[17] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+136]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+136], r12
        ; r[18]: accumulate a[18] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+144]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+144], r10
        ; r[19]: accumulate a[19] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+152]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+152], r11
        ; r[20]: accumulate a[20] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+160]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+160], r12
        ; r[21]: accumulate a[21] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+168]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+168], r10
        ; r[22]: accumulate a[22] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+176]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+176], r11
        ; r[23]: accumulate a[23] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+184]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+184], r12
        ; r[24]: accumulate a[24] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+192]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+192], r10
        ; r[25]: accumulate a[25] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+200]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+200], r11
        ; r[26]: accumulate a[26] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+208]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+208], r12
        ; r[27]: accumulate a[27] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+216]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+216], r10
        ; r[28]: accumulate a[28] * b into r11:r12:r10
        xor	r10d, r10d
        mov	rax, r8
        mul	QWORD PTR [r9+224]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+224], r11
        ; r[29]: accumulate a[29] * b into r12:r10:r11
        xor	r11d, r11d
        mov	rax, r8
        mul	QWORD PTR [r9+232]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+232], r12
        ; r[30]: accumulate a[30] * b into r10:r11:r12
        xor	r12d, r12d
        mov	rax, r8
        mul	QWORD PTR [r9+240]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+240], r10
        ; Last step: r[31] = r11 + low(a[31] * b), r[32] = r12 + high + carry
        mov	rax, r8
        mul	QWORD PTR [r9+248]
        add	r11, rax
        adc	r12, rdx
        mov	QWORD PTR [rcx+248], r11
        mov	QWORD PTR [rcx+256], r12
        pop	r12
        ret
sp_2048_mul_d_32 ENDP
_text ENDS
; /* Conditional subtract without branches: r = a - (b & m).
;  *
;  * Every digit of b is ANDed with the mask before it is subtracted, so the
;  * same instruction stream runs whether or not the subtraction takes effect.
;  *
;  * r  Result, 16 digits (rcx).
;  * a  A single precision number to subtract from, 16 digits (rdx).
;  * b  A single precision number to subtract, 16 digits (r8).
;  * m  Mask value to apply: -1 to subtract b, 0 to subtract nothing (r9).
;  * returns 0 in rax when the top digit produced no borrow, otherwise -1.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_16 PROC
        ; 128-byte scratch area for the 16 masked digits of b.
        sub	rsp, 128
        ; Pass 1: scratch[i] = b[i] & m, two digits per step. AND rewrites
        ; CF, so masking is finished before the borrow chain starts.
        FOR	ofs, <0,16,32,48,64,80,96,112>
        mov	r10, QWORD PTR [r8+ofs]
        mov	r11, QWORD PTR [r8+ofs+8]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+ofs], r10
        mov	QWORD PTR [rsp+ofs+8], r11
        ENDM
        ; Pass 2: digits 0 and 1 open the borrow chain. r8 is free to be
        ; reused as a temporary because b has been fully consumed.
        mov	r10, QWORD PTR [rdx]
        mov	r8, QWORD PTR [rsp]
        sub	r10, r8
        mov	r11, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [rsp+8]
        sbb	r11, r8
        ; Digits 2..15 in pairs. Each result is stored one step late, and
        ; only MOV sits between the SBBs so the borrow in CF is preserved.
        FOR	ofs, <16,32,48,64,80,96,112>
        mov	QWORD PTR [rcx+ofs-16], r10
        mov	r10, QWORD PTR [rdx+ofs]
        mov	r8, QWORD PTR [rsp+ofs]
        sbb	r10, r8
        mov	QWORD PTR [rcx+ofs-8], r11
        mov	r11, QWORD PTR [rdx+ofs+8]
        mov	r8, QWORD PTR [rsp+ofs+8]
        sbb	r11, r8
        ENDM
        mov	QWORD PTR [rcx+112], r10
        mov	QWORD PTR [rcx+120], r11
        ; rax = 0 - CF: turn the final borrow into a 0 / -1 value.
        sbb	rax, rax
        add	rsp, 128
        ret
sp_2048_cond_sub_16 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
;  *
;  * Runs 16 rounds. Round i computes mu = a[i] * mp and adds mu * m into
;  * a[i..i+16]. The low digit of each round's sum is not stored (it is expected
;  * to be zero by the choice of mp). Afterwards sp_2048_cond_sub_16 writes the
;  * upper 16 digits to a[0..15], subtracting m when the additions carried out
;  * of the top digit.
;  *
;  * a   A single precision number to reduce in place, 32 digits (rcx).
;  * m   The single precision number representing the modulus, 16 digits (rdx).
;  * mp  The digit representing the negative inverse of m mod 2^n (r8).
;  *
;  * rax is left as returned by sp_2048_cond_sub_16.
;  */
_text SEGMENT READONLY PARA
; One middle digit of a round: a[i+k] += m[k] * mu + c_in, carry out in c_out.
; Uses rax, rdx and r14 as temporaries; expects mu in r13, m in r9, a+i in rcx.
sp_2048_mont_reduce_16_digit MACRO ofs, c_in, c_out
        mov	rax, r13
        xor	c_out, c_out
        mul	QWORD PTR [r9+ofs]
        mov	r14, QWORD PTR [rcx+ofs]
        add	r14, rax
        adc	c_out, rdx
        add	r14, c_in
        mov	QWORD PTR [rcx+ofs], r14
        adc	c_out, 0
ENDM
sp_2048_mont_reduce_16 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        ; MUL overwrites rdx, so the modulus pointer lives in r9.
        mov	r9, rdx
        ; rsi = carry out of the top digit of the previous round (0 or 1).
        xor	rsi, rsi
        ; i = 16
        mov	r10, 16
        ; a[i] and a[i+1] are kept in r15 / rdi across rounds.
        mov	r15, QWORD PTR [rcx]
        mov	rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_16_loop:
        ; mu = a[i] * mp
        mov	r13, r15
        imul	r13, r8
        ; a[i+0] += m[0] * mu (only the carry into the next digit is kept)
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9]
        add	r15, rax
        adc	r12, rdx
        ; a[i+1] += m[1] * mu (result becomes a[i] of the next round)
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+8]
        mov	r15, rdi
        add	r15, rax
        adc	r11, rdx
        add	r15, r12
        adc	r11, 0
        ; a[i+2] += m[2] * mu (result becomes a[i+1] of the next round)
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+16]
        mov	rdi, QWORD PTR [rcx+16]
        add	rdi, rax
        adc	r12, rdx
        add	rdi, r11
        adc	r12, 0
        ; a[i+3] .. a[i+14] += m[3] .. m[14] * mu, carry alternating r12/r11
        sp_2048_mont_reduce_16_digit 24, r12, r11
        sp_2048_mont_reduce_16_digit 32, r11, r12
        sp_2048_mont_reduce_16_digit 40, r12, r11
        sp_2048_mont_reduce_16_digit 48, r11, r12
        sp_2048_mont_reduce_16_digit 56, r12, r11
        sp_2048_mont_reduce_16_digit 64, r11, r12
        sp_2048_mont_reduce_16_digit 72, r12, r11
        sp_2048_mont_reduce_16_digit 80, r11, r12
        sp_2048_mont_reduce_16_digit 88, r12, r11
        sp_2048_mont_reduce_16_digit 96, r11, r12
        sp_2048_mont_reduce_16_digit 104, r12, r11
        sp_2048_mont_reduce_16_digit 112, r11, r12
        ; a[i+15] += m[15] * mu, folding in the carry saved in rsi
        mov	rax, r13
        mul	QWORD PTR [r9+120]
        mov	r14, QWORD PTR [rcx+120]
        add	r12, rax
        adc	rdx, rsi
        ; MOV rather than XOR: CF from the ADC above must reach the next ADC.
        mov	rsi, 0
        adc	rsi, 0
        add	r14, r12
        mov	QWORD PTR [rcx+120], r14
        adc	QWORD PTR [rcx+128], rdx
        adc	rsi, 0
        ; i -= 1
        add	rcx, 8
        dec	r10
        jnz	L_2048_mont_reduce_16_loop
        ; rcx now points at a[16]; flush the two digits cached in registers.
        mov	QWORD PTR [rcx], r15
        mov	QWORD PTR [rcx+8], rdi
        ; Final carry 0 / 1 -> mask 0 / -1.
        neg	rsi
        ; sp_2048_cond_sub_16(r = a, a = a + 16 digits, b = m, m = mask).
        ; r8 must be loaded from r9 before r9 is overwritten with the mask.
        mov	r8, r9
        mov	r9, rsi
        mov	rdx, rcx
        sub	rcx, 128
        ; Six pushes leave rsp 8 bytes short of 16-byte alignment. Reserve
        ; the 32-byte shadow area plus 8 bytes so the call is ABI-conformant.
        sub	rsp, 40
        call	sp_2048_cond_sub_16
        add	rsp, 40
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mont_reduce_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditional subtract without branches: r = a - (b masked by m).
;  *
;  * Each digit of b is filtered through PEXT with the mask: an all-ones mask
;  * passes the digit through and a zero mask yields 0. Any other mask value
;  * would compress bits rather than AND them, so m must be exactly -1 or 0.
;  * PEXT does not modify the flags, which lets the masking sit inside the
;  * SUB/SBB borrow chain without a scratch buffer.
;  *
;  * r  Result, 16 digits (rcx).
;  * a  A single precision number to subtract from, 16 digits (rdx).
;  * b  A single precision number to subtract, 16 digits (r8).
;  * m  Mask value to apply: -1 to subtract b, 0 to subtract nothing (r9).
;  * returns 0 in rax when the top digit produced no borrow, otherwise -1.
;  */
_text SEGMENT READONLY PARA
; One digit of the borrow chain: load and mask b[k] into r_b, load a[k] into
; r_a, store the previous digit's result (r_prev), then r_a -= r_b + borrow.
sp_2048_cond_sub_avx2_16_digit MACRO ofs, r_b, r_a, r_prev
        mov	r_b, QWORD PTR [r8+ofs]
        mov	r_a, QWORD PTR [rdx+ofs]
        pext	r_b, r_b, r9
        mov	QWORD PTR [rcx+ofs-8], r_prev
        sbb	r_a, r_b
ENDM
sp_2048_cond_sub_avx2_16 PROC
        ; r12 is callee-saved and is used as the third rotating register.
        push	r12
        ; Digit 0 opens the chain with SUB.
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [rdx]
        pext	r12, r12, r9
        sub	r10, r12
        ; Digits 1..15: r10 / r11 / r12 rotate through the three roles.
        sp_2048_cond_sub_avx2_16_digit 8, r12, r11, r10
        sp_2048_cond_sub_avx2_16_digit 16, r10, r12, r11
        sp_2048_cond_sub_avx2_16_digit 24, r11, r10, r12
        sp_2048_cond_sub_avx2_16_digit 32, r12, r11, r10
        sp_2048_cond_sub_avx2_16_digit 40, r10, r12, r11
        sp_2048_cond_sub_avx2_16_digit 48, r11, r10, r12
        sp_2048_cond_sub_avx2_16_digit 56, r12, r11, r10
        sp_2048_cond_sub_avx2_16_digit 64, r10, r12, r11
        sp_2048_cond_sub_avx2_16_digit 72, r11, r10, r12
        sp_2048_cond_sub_avx2_16_digit 80, r12, r11, r10
        sp_2048_cond_sub_avx2_16_digit 88, r10, r12, r11
        sp_2048_cond_sub_avx2_16_digit 96, r11, r10, r12
        sp_2048_cond_sub_avx2_16_digit 104, r12, r11, r10
        sp_2048_cond_sub_avx2_16_digit 112, r10, r12, r11
        sp_2048_cond_sub_avx2_16_digit 120, r11, r10, r12
        mov	QWORD PTR [rcx+120], r10
        ; rax = 0 - CF: turn the final borrow into a 0 / -1 value.
        sbb	rax, rax
        pop	r12
        ret
sp_2048_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Multiply a 16-digit number by a single digit. (r = a * b)
;  *
;  * r  A single precision integer receiving the product, 17 digits (rcx).
;  * a  A single precision integer, 16 digits (rdx).
;  * b  A single precision digit (r8).
;  */
_text SEGMENT READONLY PARA
; One inner step: add a[k] * b into the running sum, store result digit k.
;   r_lo  receives the low product half and is stored as r[k]
;   r_hi  receives the high product half plus the carry from r_lo
;   r_top is zeroed, then collects the carry out of r_hi
; MOV does not touch the flags, so the store may sit between ADD and ADC.
sp_2048_mul_d_16_step MACRO ofs, r_lo, r_hi, r_top
        mov	rax, r8
        xor	r_top, r_top
        mul	QWORD PTR [r9+ofs]
        add	r_lo, rax
        mov	QWORD PTR [rcx+ofs], r_lo
        adc	r_hi, rdx
        adc	r_top, 0
ENDM
sp_2048_mul_d_16 PROC
        ; r12 is callee-saved and is used as the third accumulator.
        push	r12
        ; MUL overwrites rdx, so the source pointer lives in r9.
        mov	r9, rdx
        ; A[0] * B
        mov	rax, r8
        xor	r12, r12
        mul	QWORD PTR [r9]
        mov	r10, rax
        mov	r11, rdx
        mov	QWORD PTR [rcx], r10
        ; A[1] * B .. A[14] * B, accumulators rotating r11 -> r12 -> r10
        sp_2048_mul_d_16_step 8, r11, r12, r10
        sp_2048_mul_d_16_step 16, r12, r10, r11
        sp_2048_mul_d_16_step 24, r10, r11, r12
        sp_2048_mul_d_16_step 32, r11, r12, r10
        sp_2048_mul_d_16_step 40, r12, r10, r11
        sp_2048_mul_d_16_step 48, r10, r11, r12
        sp_2048_mul_d_16_step 56, r11, r12, r10
        sp_2048_mul_d_16_step 64, r12, r10, r11
        sp_2048_mul_d_16_step 72, r10, r11, r12
        sp_2048_mul_d_16_step 80, r11, r12, r10
        sp_2048_mul_d_16_step 88, r12, r10, r11
        sp_2048_mul_d_16_step 96, r10, r11, r12
        sp_2048_mul_d_16_step 104, r11, r12, r10
        sp_2048_mul_d_16_step 112, r12, r10, r11
        ; A[15] * B: the high half lands in the extra 17th digit.
        mov	rax, r8
        mul	QWORD PTR [r9+120]
        add	r10, rax
        adc	r11, rdx
        mov	QWORD PTR [rcx+120], r10
        mov	QWORD PTR [rcx+128], r11
        pop	r12
        ret
sp_2048_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a 16-digit number by a single digit using MULX/ADCX/ADOX.
;  * (r = a * b)
;  *
;  * MULX does not modify the flags, so two carry chains stay live across the
;  * whole routine: ADCX carries through CF and ADOX through OF.
;  *
;  * r  A single precision integer receiving the product, 17 digits (rcx).
;  * a  A single precision integer, 16 digits (rdx).
;  * b  A single precision digit (r8).
;  */
_text SEGMENT READONLY PARA
; One inner step for digit k (r9 = low half, r10 = high half of a[k] * b):
;   r_acc  += r9 + CF and is stored as r[k]
;   r_next  = 0 + r10 + OF, the starting value for digit k+1
sp_2048_mul_d_avx2_16_step MACRO ofs, r_acc, r_next
        mulx	r10, r9, QWORD PTR [rax+ofs]
        mov	r_next, r13
        adcx	r_acc, r9
        adox	r_next, r10
        mov	QWORD PTR [rcx+ofs], r_acc
ENDM
sp_2048_mul_d_avx2_16 PROC
        push	r12
        push	r13
        ; MULX takes its implicit multiplicand from rdx, so the source
        ; pointer moves to rax and the digit b takes its place.
        mov	rax, rdx
        ; A[0] * B
        mov	rdx, r8
        ; r13 = 0 for the rest of the routine; XOR also clears CF and OF.
        xor	r13, r13
        mulx	r12, r11, QWORD PTR [rax]
        mov	QWORD PTR [rcx], r11
        ; A[1] * B .. A[14] * B, accumulator alternating r12 / r11
        sp_2048_mul_d_avx2_16_step 8, r12, r11
        sp_2048_mul_d_avx2_16_step 16, r11, r12
        sp_2048_mul_d_avx2_16_step 24, r12, r11
        sp_2048_mul_d_avx2_16_step 32, r11, r12
        sp_2048_mul_d_avx2_16_step 40, r12, r11
        sp_2048_mul_d_avx2_16_step 48, r11, r12
        sp_2048_mul_d_avx2_16_step 56, r12, r11
        sp_2048_mul_d_avx2_16_step 64, r11, r12
        sp_2048_mul_d_avx2_16_step 72, r12, r11
        sp_2048_mul_d_avx2_16_step 80, r11, r12
        sp_2048_mul_d_avx2_16_step 88, r12, r11
        sp_2048_mul_d_avx2_16_step 96, r11, r12
        sp_2048_mul_d_avx2_16_step 104, r12, r11
        sp_2048_mul_d_avx2_16_step 112, r11, r12
        ; A[15] * B: the extra ADCX folds the last CF into the 17th digit.
        mulx	r10, r9, QWORD PTR [rax+120]
        mov	r11, r13
        adcx	r12, r9
        adox	r11, r10
        adcx	r11, r13
        mov	QWORD PTR [rcx+120], r12
        mov	QWORD PTR [rcx+128], r11
        pop	r13
        pop	r12
        ret
sp_2048_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * The caller must ensure d1 < div (and therefore div != 0); otherwise the
;  * quotient does not fit in one digit and the DIV instruction faults.
;  *
;  * d1   The high order half of the number to divide (rcx).
;  * d0   The low order half of the number to divide (rdx).
;  * div  The divisor (r8).
;  * returns the quotient of the division in rax.
;  */
_text SEGMENT READONLY PARA
div_2048_word_asm_16 PROC
        ; DIV divides rdx:rax, so d0 must leave rdx before d1 is moved in.
        mov	rax, rdx
        mov	rdx, rcx
        div	r8
        ret
div_2048_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b without branching on the data.
;  *
;  * Digits are visited from most to least significant. All 16 digits are
;  * always read; once a difference has been seen, the remaining digits are
;  * masked to zero so they cannot change the result.
;  *
;  * a  A single precision integer, 16 digits (rcx).
;  * b  A single precision integer, 16 digits (rdx).
;  * return -1, 0 or 1 in rax if a is less than, equal to or greater than b
;  * respectively.
;  */
_text SEGMENT READONLY PARA
sp_2048_cmp_16 PROC
        ; r12 is callee-saved and holds the current digit of b.
        push	r12
        ; r10 = 1   value selected when a > b
        ; rax = -1  provisional result
        ; r8  = -1  "no difference seen yet" mask, cleared at the first one
        ; r9  = 0   source for clearing r8 (CMOV needs a register operand)
        mov	r10, 1
        mov	rax, -1
        mov	r8, -1
        xor	r9, r9
        FOR	ofs, <120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,0>
        mov	r11, QWORD PTR [rcx+ofs]
        mov	r12, QWORD PTR [rdx+ofs]
        and	r11, r8
        and	r12, r8
        sub	r11, r12
        cmova	rax, r10
        cmovc	rax, r8
        cmovnz	r8, r9
        ENDM
        ; All digits equal: r8 is still -1 and cancels rax to 0. Otherwise
        ; r8 is 0 and rax keeps the 1 / -1 chosen at the first difference.
        xor	rax, r8
        pop	r12
        ret
sp_2048_cmp_16 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_16 PROC
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        pxor	xmm13, xmm13
        pshufd	xmm11, xmm11, 0
        pshufd	xmm10, xmm10, 0
        ; START: 0-7
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 0-7
        ; START: 8-15
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        ; END: 8-15
        vmovdqu	xmm6, OWORD PTR [rsp]
        vmovdqu	xmm7, OWORD PTR [rsp+16]
        vmovdqu	xmm8, OWORD PTR [rsp+32]
        vmovdqu	xmm9, OWORD PTR [rsp+48]
        vmovdqu	xmm10, OWORD PTR [rsp+64]
        vmovdqu	xmm11, OWORD PTR [rsp+80]
        vmovdqu	xmm12, OWORD PTR [rsp+96]
        vmovdqu	xmm13, OWORD PTR [rsp+112]
        add	rsp, 128
        ret
sp_2048_get_from_table_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
;  *
;  * Uses the MULX, ADCX/ADOX and PEXT instructions.
;  *
;  * a   A single precision number to reduce in place. Read as 32 64-bit
;  *     words; the reduced value is written to the low 16 words.
;  * m   The single precision number representing the modulus (16 words).
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  *
;  * Register use (arguments arrive in rcx, rdx, r8):
;  *   r9               moving pointer into a (biased by +64 bytes in the loop)
;  *   r10              m
;  *   r8               mp (reused as a pointer to a[16] after the loop)
;  *   r11              number of rounds still to run
;  *   r14,r15,rdi,rsi  a[i+0..i+3], kept in registers across rounds
;  *   r12,r13          alternating accumulators for the higher words
;  *   rbp              carry out of the top word of the previous round
;  *   rbx              zero (cleared at the start of every round)
;  *   rdx              mu; rcx:rax receive each 128-bit product
;  * r12-r15, rdi, rsi, rbx and rbp are saved on entry and restored on exit.
;  */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_16 PROC
        ; Save every register this routine modifies that it later restores.
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        push	rbx
        push	rbp
        ; Move a and m out of rcx/rdx: mulx needs rdx as its implicit
        ; multiplicand and rcx is used as a product destination below.
        mov	r9, rcx
        mov	r10, rdx
        ; No carry is pending before the first round.
        xor	rbp, rbp
        ; i = 16
        mov	r11, 16
        ; Preload the four lowest words a[0..3]; they stay in registers and
        ; slide down by one word after every round.
        mov	r14, QWORD PTR [r9]
        mov	r15, QWORD PTR [r9+8]
        mov	rdi, QWORD PTR [r9+16]
        mov	rsi, QWORD PTR [r9+24]
        ; Bias the pointer by 8 words; loop offsets run from -32 to +72.
        add	r9, 64
        ; rbp is already zero here; it is simply cleared a second time.
        xor	rbp, rbp
L_2048_mont_reduce_avx2_16_loop:
        ; Each pass of the loop performs two rounds. A round adds m * mu to
        ; the 17 words starting at the current lowest word.
        ; mu = a[i] * mp
        mov	rdx, r14
        mov	r12, r14
        imul	rdx, r8
        ; rbx = 0. The xor also clears CF and OF, which start the two
        ; independent carry chains: adcx carries through CF (low product
        ; halves), adox carries through OF (high product halves). mulx and
        ; mov do not touch the flags, so both chains survive the interleaving.
        xor	rbx, rbx
        ; a[i+0] += m[0] * mu
        ; The sum for this word is not stored (r12 is reloaded below before
        ; it is next written to memory); only its carry is kept.
        mulx	rcx, rax, QWORD PTR [r10]
        mov	r14, r15
        adcx	r12, rax
        adox	r14, rcx
        ; a[i+1] += m[1] * mu
        mulx	rcx, rax, QWORD PTR [r10+8]
        mov	r15, rdi
        adcx	r14, rax
        adox	r15, rcx
        ; a[i+2] += m[2] * mu
        mulx	rcx, rax, QWORD PTR [r10+16]
        mov	rdi, rsi
        adcx	r15, rax
        adox	rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx	rcx, rax, QWORD PTR [r10+24]
        mov	rsi, QWORD PTR [r9+-32]
        adcx	rdi, rax
        adox	rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx	rcx, rax, QWORD PTR [r10+32]
        mov	r13, QWORD PTR [r9+-24]
        adcx	rsi, rax
        adox	r13, rcx
        ; From here on r12 and r13 alternate: one receives the next word
        ; while the other, now complete, is written back to memory.
        ; a[i+5] += m[5] * mu
        mulx	rcx, rax, QWORD PTR [r10+40]
        mov	r12, QWORD PTR [r9+-16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-24], r13
        ; a[i+6] += m[6] * mu
        mulx	rcx, rax, QWORD PTR [r10+48]
        mov	r13, QWORD PTR [r9+-8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-16], r12
        ; a[i+7] += m[7] * mu
        mulx	rcx, rax, QWORD PTR [r10+56]
        mov	r12, QWORD PTR [r9]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-8], r13
        ; a[i+8] += m[8] * mu
        mulx	rcx, rax, QWORD PTR [r10+64]
        mov	r13, QWORD PTR [r9+8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9], r12
        ; a[i+9] += m[9] * mu
        mulx	rcx, rax, QWORD PTR [r10+72]
        mov	r12, QWORD PTR [r9+16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+8], r13
        ; a[i+10] += m[10] * mu
        mulx	rcx, rax, QWORD PTR [r10+80]
        mov	r13, QWORD PTR [r9+24]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+16], r12
        ; a[i+11] += m[11] * mu
        mulx	rcx, rax, QWORD PTR [r10+88]
        mov	r12, QWORD PTR [r9+32]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+24], r13
        ; a[i+12] += m[12] * mu
        mulx	rcx, rax, QWORD PTR [r10+96]
        mov	r13, QWORD PTR [r9+40]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+32], r12
        ; a[i+13] += m[13] * mu
        mulx	rcx, rax, QWORD PTR [r10+104]
        mov	r12, QWORD PTR [r9+48]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+40], r13
        ; a[i+14] += m[14] * mu
        mulx	rcx, rax, QWORD PTR [r10+112]
        mov	r13, QWORD PTR [r9+56]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+48], r12
        ; a[i+15] += m[15] * mu
        mulx	rcx, rax, QWORD PTR [r10+120]
        mov	r12, QWORD PTR [r9+64]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+56], r13
        ; a[i+16] also absorbs the carry saved from the previous round.
        adcx	r12, rbp
        mov	rbp, rbx
        mov	QWORD PTR [r9+64], r12
        ; rbp = OF + CF: the carry out of this round, kept for the next one.
        adox	rbp, rbx
        adcx	rbp, rbx
        ; Second round of this pass: the same sequence one word further
        ; along, so every memory offset relative to r9 is 8 bytes higher.
        ; mu = a[i] * mp
        mov	rdx, r14
        mov	r12, r14
        imul	rdx, r8
        ; rbx = 0 and CF = OF = 0 again for the two carry chains.
        xor	rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx	rcx, rax, QWORD PTR [r10]
        mov	r14, r15
        adcx	r12, rax
        adox	r14, rcx
        ; a[i+1] += m[1] * mu
        mulx	rcx, rax, QWORD PTR [r10+8]
        mov	r15, rdi
        adcx	r14, rax
        adox	r15, rcx
        ; a[i+2] += m[2] * mu
        mulx	rcx, rax, QWORD PTR [r10+16]
        mov	rdi, rsi
        adcx	r15, rax
        adox	rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx	rcx, rax, QWORD PTR [r10+24]
        mov	rsi, QWORD PTR [r9+-24]
        adcx	rdi, rax
        adox	rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx	rcx, rax, QWORD PTR [r10+32]
        mov	r13, QWORD PTR [r9+-16]
        adcx	rsi, rax
        adox	r13, rcx
        ; a[i+5] += m[5] * mu
        mulx	rcx, rax, QWORD PTR [r10+40]
        mov	r12, QWORD PTR [r9+-8]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-16], r13
        ; a[i+6] += m[6] * mu
        mulx	rcx, rax, QWORD PTR [r10+48]
        mov	r13, QWORD PTR [r9]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-8], r12
        ; a[i+7] += m[7] * mu
        mulx	rcx, rax, QWORD PTR [r10+56]
        mov	r12, QWORD PTR [r9+8]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9], r13
        ; a[i+8] += m[8] * mu
        mulx	rcx, rax, QWORD PTR [r10+64]
        mov	r13, QWORD PTR [r9+16]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+8], r12
        ; a[i+9] += m[9] * mu
        mulx	rcx, rax, QWORD PTR [r10+72]
        mov	r12, QWORD PTR [r9+24]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+16], r13
        ; a[i+10] += m[10] * mu
        mulx	rcx, rax, QWORD PTR [r10+80]
        mov	r13, QWORD PTR [r9+32]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+24], r12
        ; a[i+11] += m[11] * mu
        mulx	rcx, rax, QWORD PTR [r10+88]
        mov	r12, QWORD PTR [r9+40]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+32], r13
        ; a[i+12] += m[12] * mu
        mulx	rcx, rax, QWORD PTR [r10+96]
        mov	r13, QWORD PTR [r9+48]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+40], r12
        ; a[i+13] += m[13] * mu
        mulx	rcx, rax, QWORD PTR [r10+104]
        mov	r12, QWORD PTR [r9+56]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+48], r13
        ; a[i+14] += m[14] * mu
        mulx	rcx, rax, QWORD PTR [r10+112]
        mov	r13, QWORD PTR [r9+64]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+56], r12
        ; a[i+15] += m[15] * mu
        mulx	rcx, rax, QWORD PTR [r10+120]
        mov	r12, QWORD PTR [r9+72]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+64], r13
        ; Top word of this round absorbs the carry from the round before.
        adcx	r12, rbp
        mov	rbp, rbx
        mov	QWORD PTR [r9+72], r12
        ; rbp = OF + CF: carry out of this round.
        adox	rbp, rbx
        adcx	rbp, rbx
        ; a += 2
        add	r9, 16
        ; i -= 2
        sub	r11, 2
        jnz	L_2048_mont_reduce_avx2_16_loop
        ; Undo the +64 bias: after 8 passes r9 now points at a[16].
        sub	r9, 64
        ; Turn the final carry into a mask: 0 stays 0, 1 becomes all ones.
        ; NOTE(review): this relies on the carry being 0 or 1 here; that
        ; bound is not established by anything visible in this routine.
        neg	rbp
        ; r8 = &a[16] (source of the upper half), r9 = &a[0] (destination).
        mov	r8, r9
        sub	r9, 128
        ; Conditional subtract: a[0..15] = a[16..31] - (m & mask).
        ; a[16..19] are still in r14, r15, rdi, rsi; a[20..31] come from
        ; memory. pext with an all-ones mask returns its source unchanged and
        ; with a zero mask returns 0, and it leaves the flags alone, so the
        ; borrow produced by sub/sbb carries from one word to the next. The
        ; same instructions run whichever value the mask has.
        mov	rcx, QWORD PTR [r10]
        mov	rdx, r14
        pext	rcx, rcx, rbp
        sub	rdx, rcx
        mov	rcx, QWORD PTR [r10+8]
        mov	rax, r15
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+16]
        mov	rcx, rdi
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+8], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+24]
        mov	rdx, rsi
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+16], rcx
        sbb	rdx, rax
        ; Remaining words of the upper half are read from memory via r8.
        mov	rcx, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [r8+32]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+24], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+40]
        mov	rcx, QWORD PTR [r8+40]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+32], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+48]
        mov	rdx, QWORD PTR [r8+48]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+40], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+56]
        mov	rax, QWORD PTR [r8+56]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+48], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+64]
        mov	rcx, QWORD PTR [r8+64]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+56], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+72]
        mov	rdx, QWORD PTR [r8+72]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+64], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [r8+80]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+72], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+88]
        mov	rcx, QWORD PTR [r8+88]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+80], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+96]
        mov	rdx, QWORD PTR [r8+96]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+88], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+104]
        mov	rax, QWORD PTR [r8+104]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+96], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+112]
        mov	rcx, QWORD PTR [r8+112]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+104], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+120]
        mov	rdx, QWORD PTR [r8+120]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+112], rcx
        sbb	rdx, rax
        mov	QWORD PTR [r9+120], rdx
        ; Restore the saved registers in reverse push order.
        pop	rbp
        pop	rbx
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Copy one 128-byte entry out of a 32-entry table without using the
;  * index to form a memory address or to branch.
;  *
;  * All 32 entries are always read in full. Each one is ANDed with a mask
;  * that is all ones only when its position equals the requested index, and
;  * the masked values are ORed into an accumulator that starts at zero.
;  * If the index matches none of the positions 0..31 the result is all zero.
;  *
;  * Microsoft x64 calling convention:
;  * rcx  Destination buffer, 128 bytes (16 64-bit words).
;  * rdx  Array of 32 pointers, each to a 128-byte table entry.
;  * r8   Index of the entry to copy. Only the low 32 bits are compared.
;  *
;  * Clobbers rax, r9, ymm0-ymm5 and the upper halves of all ymm registers.
;  * xmm6-xmm13 are saved and restored.
;  *
;  * NOTE(review): this routine uses AVX2 instructions, but the only guard
;  * visible around it is WC_NO_CACHE_RESISTANT - confirm it is never
;  * assembled when NO_AVX2_SUPPORT is set.
;  */
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_16 PROC
        ; xmm6-xmm15 are callee-saved on Win64; spill the ones saved by the
        ; original code to a 128-byte stack area.
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        ; ymm10 = index in every 32-bit lane, ymm11 = 1 in every 32-bit lane.
        ; vpermd with an all-zero selector (ymm13) replicates lane 0.
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        vpxor	ymm13, ymm13, ymm13
        vpermd	ymm10, ymm13, ymm10
        vpermd	ymm11, ymm13, ymm11
        ; ymm13 = position counter (starts at 0), ymm4-ymm7 = accumulator.
        vpxor	ymm13, ymm13, ymm13
        vpxor	ymm4, ymm4, ymm4
        vpxor	ymm5, ymm5, ymm5
        vpxor	ymm6, ymm6, ymm6
        vpxor	ymm7, ymm7, ymm7
        ; Byte offset of the current entry's pointer within the array at rdx.
        sp_2048_get_from_table_avx2_16_off = 0
        REPEAT 32
        ; r9 = pointer to this entry; ymm12 = all ones iff counter == index.
        mov	r9, QWORD PTR [rdx+sp_2048_get_from_table_avx2_16_off]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        ; Keep the entry only when the mask is set, then merge it in.
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        ; counter += 1
        vpaddd	ymm13, ymm13, ymm11
        sp_2048_get_from_table_avx2_16_off = sp_2048_get_from_table_avx2_16_off + 8
        ENDM
        ; Write the selected entry to the destination.
        vmovdqu	YMMWORD PTR [rcx], ymm4
        vmovdqu	YMMWORD PTR [rcx+32], ymm5
        vmovdqu	YMMWORD PTR [rcx+64], ymm6
        vmovdqu	YMMWORD PTR [rcx+96], ymm7
        ; Clear the upper ymm halves so that non-VEX SSE code executed after
        ; the return does not pay AVX/SSE transition penalties. The upper
        ; halves are volatile on Win64, so nothing the caller owns is lost.
        vzeroupper
        vmovdqu	xmm6, OWORD PTR [rsp]
        vmovdqu	xmm7, OWORD PTR [rsp+16]
        vmovdqu	xmm8, OWORD PTR [rsp+32]
        vmovdqu	xmm9, OWORD PTR [rsp+48]
        vmovdqu	xmm10, OWORD PTR [rsp+64]
        vmovdqu	xmm11, OWORD PTR [rsp+80]
        vmovdqu	xmm12, OWORD PTR [rsp+96]
        vmovdqu	xmm13, OWORD PTR [rsp+112]
        add	rsp, 128
        ret
sp_2048_get_from_table_avx2_16 ENDP
_text ENDS
ENDIF
; /* Subtract (b AND m) from a and store the 32-word difference in r.
;  * With m = -1 (all ones) the result is a - b; with m = 0 the result is a.
;  * The same instructions execute for either mask value.
;  *
;  * r  A single precision number receiving the result (32 64-bit words).
;  * a  A single precision number to subtract from.
;  * b  A single precision number to subtract when the mask is set.
;  * m  Mask value: -1 to subtract b, 0 to subtract nothing.
;  *
;  * Returns in rax 0 when there was no borrow out of the top word and -1
;  * (all ones) when there was.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_32 PROC
        ; 256 bytes of stack hold the masked copy of b.
        sub	rsp, 256
        ; Stage 1: tmp[i] = b[i] AND m, two words per step.
        ; This is done up front because "and" overwrites the carry flag and
        ; so cannot sit inside the borrow chain of stage 2.
        sp_2048_cond_sub_32_moff = 0
        REPEAT 16
        mov	r10, QWORD PTR [r8+sp_2048_cond_sub_32_moff]
        mov	r11, QWORD PTR [r8+sp_2048_cond_sub_32_moff+8]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+sp_2048_cond_sub_32_moff], r10
        mov	QWORD PTR [rsp+sp_2048_cond_sub_32_moff+8], r11
        sp_2048_cond_sub_32_moff = sp_2048_cond_sub_32_moff + 16
        ENDM
        ; Stage 2: r = a - tmp with the borrow carried in CF.
        ; Only mov (which leaves flags alone) separates the sub/sbb steps.
        ; r8 is reused as scratch for tmp[i]; results alternate between
        ; r10 and r11, each stored one step after it is computed.
        ; Word 0 starts the chain.
        mov	r10, QWORD PTR [rdx]
        mov	r8, QWORD PTR [rsp]
        sub	r10, r8
        ; Words 1..30, handled as 15 (odd, even) pairs.
        sp_2048_cond_sub_32_soff = 8
        REPEAT 15
        mov	r11, QWORD PTR [rdx+sp_2048_cond_sub_32_soff]
        mov	r8, QWORD PTR [rsp+sp_2048_cond_sub_32_soff]
        sbb	r11, r8
        mov	QWORD PTR [rcx+sp_2048_cond_sub_32_soff-8], r10
        mov	r10, QWORD PTR [rdx+sp_2048_cond_sub_32_soff+8]
        mov	r8, QWORD PTR [rsp+sp_2048_cond_sub_32_soff+8]
        sbb	r10, r8
        mov	QWORD PTR [rcx+sp_2048_cond_sub_32_soff], r11
        sp_2048_cond_sub_32_soff = sp_2048_cond_sub_32_soff + 16
        ENDM
        ; Word 31 ends the chain; flush the last two results.
        mov	r11, QWORD PTR [rdx+248]
        mov	r8, QWORD PTR [rsp+248]
        sbb	r11, r8
        mov	QWORD PTR [rcx+240], r10
        mov	QWORD PTR [rcx+248], r11
        ; rax = 0 - CF: 0 without a final borrow, -1 with one.
        ; Must come before "add rsp", which would overwrite CF.
        sbb	rax, rax
        add	rsp, 256
        ret
sp_2048_cond_sub_32 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_32 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        mov	r9, rdx
        xor	rsi, rsi
        ; i = 32
        mov	r10, 32
        mov	r15, QWORD PTR [rcx]
        mov	rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_32_loop:
        ; mu = a[i] * mp
        mov	r13, r15
        imul	r13, r8
        ; a[i+0] += m[0] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9]
        add	r15, rax
        adc	r12, rdx
        ; a[i+1] += m[1] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+8]
        mov	r15, rdi
        add	r15, rax
        adc	r11, rdx
        add	r15, r12
        adc	r11, 0
        ; a[i+2] += m[2] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+16]
        mov	rdi, QWORD PTR [rcx+16]
        add	rdi, rax
        adc	r12, rdx
        add	rdi, r11
        adc	r12, 0
        ; a[i+3] += m[3] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+24]
        mov	r14, QWORD PTR [rcx+24]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+24], r14
        adc	r11, 0
        ; a[i+4] += m[4] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+32]
        mov	r14, QWORD PTR [rcx+32]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+32], r14
        adc	r12, 0
        ; a[i+5] += m[5] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+40]
        mov	r14, QWORD PTR [rcx+40]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+40], r14
        adc	r11, 0
        ; a[i+6] += m[6] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+48]
        mov	r14, QWORD PTR [rcx+48]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+48], r14
        adc	r12, 0
        ; a[i+7] += m[7] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+56]
        mov	r14, QWORD PTR [rcx+56]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+56], r14
        adc	r11, 0
        ; a[i+8] += m[8] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+64]
        mov	r14, QWORD PTR [rcx+64]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+64], r14
        adc	r12, 0
        ; a[i+9] += m[9] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+72]
        mov	r14, QWORD PTR [rcx+72]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+72], r14
        adc	r11, 0
        ; a[i+10] += m[10] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+80]
        mov	r14, QWORD PTR [rcx+80]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+80], r14
        adc	r12, 0
        ; a[i+11] += m[11] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+88]
        mov	r14, QWORD PTR [rcx+88]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+88], r14
        adc	r11, 0
        ; a[i+12] += m[12] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+96]
        mov	r14, QWORD PTR [rcx+96]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+96], r14
        adc	r12, 0
        ; a[i+13] += m[13] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+104]
        mov	r14, QWORD PTR [rcx+104]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+104], r14
        adc	r11, 0
        ; a[i+14] += m[14] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+112]
        mov	r14, QWORD PTR [rcx+112]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+112], r14
        adc	r12, 0
        ; a[i+15] += m[15] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+120]
        mov	r14, QWORD PTR [rcx+120]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+120], r14
        adc	r11, 0
        ; a[i+16] += m[16] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+128]
        mov	r14, QWORD PTR [rcx+128]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+128], r14
        adc	r12, 0
        ; a[i+17] += m[17] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+136]
        mov	r14, QWORD PTR [rcx+136]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+136], r14
        adc	r11, 0
        ; a[i+18] += m[18] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+144]
        mov	r14, QWORD PTR [rcx+144]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+144], r14
        adc	r12, 0
        ; a[i+19] += m[19] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+152]
        mov	r14, QWORD PTR [rcx+152]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+152], r14
        adc	r11, 0
        ; a[i+20] += m[20] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+160]
        mov	r14, QWORD PTR [rcx+160]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+160], r14
        adc	r12, 0
        ; a[i+21] += m[21] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+168]
        mov	r14, QWORD PTR [rcx+168]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+168], r14
        adc	r11, 0
        ; a[i+22] += m[22] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+176]
        mov	r14, QWORD PTR [rcx+176]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+176], r14
        adc	r12, 0
        ; a[i+23] += m[23] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+184]
        mov	r14, QWORD PTR [rcx+184]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+184], r14
        adc	r11, 0
        ; a[i+24] += m[24] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+192]
        mov	r14, QWORD PTR [rcx+192]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+192], r14
        adc	r12, 0
        ; a[i+25] += m[25] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+200]
        mov	r14, QWORD PTR [rcx+200]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+200], r14
        adc	r11, 0
        ; a[i+26] += m[26] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+208]
        mov	r14, QWORD PTR [rcx+208]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+208], r14
        adc	r12, 0
        ; a[i+27] += m[27] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+216]
        mov	r14, QWORD PTR [rcx+216]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+216], r14
        adc	r11, 0
        ; a[i+28] += m[28] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+224]
        mov	r14, QWORD PTR [rcx+224]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+224], r14
        adc	r12, 0
        ; a[i+29] += m[29] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+232]
        mov	r14, QWORD PTR [rcx+232]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+232], r14
        adc	r11, 0
        ; a[i+30] += m[30] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+240]
        mov	r14, QWORD PTR [rcx+240]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+240], r14
        adc	r12, 0
        ; a[i+31] += m[31] * mu
        mov	rax, r13
        mul	QWORD PTR [r9+248]
        mov	r14, QWORD PTR [rcx+248]
        add	r12, rax
        adc	rdx, rsi
        mov	rsi, 0
        adc	rsi, 0
        add	r14, r12
        mov	QWORD PTR [rcx+248], r14
        adc	QWORD PTR [rcx+256], rdx
        adc	rsi, 0
        ; i -= 1
        add	rcx, 8
        dec	r10
        jnz	L_2048_mont_reduce_32_loop
        mov	QWORD PTR [rcx], r15
        mov	QWORD PTR [rcx+8], rdi
        neg	rsi
IFDEF _WIN64
        mov	r8, r9
        mov	r9, rsi
ELSE
        mov	r9, rsi
        mov	r8, r9
ENDIF
        mov	rdx, rcx
        mov	rcx, rcx
        sub	rcx, 256
        call	sp_2048_cond_sub_32
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mont_reduce_32 ENDP
_text ENDS
; /* Subtract b from a and store the difference in r. (r = a - b)
;  *
;  * All operands are 32 words (2048 bits) long. The borrow ripples from
;  * word 0 up to word 31 through the carry flag.
;  *
;  * r  (rcx) A single precision integer that receives the result.
;  * a  (rdx) A single precision integer to subtract from.
;  * b  (r8)  A single precision integer to subtract.
;  * returns (rax) 0 when no borrow left the top word, -1 otherwise.
;  */
; One step of the borrow chain: load the word of a at byte offset disp into
; dreg, store the previously computed result word (held in sreg) and subtract
; the matching word of b with borrow. Only mov and sbb are used, so the
; carry flag is carried untouched from one step to the next.
SP_2048_SUB_32_STEP MACRO dreg, sreg, disp
        mov	dreg, QWORD PTR [rdx+disp]
        mov	QWORD PTR [rcx+disp-8], sreg
        sbb	dreg, QWORD PTR [r8+disp]
ENDM
_text SEGMENT READONLY PARA
sp_2048_sub_32 PROC
        ; Word 0 opens the chain with a plain sub (no incoming borrow).
        mov	r9, QWORD PTR [rdx]
        sub	r9, QWORD PTR [r8]
        ; Words 1..31: r10 and r9 take turns as the working register.
        SP_2048_SUB_32_STEP	r10, r9, 8
        SP_2048_SUB_32_STEP	r9, r10, 16
        SP_2048_SUB_32_STEP	r10, r9, 24
        SP_2048_SUB_32_STEP	r9, r10, 32
        SP_2048_SUB_32_STEP	r10, r9, 40
        SP_2048_SUB_32_STEP	r9, r10, 48
        SP_2048_SUB_32_STEP	r10, r9, 56
        SP_2048_SUB_32_STEP	r9, r10, 64
        SP_2048_SUB_32_STEP	r10, r9, 72
        SP_2048_SUB_32_STEP	r9, r10, 80
        SP_2048_SUB_32_STEP	r10, r9, 88
        SP_2048_SUB_32_STEP	r9, r10, 96
        SP_2048_SUB_32_STEP	r10, r9, 104
        SP_2048_SUB_32_STEP	r9, r10, 112
        SP_2048_SUB_32_STEP	r10, r9, 120
        SP_2048_SUB_32_STEP	r9, r10, 128
        SP_2048_SUB_32_STEP	r10, r9, 136
        SP_2048_SUB_32_STEP	r9, r10, 144
        SP_2048_SUB_32_STEP	r10, r9, 152
        SP_2048_SUB_32_STEP	r9, r10, 160
        SP_2048_SUB_32_STEP	r10, r9, 168
        SP_2048_SUB_32_STEP	r9, r10, 176
        SP_2048_SUB_32_STEP	r10, r9, 184
        SP_2048_SUB_32_STEP	r9, r10, 192
        SP_2048_SUB_32_STEP	r10, r9, 200
        SP_2048_SUB_32_STEP	r9, r10, 208
        SP_2048_SUB_32_STEP	r10, r9, 216
        SP_2048_SUB_32_STEP	r9, r10, 224
        SP_2048_SUB_32_STEP	r10, r9, 232
        SP_2048_SUB_32_STEP	r9, r10, 240
        SP_2048_SUB_32_STEP	r10, r9, 248
        ; Flush the last result word.
        mov	QWORD PTR [rcx+248], r10
        ; rax = rax - rax - CF, i.e. 0 or -1 depending on the final borrow.
        sbb	rax, rax
        ret
sp_2048_sub_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a by the single digit b and store the product in r. (r = a * b)
;  *
;  * a is 32 words long; 33 words are written to r (byte offsets 0..256).
;  * Uses mulx with the adcx (CF) and adox (OF) carry chains.
;  *
;  * r  (rcx) A single precision integer that receives the product.
;  * a  (rdx) A single precision integer.
;  * b  (r8)  A single precision digit.
;  */
; One column of the product for the word of a at byte offset disp:
;   r10:r9 = a[disp/8] * rdx
;   acc   += r9  + CF   (acc already holds the high half of the previous column)
;   nxt    = r10 + OF   (nxt is first cleared from r13, which is kept at zero)
; and the finished word in acc is written to r.
SP_2048_MUL_D_AVX2_32_STEP MACRO acc, nxt, disp
        mulx	r10, r9, QWORD PTR [rax+disp]
        mov	nxt, r13
        adcx	acc, r9
        adox	nxt, r10
        mov	QWORD PTR [rcx+disp], acc
ENDM
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_32 PROC
        ; r12 and r13 are callee-saved in the Microsoft x64 ABI.
        push	r12
        push	r13
        ; mulx takes one factor implicitly from rdx, so a moves to rax and
        ; the digit b takes its place.
        mov	rax, rdx
        mov	rdx, r8
        ; r13 = 0 for the rest of the routine; the xor also clears CF and OF.
        xor	r13, r13
        ; A[0] * B
        mulx	r12, r11, QWORD PTR [rax]
        mov	QWORD PTR [rcx], r11
        ; A[1] * B .. A[30] * B: r12 and r11 swap roles every column.
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 8
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 16
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 24
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 32
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 40
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 48
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 56
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 64
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 72
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 80
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 88
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 96
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 104
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 112
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 120
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 128
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 136
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 144
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 152
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 160
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 168
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 176
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 184
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 192
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 200
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 208
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 216
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 224
        SP_2048_MUL_D_AVX2_32_STEP	r12, r11, 232
        SP_2048_MUL_D_AVX2_32_STEP	r11, r12, 240
        ; A[31] * B: the last column also folds the outstanding CF into the
        ; top word before both remaining words are stored.
        mulx	r10, r9, QWORD PTR [rax+248]
        mov	r11, r13
        adcx	r12, r9
        adox	r11, r10
        adcx	r11, r13
        mov	QWORD PTR [rcx+248], r12
        mov	QWORD PTR [rcx+256], r11
        pop	r13
        pop	r12
        ret
sp_2048_mul_d_avx2_32 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * d1   (rcx) The high order half of the number to divide.
;  * d0   (rdx) The low order half of the number to divide.
;  * div  (r8)  The divisor.
;  * returns (rax) the quotient of the division; div also leaves the
;  * remainder in rdx.
;  *
;  * The quotient has to fit in 64 bits (d1 < div): the div instruction
;  * raises a divide error otherwise, and also when div is zero.
;  */
_text SEGMENT READONLY PARA
div_2048_word_asm_32 PROC
        ; div expects the number to divide in rdx:rax, so d0 has to leave
        ; rdx before d1 is written over it.
        mov	rax, rdx
        mov	rdx, rcx
        div	r8
        ret
div_2048_word_asm_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract (r = a - b) and 0 to leave a as it is (r = a).
;  *
;  * Every word of b goes through pext with m as the mask: an all-ones mask
;  * returns the word unchanged and a zero mask returns 0, so the same
;  * instructions run for either value of m.
;  *
;  * r  (rcx) A single precision number representing condition subtract result.
;  * a  (rdx) A single precision number to subtract from.
;  * b  (r8)  A single precision number to subtract.
;  * m  (r9)  Mask value to apply.
;  * returns (rax) 0 when no borrow left the top word, -1 otherwise.
;  */
; One step of the borrow chain for the words at byte offset disp:
;   breg = word of b, filtered through the mask in r9
;   areg = word of a, then areg -= breg + borrow
; The result word of the previous step (in preg) is stored in between.
; mov and pext do not modify the carry flag, so the borrow reaches the sbb.
SP_2048_COND_SUB_AVX2_32_STEP MACRO breg, areg, preg, disp
        mov	breg, QWORD PTR [r8+disp]
        mov	areg, QWORD PTR [rdx+disp]
        pext	breg, breg, r9
        mov	QWORD PTR [rcx+disp-8], preg
        sbb	areg, breg
ENDM
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_32 PROC
        ; r12 is callee-saved in the Microsoft x64 ABI.
        push	r12
        ; Word 0 opens the chain with a plain sub (no incoming borrow).
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [rdx]
        pext	r12, r12, r9
        sub	r10, r12
        ; Words 1..31: r12, r10 and r11 rotate through the three roles.
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 8
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 16
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 24
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 32
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 40
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 48
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 56
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 64
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 72
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 80
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 88
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 96
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 104
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 112
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 120
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 128
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 136
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 144
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 152
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 160
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 168
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 176
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 184
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 192
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 200
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 208
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 216
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 224
        SP_2048_COND_SUB_AVX2_32_STEP	r10, r12, r11, 232
        SP_2048_COND_SUB_AVX2_32_STEP	r11, r10, r12, 240
        SP_2048_COND_SUB_AVX2_32_STEP	r12, r11, r10, 248
        ; Flush the last result word.
        mov	QWORD PTR [rcx+248], r11
        ; rax = rax - rax - CF, i.e. 0 or -1 depending on the final borrow.
        sbb	rax, rax
        pop	r12
        ret
sp_2048_cond_sub_avx2_32 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
;  *
;  * The 32 words are visited from the most significant (byte offset 248)
;  * down to the least significant, always executing the same instructions.
;  * Once a differing word has been seen the mask in r8 drops to zero, which
;  * makes all lower words compare as equal.
;  *
;  * a  (rcx) A single precision integer.
;  * b  (rdx) A single precision integer.
;  * return (rax) -ve, 0 or +ve if a is less than, equal to or greater than b
;  * respectively.
;  */
; Compare the words of a and b at byte offset disp.
;   r8  = mask: -1 while all higher words were equal, 0 afterwards
;   r9  = 0, r10 = 1 (constants for the conditional moves)
;   rax = running result
; Both words are masked with r8 first. a > b sets rax to 1, a < b sets rax to
; the mask (-1 at that point), and any difference clears the mask.
SP_2048_CMP_32_WORD MACRO disp
        mov	r11, QWORD PTR [rcx+disp]
        mov	r12, QWORD PTR [rdx+disp]
        and	r11, r8
        and	r12, r8
        sub	r11, r12
        cmova	rax, r10
        cmovc	rax, r8
        cmovnz	r8, r9
ENDM
_text SEGMENT READONLY PARA
sp_2048_cmp_32 PROC
        ; r12 is callee-saved in the Microsoft x64 ABI.
        push	r12
        xor	r9, r9
        mov	r8, -1
        mov	rax, -1
        mov	r10, 1
        SP_2048_CMP_32_WORD	248
        SP_2048_CMP_32_WORD	240
        SP_2048_CMP_32_WORD	232
        SP_2048_CMP_32_WORD	224
        SP_2048_CMP_32_WORD	216
        SP_2048_CMP_32_WORD	208
        SP_2048_CMP_32_WORD	200
        SP_2048_CMP_32_WORD	192
        SP_2048_CMP_32_WORD	184
        SP_2048_CMP_32_WORD	176
        SP_2048_CMP_32_WORD	168
        SP_2048_CMP_32_WORD	160
        SP_2048_CMP_32_WORD	152
        SP_2048_CMP_32_WORD	144
        SP_2048_CMP_32_WORD	136
        SP_2048_CMP_32_WORD	128
        SP_2048_CMP_32_WORD	120
        SP_2048_CMP_32_WORD	112
        SP_2048_CMP_32_WORD	104
        SP_2048_CMP_32_WORD	96
        SP_2048_CMP_32_WORD	88
        SP_2048_CMP_32_WORD	80
        SP_2048_CMP_32_WORD	72
        SP_2048_CMP_32_WORD	64
        SP_2048_CMP_32_WORD	56
        SP_2048_CMP_32_WORD	48
        SP_2048_CMP_32_WORD	40
        SP_2048_CMP_32_WORD	32
        SP_2048_CMP_32_WORD	24
        SP_2048_CMP_32_WORD	16
        SP_2048_CMP_32_WORD	8
        SP_2048_CMP_32_WORD	0
        ; If every word matched, the mask is still -1 and cancels the initial
        ; -1 in rax to give 0; otherwise the mask is 0 and rax is kept.
        xor	rax, r8
        pop	r12
        ret
sp_2048_cmp_32 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_32 PROC
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        pxor	xmm13, xmm13
        pshufd	xmm11, xmm11, 0
        pshufd	xmm10, xmm10, 0
        ; START: 0-7
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 0-7
        ; START: 8-15
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 8-15
        ; START: 16-23
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 16-23
        ; START: 24-31
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        add	r9, 192
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        ; END: 24-31
        vmovdqu	xmm6, OWORD PTR [rsp]
        vmovdqu	xmm7, OWORD PTR [rsp+16]
        vmovdqu	xmm8, OWORD PTR [rsp+32]
        vmovdqu	xmm9, OWORD PTR [rsp+48]
        vmovdqu	xmm10, OWORD PTR [rsp+64]
        vmovdqu	xmm11, OWORD PTR [rsp+80]
        vmovdqu	xmm12, OWORD PTR [rsp+96]
        vmovdqu	xmm13, OWORD PTR [rsp+112]
        add	rsp, 128
        ret
sp_2048_get_from_table_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
;  *
;  * Implemented with MULX plus two interleaved carry chains (ADCX uses CF,
;  * ADOX uses OF), followed by a branch-free masked subtraction of the
;  * modulus built with PEXT.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  *
;  * Arguments are read from rcx (a), rdx (m) and r8 (mp).
;  * The loop reads and updates words a[0..63]; the 32-word result is
;  * written to a[0..31].
;  * r12-r15, rdi, rsi, rbx and rbp are saved on entry and restored on exit.
;  * rax, rcx, rdx, r8-r11 and the flags are modified.
;  */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_32 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        push	rbx
        push	rbp
        ; r9 = running pointer into a, r10 = m. rdx must be freed because
        ; MULX uses it as its implicit multiplicand.
        mov	r9, rcx
        mov	r10, rdx
        ; rbp = carry word passed from one outer iteration to the next
        xor	rbp, rbp
        ; i = 32
        mov	r11, 32
        ; Keep the four lowest live words of a in registers:
        ; r14, r15, rdi, rsi = a[i+0], a[i+1], a[i+2], a[i+3]
        mov	r14, QWORD PTR [r9]
        mov	r15, QWORD PTR [r9+8]
        mov	rdi, QWORD PTR [r9+16]
        mov	rsi, QWORD PTR [r9+24]
        ; Bias the pointer by 128 bytes: the displacements -96..128 used in
        ; the loop then address a[i+4]..a[i+32].
        add	r9, 128
        ; (rbp is already zero here; this second clear is redundant)
        xor	rbp, rbp
L_2048_mont_reduce_avx2_32_loop:
        ; mu = a[i] * mp
        mov	rdx, r14
        mov	r12, r14
        imul	rdx, r8
        ; rbx = 0 for the carry collection at the end of the iteration;
        ; the xor also clears CF and OF so both carry chains start at zero.
        xor	rbx, rbx
        ; In each step below: rcx:rax = m[k] * mu. The low half is added
        ; through the CF chain (adcx) and the high half is added to the next
        ; word through the OF chain (adox). mulx and mov do not alter flags,
        ; so both chains stay intact between steps.
        ; a[i+0] += m[0] * mu
        ; The sum left in r12 is never stored (r12 is reloaded further down);
        ; only its carry out in CF is used.
        mulx	rcx, rax, QWORD PTR [r10]
        mov	r14, r15
        adcx	r12, rax
        adox	r14, rcx
        ; a[i+1] += m[1] * mu
        ; The register-held words shift down by one position as they are
        ; updated, ready for the next iteration (r14 <- r15 <- rdi <- rsi).
        mulx	rcx, rax, QWORD PTR [r10+8]
        mov	r15, rdi
        adcx	r14, rax
        adox	r15, rcx
        ; a[i+2] += m[2] * mu
        mulx	rcx, rax, QWORD PTR [r10+16]
        mov	rdi, rsi
        adcx	r15, rax
        adox	rdi, rcx
        ; a[i+3] += m[3] * mu
        ; rsi is refilled from memory with a[i+4]
        mulx	rcx, rax, QWORD PTR [r10+24]
        mov	rsi, QWORD PTR [r9+-96]
        adcx	rdi, rax
        adox	rsi, rcx
        ; a[i+4] += m[4] * mu
        ; From here on r13 and r12 alternate as the "current" and "next"
        ; word; each finished word is stored back to memory.
        mulx	rcx, rax, QWORD PTR [r10+32]
        mov	r13, QWORD PTR [r9+-88]
        adcx	rsi, rax
        adox	r13, rcx
        ; a[i+5] += m[5] * mu
        mulx	rcx, rax, QWORD PTR [r10+40]
        mov	r12, QWORD PTR [r9+-80]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-88], r13
        ; a[i+6] += m[6] * mu
        mulx	rcx, rax, QWORD PTR [r10+48]
        mov	r13, QWORD PTR [r9+-72]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-80], r12
        ; a[i+7] += m[7] * mu
        mulx	rcx, rax, QWORD PTR [r10+56]
        mov	r12, QWORD PTR [r9+-64]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-72], r13
        ; a[i+8] += m[8] * mu
        mulx	rcx, rax, QWORD PTR [r10+64]
        mov	r13, QWORD PTR [r9+-56]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-64], r12
        ; a[i+9] += m[9] * mu
        mulx	rcx, rax, QWORD PTR [r10+72]
        mov	r12, QWORD PTR [r9+-48]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-56], r13
        ; a[i+10] += m[10] * mu
        mulx	rcx, rax, QWORD PTR [r10+80]
        mov	r13, QWORD PTR [r9+-40]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-48], r12
        ; a[i+11] += m[11] * mu
        mulx	rcx, rax, QWORD PTR [r10+88]
        mov	r12, QWORD PTR [r9+-32]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-40], r13
        ; a[i+12] += m[12] * mu
        mulx	rcx, rax, QWORD PTR [r10+96]
        mov	r13, QWORD PTR [r9+-24]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-32], r12
        ; a[i+13] += m[13] * mu
        mulx	rcx, rax, QWORD PTR [r10+104]
        mov	r12, QWORD PTR [r9+-16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-24], r13
        ; a[i+14] += m[14] * mu
        mulx	rcx, rax, QWORD PTR [r10+112]
        mov	r13, QWORD PTR [r9+-8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-16], r12
        ; a[i+15] += m[15] * mu
        mulx	rcx, rax, QWORD PTR [r10+120]
        mov	r12, QWORD PTR [r9]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-8], r13
        ; a[i+16] += m[16] * mu
        mulx	rcx, rax, QWORD PTR [r10+128]
        mov	r13, QWORD PTR [r9+8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9], r12
        ; a[i+17] += m[17] * mu
        mulx	rcx, rax, QWORD PTR [r10+136]
        mov	r12, QWORD PTR [r9+16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+8], r13
        ; a[i+18] += m[18] * mu
        mulx	rcx, rax, QWORD PTR [r10+144]
        mov	r13, QWORD PTR [r9+24]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+16], r12
        ; a[i+19] += m[19] * mu
        mulx	rcx, rax, QWORD PTR [r10+152]
        mov	r12, QWORD PTR [r9+32]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+24], r13
        ; a[i+20] += m[20] * mu
        mulx	rcx, rax, QWORD PTR [r10+160]
        mov	r13, QWORD PTR [r9+40]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+32], r12
        ; a[i+21] += m[21] * mu
        mulx	rcx, rax, QWORD PTR [r10+168]
        mov	r12, QWORD PTR [r9+48]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+40], r13
        ; a[i+22] += m[22] * mu
        mulx	rcx, rax, QWORD PTR [r10+176]
        mov	r13, QWORD PTR [r9+56]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+48], r12
        ; a[i+23] += m[23] * mu
        mulx	rcx, rax, QWORD PTR [r10+184]
        mov	r12, QWORD PTR [r9+64]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+56], r13
        ; a[i+24] += m[24] * mu
        mulx	rcx, rax, QWORD PTR [r10+192]
        mov	r13, QWORD PTR [r9+72]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+64], r12
        ; a[i+25] += m[25] * mu
        mulx	rcx, rax, QWORD PTR [r10+200]
        mov	r12, QWORD PTR [r9+80]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+72], r13
        ; a[i+26] += m[26] * mu
        mulx	rcx, rax, QWORD PTR [r10+208]
        mov	r13, QWORD PTR [r9+88]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+80], r12
        ; a[i+27] += m[27] * mu
        mulx	rcx, rax, QWORD PTR [r10+216]
        mov	r12, QWORD PTR [r9+96]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+88], r13
        ; a[i+28] += m[28] * mu
        mulx	rcx, rax, QWORD PTR [r10+224]
        mov	r13, QWORD PTR [r9+104]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+96], r12
        ; a[i+29] += m[29] * mu
        mulx	rcx, rax, QWORD PTR [r10+232]
        mov	r12, QWORD PTR [r9+112]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+104], r13
        ; a[i+30] += m[30] * mu
        mulx	rcx, rax, QWORD PTR [r10+240]
        mov	r13, QWORD PTR [r9+120]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+112], r12
        ; a[i+31] += m[31] * mu
        mulx	rcx, rax, QWORD PTR [r10+248]
        mov	r12, QWORD PTR [r9+128]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+120], r13
        ; a[i+32] += carry saved by the previous iteration (rbp)
        adcx	r12, rbp
        ; rbp = 0 via mov, which leaves CF and OF untouched
        mov	rbp, rbx
        mov	QWORD PTR [r9+128], r12
        ; Fold the final OF and CF into rbp: the carry for the next iteration
        adox	rbp, rbx
        adcx	rbp, rbx
        ; The add/sub below overwrite the flags; the carries are already
        ; captured in rbp.
        ; a += 1
        add	r9, 8
        ; i -= 1
        sub	r11, 1
        jnz	L_2048_mont_reduce_avx2_32_loop
        ; Undo the 128 byte bias: r9 now points at a[32]
        sub	r9, 128
        ; rbp = 0 - carry. With rbp = 0, pext returns 0; with rbp = all ones,
        ; pext returns its source unchanged, so rbp acts as a select mask for
        ; m (assumes the carry collected above is 0 or 1).
        neg	rbp
        ; r8 = source (upper half, a[32..63]); r9 = destination (a[0..31])
        mov	r8, r9
        sub	r9, 256
        ; a[k] = a[32+k] - (m[k] selected by mask), k = 0..31, as one
        ; sub/sbb borrow chain. rax, rcx and rdx rotate through the roles of
        ; masked modulus word, minuend and pending store; mov and pext leave
        ; CF untouched between the sbb instructions.
        ; Words 32..35 come from r14, r15, rdi and rsi (they were kept in
        ; registers by the loop); the rest are loaded through r8.
        mov	rcx, QWORD PTR [r10]
        mov	rdx, r14
        pext	rcx, rcx, rbp
        sub	rdx, rcx
        mov	rcx, QWORD PTR [r10+8]
        mov	rax, r15
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+16]
        mov	rcx, rdi
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+8], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+24]
        mov	rdx, rsi
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+16], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [r8+32]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+24], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+40]
        mov	rcx, QWORD PTR [r8+40]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+32], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+48]
        mov	rdx, QWORD PTR [r8+48]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+40], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+56]
        mov	rax, QWORD PTR [r8+56]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+48], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+64]
        mov	rcx, QWORD PTR [r8+64]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+56], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+72]
        mov	rdx, QWORD PTR [r8+72]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+64], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [r8+80]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+72], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+88]
        mov	rcx, QWORD PTR [r8+88]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+80], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+96]
        mov	rdx, QWORD PTR [r8+96]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+88], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+104]
        mov	rax, QWORD PTR [r8+104]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+96], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+112]
        mov	rcx, QWORD PTR [r8+112]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+104], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+120]
        mov	rdx, QWORD PTR [r8+120]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+112], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+128]
        mov	rax, QWORD PTR [r8+128]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+120], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+136]
        mov	rcx, QWORD PTR [r8+136]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+128], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+144]
        mov	rdx, QWORD PTR [r8+144]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+136], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+152]
        mov	rax, QWORD PTR [r8+152]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+144], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+160]
        mov	rcx, QWORD PTR [r8+160]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+152], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+168]
        mov	rdx, QWORD PTR [r8+168]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+160], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+176]
        mov	rax, QWORD PTR [r8+176]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+168], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+184]
        mov	rcx, QWORD PTR [r8+184]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+176], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+192]
        mov	rdx, QWORD PTR [r8+192]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+184], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+200]
        mov	rax, QWORD PTR [r8+200]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+192], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+208]
        mov	rcx, QWORD PTR [r8+208]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+200], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+216]
        mov	rdx, QWORD PTR [r8+216]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+208], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+224]
        mov	rax, QWORD PTR [r8+224]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+216], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+232]
        mov	rcx, QWORD PTR [r8+232]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+224], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+240]
        mov	rdx, QWORD PTR [r8+240]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+232], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+248]
        mov	rax, QWORD PTR [r8+248]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+240], rdx
        sbb	rax, rcx
        mov	QWORD PTR [r9+248], rax
        ; Restore the saved registers in reverse push order
        pop	rbp
        pop	rbx
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_2048_mont_reduce_avx2_32 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_32 PROC
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        vpxor	ymm13, ymm13, ymm13
        vpermd	ymm10, ymm13, ymm10
        vpermd	ymm11, ymm13, ymm11
        ; START: 0-15
        vpxor	ymm13, ymm13, ymm13
        vpxor	ymm4, ymm4, ymm4
        vpxor	ymm5, ymm5, ymm5
        vpxor	ymm6, ymm6, ymm6
        vpxor	ymm7, ymm7, ymm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        vmovdqu	YMMWORD PTR [rcx], ymm4
        vmovdqu	YMMWORD PTR [rcx+32], ymm5
        vmovdqu	YMMWORD PTR [rcx+64], ymm6
        vmovdqu	YMMWORD PTR [rcx+96], ymm7
        add	rcx, 128
        ; END: 0-15
        ; START: 16-31
        vpxor	ymm13, ymm13, ymm13
        vpxor	ymm4, ymm4, ymm4
        vpxor	ymm5, ymm5, ymm5
        vpxor	ymm6, ymm6, ymm6
        vpxor	ymm7, ymm7, ymm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 32
        mov	r9, QWORD PTR [rdx+256]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 33
        mov	r9, QWORD PTR [rdx+264]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 34
        mov	r9, QWORD PTR [rdx+272]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 35
        mov	r9, QWORD PTR [rdx+280]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 36
        mov	r9, QWORD PTR [rdx+288]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 37
        mov	r9, QWORD PTR [rdx+296]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 38
        mov	r9, QWORD PTR [rdx+304]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 39
        mov	r9, QWORD PTR [rdx+312]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 40
        mov	r9, QWORD PTR [rdx+320]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 41
        mov	r9, QWORD PTR [rdx+328]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 42
        mov	r9, QWORD PTR [rdx+336]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 43
        mov	r9, QWORD PTR [rdx+344]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 44
        mov	r9, QWORD PTR [rdx+352]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 45
        mov	r9, QWORD PTR [rdx+360]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 46
        mov	r9, QWORD PTR [rdx+368]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 47
        mov	r9, QWORD PTR [rdx+376]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 48
        mov	r9, QWORD PTR [rdx+384]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 49
        mov	r9, QWORD PTR [rdx+392]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 50
        mov	r9, QWORD PTR [rdx+400]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 51
        mov	r9, QWORD PTR [rdx+408]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 52
        mov	r9, QWORD PTR [rdx+416]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 53
        mov	r9, QWORD PTR [rdx+424]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 54
        mov	r9, QWORD PTR [rdx+432]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 55
        mov	r9, QWORD PTR [rdx+440]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 56
        mov	r9, QWORD PTR [rdx+448]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 57
        mov	r9, QWORD PTR [rdx+456]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 58
        mov	r9, QWORD PTR [rdx+464]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 59
        mov	r9, QWORD PTR [rdx+472]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 60
        mov	r9, QWORD PTR [rdx+480]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 61
        mov	r9, QWORD PTR [rdx+488]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 62
        mov	r9, QWORD PTR [rdx+496]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 63
        mov	r9, QWORD PTR [rdx+504]
        add	r9, 128
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        vmovdqu	YMMWORD PTR [rcx], ymm4
        vmovdqu	YMMWORD PTR [rcx+32], ymm5
        vmovdqu	YMMWORD PTR [rcx+64], ymm6
        vmovdqu	YMMWORD PTR [rcx+96], ymm7
        ; END: 16-31
        vmovdqu	xmm6, OWORD PTR [rsp]
        vmovdqu	xmm7, OWORD PTR [rsp+16]
        vmovdqu	xmm8, OWORD PTR [rsp+32]
        vmovdqu	xmm9, OWORD PTR [rsp+48]
        vmovdqu	xmm10, OWORD PTR [rsp+64]
        vmovdqu	xmm11, OWORD PTR [rsp+80]
        vmovdqu	xmm12, OWORD PTR [rsp+96]
        vmovdqu	xmm13, OWORD PTR [rsp+112]
        add	rsp, 128
        ret
sp_2048_get_from_table_avx2_32 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m. (r = a + (b & m))
;  * m is -1 to add and 0 when not.
;  *
;  * Works on 16 64-bit words (128 bytes) per operand.
;  * Arguments are taken from rcx, rdx, r8 and r9 in the order listed below.
;  *
;  * r  A single precision number representing conditional add result.
;  * a  A single precision number to add with.
;  * b  A single precision number to add.
;  * m  Mask value to apply.
;  *
;  * The carry out of the most significant word (0 or 1) is returned in rax.
;  * 128 bytes of stack are used as scratch space for the masked copy of b.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_add_16 PROC
        ; Reserve scratch space for the 16 masked words of b.
        sub	rsp, 128
        ; rax collects the final carry at the end of the chain.
        mov	rax, 0
        ; Phase 1: stack[i] = b[i] & m for i = 0..15.
        ; The masking is done up front because 'and' modifies the flags and
        ; so cannot be interleaved with the add/adc carry chain below.
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp], r10
        mov	QWORD PTR [rsp+8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+16], r10
        mov	QWORD PTR [rsp+24], r11
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+32], r10
        mov	QWORD PTR [rsp+40], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+48], r10
        mov	QWORD PTR [rsp+56], r11
        mov	r10, QWORD PTR [r8+64]
        mov	r11, QWORD PTR [r8+72]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+64], r10
        mov	QWORD PTR [rsp+72], r11
        mov	r10, QWORD PTR [r8+80]
        mov	r11, QWORD PTR [r8+88]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+80], r10
        mov	QWORD PTR [rsp+88], r11
        mov	r10, QWORD PTR [r8+96]
        mov	r11, QWORD PTR [r8+104]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+96], r10
        mov	QWORD PTR [rsp+104], r11
        mov	r10, QWORD PTR [r8+112]
        mov	r11, QWORD PTR [r8+120]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+112], r10
        mov	QWORD PTR [rsp+120], r11
        ; Phase 2: r[i] = a[i] + stack[i] + carry.
        ; b is no longer needed, so r8 is reused to hold the masked word.
        ; r10 and r11 alternate as the sum register; the store of r[i] is
        ; issued after the load of a[i+1]. Only mov/adc are used between the
        ; first add and the final adc so the carry flag survives the chain.
        mov	r10, QWORD PTR [rdx]
        mov	r8, QWORD PTR [rsp]
        add	r10, r8
        mov	r11, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [rsp+8]
        adc	r11, r8
        mov	QWORD PTR [rcx], r10
        mov	r10, QWORD PTR [rdx+16]
        mov	r8, QWORD PTR [rsp+16]
        adc	r10, r8
        mov	QWORD PTR [rcx+8], r11
        mov	r11, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [rsp+24]
        adc	r11, r8
        mov	QWORD PTR [rcx+16], r10
        mov	r10, QWORD PTR [rdx+32]
        mov	r8, QWORD PTR [rsp+32]
        adc	r10, r8
        mov	QWORD PTR [rcx+24], r11
        mov	r11, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [rsp+40]
        adc	r11, r8
        mov	QWORD PTR [rcx+32], r10
        mov	r10, QWORD PTR [rdx+48]
        mov	r8, QWORD PTR [rsp+48]
        adc	r10, r8
        mov	QWORD PTR [rcx+40], r11
        mov	r11, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [rsp+56]
        adc	r11, r8
        mov	QWORD PTR [rcx+48], r10
        mov	r10, QWORD PTR [rdx+64]
        mov	r8, QWORD PTR [rsp+64]
        adc	r10, r8
        mov	QWORD PTR [rcx+56], r11
        mov	r11, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [rsp+72]
        adc	r11, r8
        mov	QWORD PTR [rcx+64], r10
        mov	r10, QWORD PTR [rdx+80]
        mov	r8, QWORD PTR [rsp+80]
        adc	r10, r8
        mov	QWORD PTR [rcx+72], r11
        mov	r11, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [rsp+88]
        adc	r11, r8
        mov	QWORD PTR [rcx+80], r10
        mov	r10, QWORD PTR [rdx+96]
        mov	r8, QWORD PTR [rsp+96]
        adc	r10, r8
        mov	QWORD PTR [rcx+88], r11
        mov	r11, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [rsp+104]
        adc	r11, r8
        mov	QWORD PTR [rcx+96], r10
        mov	r10, QWORD PTR [rdx+112]
        mov	r8, QWORD PTR [rsp+112]
        adc	r10, r8
        mov	QWORD PTR [rcx+104], r11
        mov	r11, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [rsp+120]
        adc	r11, r8
        mov	QWORD PTR [rcx+112], r10
        mov	QWORD PTR [rcx+120], r11
        ; Fold the carry out of word 15 into the return value (rax was 0).
        adc	rax, 0
        ; Release the scratch space; the flags are no longer needed here.
        add	rsp, 128
        ret
sp_2048_cond_add_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m. (r = a + (b masked by m))
;  * m is -1 to add and 0 when not.
;  *
;  * Works on 16 64-bit words (128 bytes) per operand.
;  * Arguments are taken from rcx, rdx, r8 and r9 in the order listed below.
;  *
;  * The mask is applied with pext: an all-ones mask returns the word
;  * unchanged and a zero mask returns 0. pext leaves the flags alone, so the
;  * masking can sit inside the add/adc carry chain and no stack scratch is
;  * needed. For any other mask value pext compacts the selected bits, which
;  * is not the same as a bitwise AND, so m must be exactly 0 or -1.
;  *
;  * r  A single precision number representing conditional add result.
;  * a  A single precision number to add with.
;  * b  A single precision number to add.
;  * m  Mask value to apply.
;  *
;  * The carry out of the most significant word (0 or 1) is returned in rax.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_add_avx2_16 PROC
        ; r12 is callee-saved and is used as a third rotating scratch register.
        push	r12
        ; rax collects the final carry at the end of the chain.
        mov	rax, 0
        ; Word 0 starts the chain with a plain add.
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [rdx]
        pext	r12, r12, r9
        add	r10, r12
        ; Words 1..15: r10, r11 and r12 rotate through the roles of masked
        ; b word, a word/sum, and pending store. Only mov, pext and adc are
        ; used from here to the final adc so the carry flag is preserved.
        mov	r12, QWORD PTR [r8+8]
        mov	r11, QWORD PTR [rdx+8]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx], r10
        adc	r11, r12
        mov	r10, QWORD PTR [r8+16]
        mov	r12, QWORD PTR [rdx+16]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+8], r11
        adc	r12, r10
        mov	r11, QWORD PTR [r8+24]
        mov	r10, QWORD PTR [rdx+24]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+16], r12
        adc	r10, r11
        mov	r12, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [rdx+32]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+24], r10
        adc	r11, r12
        mov	r10, QWORD PTR [r8+40]
        mov	r12, QWORD PTR [rdx+40]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+32], r11
        adc	r12, r10
        mov	r11, QWORD PTR [r8+48]
        mov	r10, QWORD PTR [rdx+48]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+40], r12
        adc	r10, r11
        mov	r12, QWORD PTR [r8+56]
        mov	r11, QWORD PTR [rdx+56]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+48], r10
        adc	r11, r12
        mov	r10, QWORD PTR [r8+64]
        mov	r12, QWORD PTR [rdx+64]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+56], r11
        adc	r12, r10
        mov	r11, QWORD PTR [r8+72]
        mov	r10, QWORD PTR [rdx+72]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+64], r12
        adc	r10, r11
        mov	r12, QWORD PTR [r8+80]
        mov	r11, QWORD PTR [rdx+80]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+72], r10
        adc	r11, r12
        mov	r10, QWORD PTR [r8+88]
        mov	r12, QWORD PTR [rdx+88]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+80], r11
        adc	r12, r10
        mov	r11, QWORD PTR [r8+96]
        mov	r10, QWORD PTR [rdx+96]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+88], r12
        adc	r10, r11
        mov	r12, QWORD PTR [r8+104]
        mov	r11, QWORD PTR [rdx+104]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+96], r10
        adc	r11, r12
        mov	r10, QWORD PTR [r8+112]
        mov	r12, QWORD PTR [rdx+112]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+104], r11
        adc	r12, r10
        mov	r11, QWORD PTR [r8+120]
        mov	r10, QWORD PTR [rdx+120]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+112], r12
        adc	r10, r11
        mov	QWORD PTR [rcx+120], r10
        ; Fold the carry out of word 15 into the return value (rax was 0).
        adc	rax, 0
        pop	r12
        ret
sp_2048_cond_add_avx2_16 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
;  *
;  * Arguments are taken from rcx (r), rdx (a) and r8 (n).
;  *
;  * r  Result of left shift by n. 33 words are written: r[32] receives the
;  *    bits shifted out of the top word a[31].
;  * a  Number to shift (32 words are read).
;  * n  Amount to shift. Only the low byte (r8b) is used, as the cl shift
;  *    count; expected to be in the range 0..63.
;  *
;  * Words are processed from most significant to least significant, each
;  * result word being formed by shld from a word and its lower neighbour.
;  */
_text SEGMENT READONLY PARA
sp_2048_lshift_32 PROC
        push	r12
        push	r13
        ; The shift count has to live in cl, which is the low byte of the
        ; register holding r. Save the destination pointer to rax BEFORE
        ; overwriting cl, otherwise the pointer's low byte is corrupted.
        mov	rax, rcx
        mov	cl, r8b
        ; r12 starts at zero and receives the bits shifted out of a[31].
        mov	r12, 0
        ; Words 27..31 (r13 = a[27] is carried into the next group).
        mov	r13, QWORD PTR [rdx+216]
        mov	r8, QWORD PTR [rdx+224]
        mov	r9, QWORD PTR [rdx+232]
        mov	r10, QWORD PTR [rdx+240]
        mov	r11, QWORD PTR [rdx+248]
        shld	r12, r11, cl
        shld	r11, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r13, cl
        mov	QWORD PTR [rax+224], r8
        mov	QWORD PTR [rax+232], r9
        mov	QWORD PTR [rax+240], r10
        mov	QWORD PTR [rax+248], r11
        mov	QWORD PTR [rax+256], r12
        ; Words 23..26 (r13 holds a[27], r11 = a[23] is carried on).
        mov	r11, QWORD PTR [rdx+184]
        mov	r8, QWORD PTR [rdx+192]
        mov	r9, QWORD PTR [rdx+200]
        mov	r10, QWORD PTR [rdx+208]
        shld	r13, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r11, cl
        mov	QWORD PTR [rax+192], r8
        mov	QWORD PTR [rax+200], r9
        mov	QWORD PTR [rax+208], r10
        mov	QWORD PTR [rax+216], r13
        ; Words 19..22 (r11 holds a[23], r13 = a[19] is carried on).
        mov	r13, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [rdx+160]
        mov	r9, QWORD PTR [rdx+168]
        mov	r10, QWORD PTR [rdx+176]
        shld	r11, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r13, cl
        mov	QWORD PTR [rax+160], r8
        mov	QWORD PTR [rax+168], r9
        mov	QWORD PTR [rax+176], r10
        mov	QWORD PTR [rax+184], r11
        ; Words 15..18.
        mov	r11, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [rdx+128]
        mov	r9, QWORD PTR [rdx+136]
        mov	r10, QWORD PTR [rdx+144]
        shld	r13, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r11, cl
        mov	QWORD PTR [rax+128], r8
        mov	QWORD PTR [rax+136], r9
        mov	QWORD PTR [rax+144], r10
        mov	QWORD PTR [rax+152], r13
        ; Words 11..14.
        mov	r13, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [rdx+96]
        mov	r9, QWORD PTR [rdx+104]
        mov	r10, QWORD PTR [rdx+112]
        shld	r11, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r13, cl
        mov	QWORD PTR [rax+96], r8
        mov	QWORD PTR [rax+104], r9
        mov	QWORD PTR [rax+112], r10
        mov	QWORD PTR [rax+120], r11
        ; Words 7..10.
        mov	r11, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [rdx+64]
        mov	r9, QWORD PTR [rdx+72]
        mov	r10, QWORD PTR [rdx+80]
        shld	r13, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r11, cl
        mov	QWORD PTR [rax+64], r8
        mov	QWORD PTR [rax+72], r9
        mov	QWORD PTR [rax+80], r10
        mov	QWORD PTR [rax+88], r13
        ; Words 3..6.
        mov	r13, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [rdx+32]
        mov	r9, QWORD PTR [rdx+40]
        mov	r10, QWORD PTR [rdx+48]
        shld	r11, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shld	r8, r13, cl
        mov	QWORD PTR [rax+32], r8
        mov	QWORD PTR [rax+40], r9
        mov	QWORD PTR [rax+48], r10
        mov	QWORD PTR [rax+56], r11
        ; Words 0..2; the lowest word has no lower neighbour so it uses shl.
        mov	r8, QWORD PTR [rdx]
        mov	r9, QWORD PTR [rdx+8]
        mov	r10, QWORD PTR [rdx+16]
        shld	r13, r10, cl
        shld	r10, r9, cl
        shld	r9, r8, cl
        shl	r8, cl
        mov	QWORD PTR [rax], r8
        mov	QWORD PTR [rax+8], r9
        mov	QWORD PTR [rax+16], r10
        mov	QWORD PTR [rax+24], r13
        pop	r13
        pop	r12
        ret
sp_2048_lshift_32 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_3072
IFNDEF WOLFSSL_SP_NO_3072
; /* Convert a big endian unsigned byte array into the number r.
;  * Byte order is reversed with the bswap instruction.
;  *
;  * Arguments are taken from rcx (r), rdx (size), r8 (a) and r9 (n).
;  * The size argument is not read by this routine: the output is always
;  * filled up to 384 bytes (48 words), zero padded above the input.
;  *
;  * r  A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a  Byte array.
;  * n  Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_3072_from_bin_bswap PROC
        push	r12
        push	r13
        ; r13 = 0 (zero constant), r11 = a + n (one past the last input byte),
        ; r12 = r + 384 (one past the last output word).
        xor	r13, r13
        lea	r11, [r8+r9]
        lea	r12, [rcx+384]
        jmp	L_3072_from_bin_bswap_64_end
L_3072_from_bin_bswap_64_start:
        ; Consume the last 64 unread input bytes: 8 words, two per step.
        sub	r11, 64
sp_3072_from_bin_bswap_ofs = 0
        REPT	4
        mov	rax, QWORD PTR [r11+(56-sp_3072_from_bin_bswap_ofs)]
        mov	r10, QWORD PTR [r11+(48-sp_3072_from_bin_bswap_ofs)]
        bswap	rax
        bswap	r10
        mov	QWORD PTR [rcx+sp_3072_from_bin_bswap_ofs], rax
        mov	QWORD PTR [rcx+(sp_3072_from_bin_bswap_ofs+8)], r10
sp_3072_from_bin_bswap_ofs = sp_3072_from_bin_bswap_ofs + 16
        ENDM
        add	rcx, 64
        sub	r9, 64
L_3072_from_bin_bswap_64_end:
        ; Loop while at least 64 bytes remain (signed compare).
        cmp	r9, 64
        jge	L_3072_from_bin_bswap_64_start
        jmp	L_3072_from_bin_bswap_8_end
L_3072_from_bin_bswap_8_start:
        ; Consume the last 8 unread input bytes as one word.
        sub	r11, 8
        mov	rax, QWORD PTR [r11]
        bswap	rax
        mov	QWORD PTR [rcx], rax
        add	rcx, 8
        sub	r9, 8
L_3072_from_bin_bswap_8_end:
        ; Loop while at least 8 bytes remain (signed compare).
        cmp	r9, 8
        jge	L_3072_from_bin_bswap_8_start
        ; Nothing left over: skip the partial top word.
        test	r9, r9
        jz	L_3072_from_bin_bswap_hi_end
        ; Build the partial top word from the leading bytes at a, most
        ; significant byte first: r10 = (r10 << 8) + byte.
        xor	r10d, r10d
        xor	eax, eax
L_3072_from_bin_bswap_hi_start:
        mov	al, BYTE PTR [r8]
        shl	r10, 8
        inc	r8
        add	r10, rax
        dec	r9
        jg	L_3072_from_bin_bswap_hi_start
        mov	QWORD PTR [rcx], r10
        add	rcx, 8
L_3072_from_bin_bswap_hi_end:
        ; Zero every remaining output word up to r + 384.
        cmp	rcx, r12
        jge	L_3072_from_bin_bswap_zero_end
L_3072_from_bin_bswap_zero_start:
        mov	QWORD PTR [rcx], r13
        add	rcx, 8
        cmp	rcx, r12
        jl	L_3072_from_bin_bswap_zero_start
L_3072_from_bin_bswap_zero_end:
        pop	r13
        pop	r12
        ret
sp_3072_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Convert a big endian unsigned byte array into the number r.
;  * Byte order is reversed while loading, using the optional movbe
;  * instruction.
;  *
;  * Arguments are taken from rcx (r), rdx (size), r8 (a) and r9 (n).
;  * The size argument is not read by this routine: the output is always
;  * filled up to 384 bytes (48 words), zero padded above the input.
;  *
;  * r  A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a  Byte array.
;  * n  Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_3072_from_bin_movbe PROC
        push	r12
        ; r11 = a + n (one past the last input byte),
        ; r12 = r + 384 (one past the last output word).
        lea	r11, [r8+r9]
        lea	r12, [rcx+384]
        jmp	L_3072_from_bin_movbe_64_end
L_3072_from_bin_movbe_64_start:
        ; Consume the last 64 unread input bytes: 8 words, two per step.
        sub	r11, 64
sp_3072_from_bin_movbe_ofs = 0
        REPT	4
        movbe	rax, QWORD PTR [r11+(56-sp_3072_from_bin_movbe_ofs)]
        movbe	r10, QWORD PTR [r11+(48-sp_3072_from_bin_movbe_ofs)]
        mov	QWORD PTR [rcx+sp_3072_from_bin_movbe_ofs], rax
        mov	QWORD PTR [rcx+(sp_3072_from_bin_movbe_ofs+8)], r10
sp_3072_from_bin_movbe_ofs = sp_3072_from_bin_movbe_ofs + 16
        ENDM
        add	rcx, 64
        sub	r9, 64
L_3072_from_bin_movbe_64_end:
        ; Loop while at least 64 bytes remain (signed compare).
        cmp	r9, 64
        jge	L_3072_from_bin_movbe_64_start
        jmp	L_3072_from_bin_movbe_8_end
L_3072_from_bin_movbe_8_start:
        ; Consume the last 8 unread input bytes as one word.
        sub	r11, 8
        movbe	rax, QWORD PTR [r11]
        mov	QWORD PTR [rcx], rax
        add	rcx, 8
        sub	r9, 8
L_3072_from_bin_movbe_8_end:
        ; Loop while at least 8 bytes remain (signed compare).
        cmp	r9, 8
        jge	L_3072_from_bin_movbe_8_start
        ; Nothing left over: skip the partial top word.
        test	r9, r9
        jz	L_3072_from_bin_movbe_hi_end
        ; Build the partial top word from the leading bytes at a, most
        ; significant byte first: r10 = (r10 << 8) + byte.
        xor	r10d, r10d
        xor	eax, eax
L_3072_from_bin_movbe_hi_start:
        mov	al, BYTE PTR [r8]
        shl	r10, 8
        inc	r8
        add	r10, rax
        dec	r9
        jg	L_3072_from_bin_movbe_hi_start
        mov	QWORD PTR [rcx], r10
        add	rcx, 8
L_3072_from_bin_movbe_hi_end:
        ; Zero every remaining output word up to r + 384.
        cmp	rcx, r12
        jge	L_3072_from_bin_movbe_zero_end
L_3072_from_bin_movbe_zero_start:
        mov	QWORD PTR [rcx], 0
        add	rcx, 8
        cmp	rcx, r12
        jl	L_3072_from_bin_movbe_zero_start
L_3072_from_bin_movbe_zero_end:
        pop	r12
        ret
sp_3072_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Store the 48-word number r into a as big endian bytes.
;  * Fixed length number of bytes written: 384
;  * Byte order is reversed with the bswap instruction.
;  *
;  * Arguments are taken from rcx (r) and rdx (a).
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
_text SEGMENT READONLY PARA
sp_3072_to_bin_bswap_48 PROC
        ; The 24 steps below are generated at assembly time. Each step takes
        ; the two highest unread words of r, byte-swaps them and writes them
        ; to the next 16 bytes of a, so word 47 lands at a[0..7] and word 0
        ; at a[376..383].
sp_3072_to_bin_bswap_48_ofs = 0
        REPT	24
        mov	rax, QWORD PTR [rcx+(376-sp_3072_to_bin_bswap_48_ofs)]
        mov	r8, QWORD PTR [rcx+(368-sp_3072_to_bin_bswap_48_ofs)]
        bswap	rax
        bswap	r8
        mov	QWORD PTR [rdx+sp_3072_to_bin_bswap_48_ofs], rax
        mov	QWORD PTR [rdx+(sp_3072_to_bin_bswap_48_ofs+8)], r8
sp_3072_to_bin_bswap_48_ofs = sp_3072_to_bin_bswap_48_ofs + 16
        ENDM
        ret
sp_3072_to_bin_bswap_48 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Store the 48-word number r into a as big endian bytes.
;  * Fixed length number of bytes written: 384
;  * Byte order is reversed while loading, using the optional movbe
;  * instruction.
;  *
;  * Arguments are taken from rcx (r) and rdx (a).
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
_text SEGMENT READONLY PARA
sp_3072_to_bin_movbe_48 PROC
        ; The 24 steps below are generated at assembly time. Each step loads
        ; the two highest unread words of r byte-reversed and writes them to
        ; the next 16 bytes of a, so word 47 lands at a[0..7] and word 0 at
        ; a[376..383].
sp_3072_to_bin_movbe_48_ofs = 0
        REPT	24
        movbe	rax, QWORD PTR [rcx+(376-sp_3072_to_bin_movbe_48_ofs)]
        movbe	r8, QWORD PTR [rcx+(368-sp_3072_to_bin_movbe_48_ofs)]
        mov	QWORD PTR [rdx+sp_3072_to_bin_movbe_48_ofs], rax
        mov	QWORD PTR [rdx+(sp_3072_to_bin_movbe_48_ofs+8)], r8
sp_3072_to_bin_movbe_48_ofs = sp_3072_to_bin_movbe_48_ofs + 16
        ENDM
        ret
sp_3072_to_bin_movbe_48 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_12 PROC
        push	r12
        mov	r9, rdx
        sub	rsp, 96
        ; A[0] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9]
        xor	r12, r12
        mov	QWORD PTR [rsp], rax
        mov	r11, rdx
        ; A[0] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+24], r10
        ; A[0] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+32], r11
        ; A[0] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+40], r12
        ; A[0] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+48], r10
        ; A[0] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+56], r11
        ; A[0] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+64], r12
        ; A[0] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[1] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+8]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rsp+72], r10
        ; A[0] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[1] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+8]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[2] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+16]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+80], r11
        ; A[0] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+8]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[2] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+16]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[3] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+24]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[0]
        mov	rax, QWORD PTR [r8]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+88], r12
        ; A[1] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+8]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[2] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+16]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[3] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+24]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[4] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+32]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+96], r10
        ; A[2] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+16]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[3] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+24]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[4] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+32]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[5] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+40]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+104], r11
        ; A[3] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+24]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[4] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+32]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[5] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+40]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[6] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+48]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+112], r12
        ; A[4] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+32]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[5] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+40]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[6] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+48]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[7] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+56]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+120], r10
        ; A[5] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+40]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[6] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+48]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[7] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+56]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[8] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+64]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+128], r11
        ; A[6] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+48]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[7] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+56]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+64]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+72]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+136], r12
        ; A[7] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+56]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[8] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+64]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[9] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+72]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[10] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+80]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+144], r10
        ; A[8] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+64]
        xor	r10, r10
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[9] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+72]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[10] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+80]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        ; A[11] * B[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+152], r11
        ; A[9] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+72]
        xor	r11, r11
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[10] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+80]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[11] * B[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r9+88]
        add	r12, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+160], r12
        ; A[10] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+80]
        xor	r12, r12
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        ; A[11] * B[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r9+88]
        add	r10, rax
        adc	r11, rdx
        adc	r12, 0
        mov	QWORD PTR [rcx+168], r10
        ; A[11] * B[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r9+88]
        add	r11, rax
        adc	r12, rdx
        mov	QWORD PTR [rcx+176], r11
        mov	QWORD PTR [rcx+184], r12
        mov	rax, QWORD PTR [rsp]
        mov	rdx, QWORD PTR [rsp+8]
        mov	r10, QWORD PTR [rsp+16]
        mov	r11, QWORD PTR [rsp+24]
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], rdx
        mov	QWORD PTR [rcx+16], r10
        mov	QWORD PTR [rcx+24], r11
        mov	rax, QWORD PTR [rsp+32]
        mov	rdx, QWORD PTR [rsp+40]
        mov	r10, QWORD PTR [rsp+48]
        mov	r11, QWORD PTR [rsp+56]
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], rdx
        mov	QWORD PTR [rcx+48], r10
        mov	QWORD PTR [rcx+56], r11
        mov	rax, QWORD PTR [rsp+64]
        mov	rdx, QWORD PTR [rsp+72]
        mov	r10, QWORD PTR [rsp+80]
        mov	r11, QWORD PTR [rsp+88]
        mov	QWORD PTR [rcx+64], rax
        mov	QWORD PTR [rcx+72], rdx
        mov	QWORD PTR [rcx+80], r10
        mov	QWORD PTR [rcx+88], r11
        add	rsp, 96
        pop	r12
        ret
sp_3072_mul_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
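; /* Multiply a and b into r. (r = a * b)
;  * Uses the mulx, adcx and adox instructions.
;  * If r is the same pointer as a or b, the low words of the product are
;  * accumulated in a 96-byte stack temporary instead of directly in r.
;  *
;  * r   Result of multiplication.
;  * a   First number to multiply.
;  * b   Second number to multiply.
;  */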
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_12 PROC
        push	rbx
        push	rbp
        push	r12
        push	r13
        push	r14
        mov	rbp, r8
        mov	r8, rcx
        mov	r9, rdx
        sub	rsp, 96
        cmp	r9, r8
        mov	rbx, rsp
        cmovne	rbx, r8
        cmp	rbp, r8
        cmove	rbx, rsp
        add	r8, 96
        xor	r14, r14
        mov	rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx	r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx	r12, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx], r10
        adcx	r11, rax
        mov	QWORD PTR [rbx+8], r11
        ; A[0] * B[2]
        mulx	r10, rax, QWORD PTR [rbp+16]
        adcx	r12, rax
        ; A[0] * B[3]
        mulx	r11, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+16], r12
        adcx	r10, rax
        mov	QWORD PTR [rbx+24], r10
        ; A[0] * B[4]
        mulx	r12, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        ; A[0] * B[5]
        mulx	r10, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+32], r11
        adcx	r12, rax
        mov	QWORD PTR [rbx+40], r12
        ; A[0] * B[6]
        mulx	r11, rax, QWORD PTR [rbp+48]
        adcx	r10, rax
        ; A[0] * B[7]
        mulx	r12, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+48], r10
        adcx	r11, rax
        mov	QWORD PTR [rbx+56], r11
        ; A[0] * B[8]
        mulx	r10, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        ; A[0] * B[9]
        mulx	r11, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+64], r12
        adcx	r10, rax
        mov	QWORD PTR [rbx+72], r10
        ; A[0] * B[10]
        mulx	r12, rax, QWORD PTR [rbp+80]
        adcx	r11, rax
        ; A[0] * B[11]
        mulx	r10, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adcx	r10, r14
        mov	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [rbx+88], r12
        mov	QWORD PTR [r8], r10
        mov	rdx, QWORD PTR [r9+8]
        mov	r11, QWORD PTR [rbx+8]
        mov	r12, QWORD PTR [rbx+16]
        mov	r10, QWORD PTR [rbx+24]
        ; A[1] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[1] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+8], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+16], r12
        mov	r11, QWORD PTR [rbx+32]
        mov	r12, QWORD PTR [rbx+40]
        ; A[1] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r10, rax
        adox	r11, rcx
        ; A[1] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+24], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+32], r11
        mov	r10, QWORD PTR [rbx+48]
        mov	r11, QWORD PTR [rbx+56]
        ; A[1] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r10, rcx
        ; A[1] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+40], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+48], r10
        mov	r12, QWORD PTR [rbx+64]
        mov	r10, QWORD PTR [rbx+72]
        ; A[1] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r11, rax
        adox	r12, rcx
        ; A[1] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+56], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+64], r12
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        ; A[1] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[1] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+80], r11
        mov	r10, QWORD PTR [r8]
        ; A[1] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r12, rax
        adox	r10, rcx
        ; A[1] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [rbx+88], r12
        mov	r11, r14
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8], r10
        mov	QWORD PTR [r8+8], r11
        mov	rdx, QWORD PTR [r9+16]
        mov	r12, QWORD PTR [rbx+16]
        mov	r10, QWORD PTR [rbx+24]
        mov	r11, QWORD PTR [rbx+32]
        ; A[2] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r10, rcx
        ; A[2] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+16], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+24], r10
        mov	r12, QWORD PTR [rbx+40]
        mov	r10, QWORD PTR [rbx+48]
        ; A[2] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r11, rax
        adox	r12, rcx
        ; A[2] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+32], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+40], r12
        mov	r11, QWORD PTR [rbx+56]
        mov	r12, QWORD PTR [rbx+64]
        ; A[2] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[2] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+48], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+56], r11
        mov	r10, QWORD PTR [rbx+72]
        mov	r11, QWORD PTR [rbx+80]
        ; A[2] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r12, rax
        adox	r10, rcx
        ; A[2] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+64], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+72], r10
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        ; A[2] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[2] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+88], r12
        mov	r11, QWORD PTR [r8+8]
        ; A[2] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r10, rax
        adox	r11, rcx
        ; A[2] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8], r10
        mov	r12, r14
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+8], r11
        mov	QWORD PTR [r8+16], r12
        mov	rdx, QWORD PTR [r9+24]
        mov	r10, QWORD PTR [rbx+24]
        mov	r11, QWORD PTR [rbx+32]
        mov	r12, QWORD PTR [rbx+40]
        ; A[3] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[3] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+24], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+32], r11
        mov	r10, QWORD PTR [rbx+48]
        mov	r11, QWORD PTR [rbx+56]
        ; A[3] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r12, rax
        adox	r10, rcx
        ; A[3] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+40], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+48], r10
        mov	r12, QWORD PTR [rbx+64]
        mov	r10, QWORD PTR [rbx+72]
        ; A[3] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[3] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+56], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+64], r12
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        ; A[3] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r10, rax
        adox	r11, rcx
        ; A[3] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+80], r11
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[3] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r10, rcx
        ; A[3] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [rbx+88], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        ; A[3] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r11, rax
        adox	r12, rcx
        ; A[3] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+8], r11
        mov	r10, r14
        adcx	r12, rax
        adox	r10, rcx
        adcx	r10, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+16], r12
        mov	QWORD PTR [r8+24], r10
        mov	rdx, QWORD PTR [r9+32]
        mov	r11, QWORD PTR [rbx+32]
        mov	r12, QWORD PTR [rbx+40]
        mov	r10, QWORD PTR [rbx+48]
        ; A[4] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[4] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+32], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+40], r12
        mov	r11, QWORD PTR [rbx+56]
        mov	r12, QWORD PTR [rbx+64]
        ; A[4] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r10, rax
        adox	r11, rcx
        ; A[4] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+48], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+56], r11
        mov	r10, QWORD PTR [rbx+72]
        mov	r11, QWORD PTR [rbx+80]
        ; A[4] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r10, rcx
        ; A[4] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+64], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+72], r10
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        ; A[4] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r11, rax
        adox	r12, rcx
        ; A[4] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+88], r12
        mov	r11, QWORD PTR [r8+8]
        mov	r12, QWORD PTR [r8+16]
        ; A[4] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[4] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+24]
        ; A[4] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r12, rax
        adox	r10, rcx
        ; A[4] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+16], r12
        mov	r11, r14
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+24], r10
        mov	QWORD PTR [r8+32], r11
        mov	rdx, QWORD PTR [r9+40]
        mov	r12, QWORD PTR [rbx+40]
        mov	r10, QWORD PTR [rbx+48]
        mov	r11, QWORD PTR [rbx+56]
        ; A[5] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r10, rcx
        ; A[5] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+40], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+48], r10
        mov	r12, QWORD PTR [rbx+64]
        mov	r10, QWORD PTR [rbx+72]
        ; A[5] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r11, rax
        adox	r12, rcx
        ; A[5] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+56], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+64], r12
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        ; A[5] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+80], r11
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[5] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r12, rax
        adox	r10, rcx
        ; A[5] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [rbx+88], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[5] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[5] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+8], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        ; A[5] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r10, rax
        adox	r11, rcx
        ; A[5] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+24], r10
        mov	r12, r14
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+32], r11
        mov	QWORD PTR [r8+40], r12
        mov	rdx, QWORD PTR [r9+48]
        mov	r10, QWORD PTR [rbx+48]
        mov	r11, QWORD PTR [rbx+56]
        mov	r12, QWORD PTR [rbx+64]
        ; A[6] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+48], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+56], r11
        mov	r10, QWORD PTR [rbx+72]
        mov	r11, QWORD PTR [rbx+80]
        ; A[6] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r12, rax
        adox	r10, rcx
        ; A[6] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+64], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+72], r10
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        ; A[6] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[6] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+88], r12
        mov	r11, QWORD PTR [r8+8]
        mov	r12, QWORD PTR [r8+16]
        ; A[6] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r10, rax
        adox	r11, rcx
        ; A[6] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+24]
        mov	r11, QWORD PTR [r8+32]
        ; A[6] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r10, rcx
        ; A[6] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+16], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+24], r10
        mov	r12, QWORD PTR [r8+40]
        ; A[6] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r11, rax
        adox	r12, rcx
        ; A[6] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+32], r11
        mov	r10, r14
        adcx	r12, rax
        adox	r10, rcx
        adcx	r10, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+40], r12
        mov	QWORD PTR [r8+48], r10
        mov	rdx, QWORD PTR [r9+56]
        mov	r11, QWORD PTR [rbx+56]
        mov	r12, QWORD PTR [rbx+64]
        mov	r10, QWORD PTR [rbx+72]
        ; A[7] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+56], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+64], r12
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        ; A[7] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r10, rax
        adox	r11, rcx
        ; A[7] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+80], r11
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[7] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r10, rcx
        ; A[7] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [rbx+88], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[7] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r11, rax
        adox	r12, rcx
        ; A[7] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+8], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        mov	r12, QWORD PTR [r8+40]
        ; A[7] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[7] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+24], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+32], r11
        mov	r10, QWORD PTR [r8+48]
        ; A[7] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r12, rax
        adox	r10, rcx
        ; A[7] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+40], r12
        mov	r11, r14
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+48], r10
        mov	QWORD PTR [r8+56], r11
        mov	rdx, QWORD PTR [r9+64]
        mov	r12, QWORD PTR [rbx+64]
        mov	r10, QWORD PTR [rbx+72]
        mov	r11, QWORD PTR [rbx+80]
        ; A[8] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r10, rcx
        ; A[8] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+64], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbx+72], r10
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        ; A[8] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r11, rax
        adox	r12, rcx
        ; A[8] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+88], r12
        mov	r11, QWORD PTR [r8+8]
        mov	r12, QWORD PTR [r8+16]
        ; A[8] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[8] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+24]
        mov	r11, QWORD PTR [r8+32]
        ; A[8] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r12, rax
        adox	r10, rcx
        ; A[8] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+16], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+24], r10
        mov	r12, QWORD PTR [r8+40]
        mov	r10, QWORD PTR [r8+48]
        ; A[8] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[8] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+32], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+40], r12
        mov	r11, QWORD PTR [r8+56]
        ; A[8] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r10, rax
        adox	r11, rcx
        ; A[8] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+48], r10
        mov	r12, r14
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+56], r11
        mov	QWORD PTR [r8+64], r12
        mov	rdx, QWORD PTR [r9+72]
        mov	r10, QWORD PTR [rbx+72]
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        ; A[9] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r10, rax
        adox	r11, rcx
        ; A[9] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+72], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [rbx+80], r11
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[9] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r12, rax
        adox	r10, rcx
        ; A[9] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [rbx+88], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[9] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r11, rax
        adox	r12, rcx
        ; A[9] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+8], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        mov	r12, QWORD PTR [r8+40]
        ; A[9] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r10, rax
        adox	r11, rcx
        ; A[9] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+24], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+32], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        ; A[9] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r12, rax
        adox	r10, rcx
        ; A[9] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+40], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+48], r10
        mov	r12, QWORD PTR [r8+64]
        ; A[9] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r11, rax
        adox	r12, rcx
        ; A[9] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+56], r11
        mov	r10, r14
        adcx	r12, rax
        adox	r10, rcx
        adcx	r10, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+64], r12
        mov	QWORD PTR [r8+72], r10
        mov	rdx, QWORD PTR [r9+80]
        mov	r11, QWORD PTR [rbx+80]
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        ; A[10] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r11, rax
        adox	r12, rcx
        ; A[10] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+80], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [rbx+88], r12
        mov	r11, QWORD PTR [r8+8]
        mov	r12, QWORD PTR [r8+16]
        ; A[10] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r10, rax
        adox	r11, rcx
        ; A[10] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [r8], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+24]
        mov	r11, QWORD PTR [r8+32]
        ; A[10] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r12, rax
        adox	r10, rcx
        ; A[10] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+16], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+24], r10
        mov	r12, QWORD PTR [r8+40]
        mov	r10, QWORD PTR [r8+48]
        ; A[10] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r11, rax
        adox	r12, rcx
        ; A[10] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+32], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+40], r12
        mov	r11, QWORD PTR [r8+56]
        mov	r12, QWORD PTR [r8+64]
        ; A[10] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r10, rax
        adox	r11, rcx
        ; A[10] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+48], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+56], r11
        mov	r10, QWORD PTR [r8+72]
        ; A[10] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r12, rax
        adox	r10, rcx
        ; A[10] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+64], r12
        mov	r11, r14
        adcx	r10, rax
        adox	r11, rcx
        adcx	r11, r13
        mov	r13, r14
        adox	r13, r14
        adcx	r13, r14
        mov	QWORD PTR [r8+72], r10
        mov	QWORD PTR [r8+80], r11
        mov	rdx, QWORD PTR [r9+88]
        mov	r12, QWORD PTR [rbx+88]
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[11] * B[0]
        mulx	rcx, rax, QWORD PTR [rbp]
        adcx	r12, rax
        adox	r10, rcx
        ; A[11] * B[1]
        mulx	rcx, rax, QWORD PTR [rbp+8]
        mov	QWORD PTR [rbx+88], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r12, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [r8+24]
        ; A[11] * B[2]
        mulx	rcx, rax, QWORD PTR [rbp+16]
        adcx	r11, rax
        adox	r12, rcx
        ; A[11] * B[3]
        mulx	rcx, rax, QWORD PTR [rbp+24]
        mov	QWORD PTR [r8+8], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+16], r12
        mov	r11, QWORD PTR [r8+32]
        mov	r12, QWORD PTR [r8+40]
        ; A[11] * B[4]
        mulx	rcx, rax, QWORD PTR [rbp+32]
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] * B[5]
        mulx	rcx, rax, QWORD PTR [rbp+40]
        mov	QWORD PTR [r8+24], r10
        adcx	r11, rax
        adox	r12, rcx
        mov	QWORD PTR [r8+32], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        ; A[11] * B[6]
        mulx	rcx, rax, QWORD PTR [rbp+48]
        adcx	r12, rax
        adox	r10, rcx
        ; A[11] * B[7]
        mulx	rcx, rax, QWORD PTR [rbp+56]
        mov	QWORD PTR [r8+40], r12
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+48], r10
        mov	r12, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [r8+72]
        ; A[11] * B[8]
        mulx	rcx, rax, QWORD PTR [rbp+64]
        adcx	r11, rax
        adox	r12, rcx
        ; A[11] * B[9]
        mulx	rcx, rax, QWORD PTR [rbp+72]
        mov	QWORD PTR [r8+56], r11
        adcx	r12, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+64], r12
        mov	r11, QWORD PTR [r8+80]
        ; A[11] * B[10]
        mulx	rcx, rax, QWORD PTR [rbp+80]
        adcx	r10, rax
        adox	r11, rcx
        ; A[11] * B[11]
        mulx	rcx, rax, QWORD PTR [rbp+88]
        mov	QWORD PTR [r8+72], r10
        mov	r12, r14
        adcx	r11, rax
        adox	r12, rcx
        adcx	r12, r13
        mov	QWORD PTR [r8+80], r11
        mov	QWORD PTR [r8+88], r12
        sub	r8, 96
        cmp	r9, r8
        je	L_start_3072_mul_avx2_12
        cmp	rbp, r8
        jne	L_end_3072_mul_avx2_12
L_start_3072_mul_avx2_12:
        vmovdqu	xmm0, OWORD PTR [rbx]
        vmovups	OWORD PTR [r8], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+16]
        vmovups	OWORD PTR [r8+16], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+32]
        vmovups	OWORD PTR [r8+32], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+48]
        vmovups	OWORD PTR [r8+48], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+64]
        vmovups	OWORD PTR [r8+64], xmm0
        vmovdqu	xmm0, OWORD PTR [rbx+80]
        vmovups	OWORD PTR [r8+80], xmm0
L_end_3072_mul_avx2_12:
        add	rsp, 96
        pop	r14
        pop	r13
        pop	r12
        pop	rbp
        pop	rbx
        ret
sp_3072_mul_avx2_12 ENDP
_text ENDS
ENDIF
; /* Add two integers of 12 64-bit limbs each. (r = a + b)
;  *
;  * rcx  r  Receives the 12-limb sum.
;  * rdx  a  First 12-limb operand.
;  * r8   b  Second 12-limb operand.
;  *
;  * Returns in rax the carry out of the most significant limb (0 or 1).
;  * Scratch registers: r9, r10. Flags are modified.
;  */
_text SEGMENT READONLY PARA
sp_3072_add_12 PROC
        ; Limb 0 opens the carry chain with ADD. rax is zeroed before the
        ; ADD, so the XOR cannot wipe out a carry that is still needed.
        mov	r9, QWORD PTR [rdx]
        xor	rax, rax
        add	r9, QWORD PTR [r8]
        ; Limb 1. The store of each finished limb sits between the next
        ; load and its ADC; MOV does not touch the flags, so CF survives.
        mov	r10, QWORD PTR [rdx+8]
        mov	QWORD PTR [rcx], r9
        adc	r10, QWORD PTR [r8+8]
        ; Limbs 2..11, two per expansion, alternating between r9 and r10.
        FOR	limb_off, <16, 32, 48, 64, 80>
        mov	r9, QWORD PTR [rdx+limb_off]
        mov	QWORD PTR [rcx+limb_off-8], r10
        adc	r9, QWORD PTR [r8+limb_off]
        mov	r10, QWORD PTR [rdx+limb_off+8]
        mov	QWORD PTR [rcx+limb_off], r9
        adc	r10, QWORD PTR [r8+limb_off+8]
        ENDM
        ; Write the top limb, then fold the final carry into the result.
        mov	QWORD PTR [rcx+88], r10
        adc	rax, 0
        ret
sp_3072_add_12 ENDP
_text ENDS
; /* Subtract one 24-limb integer from another in place. (a -= b)
;  *
;  * rcx  a  24 64-bit limbs; minuend on entry, difference on return.
;  * rdx  b  24 64-bit limbs; subtrahend, only read.
;  *
;  * Returns in rax a borrow mask: 0 when the subtraction did not borrow
;  * out of the top limb, all ones when it did.
;  * Scratch registers: r8, r9. Flags are modified.
;  */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_24 PROC
        ; Limb 0 opens the borrow chain with SUB.
        mov	r8, QWORD PTR [rcx]
        sub	r8, QWORD PTR [rdx]
        ; Limb 1. Each finished limb is written back after the next limb
        ; has been loaded; MOV leaves CF (the running borrow) untouched.
        mov	r9, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	r9, QWORD PTR [rdx+8]
        ; Limbs 2..23, two per expansion, alternating between r8 and r9.
        FOR	limb_off, <16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176>
        mov	r8, QWORD PTR [rcx+limb_off]
        mov	QWORD PTR [rcx+limb_off-8], r9
        sbb	r8, QWORD PTR [rdx+limb_off]
        mov	r9, QWORD PTR [rcx+limb_off+8]
        mov	QWORD PTR [rcx+limb_off], r8
        sbb	r9, QWORD PTR [rdx+limb_off+8]
        ENDM
        ; Write the top limb, then spread the final borrow across rax
        ; (rax - rax - CF gives 0 or -1).
        mov	QWORD PTR [rcx+184], r9
        sbb	rax, rax
        ret
sp_3072_sub_in_place_24 ENDP
_text ENDS
; /* Add two integers of 24 64-bit limbs each. (r = a + b)
;  *
;  * rcx  r  Receives the 24-limb sum.
;  * rdx  a  First 24-limb operand.
;  * r8   b  Second 24-limb operand.
;  *
;  * Returns in rax the carry out of the most significant limb (0 or 1).
;  * Scratch registers: r9, r10. Flags are modified.
;  */
_text SEGMENT READONLY PARA
sp_3072_add_24 PROC
        ; Limb 0 opens the carry chain with ADD. rax is zeroed before the
        ; ADD, so the XOR cannot wipe out a carry that is still needed.
        mov	r9, QWORD PTR [rdx]
        xor	rax, rax
        add	r9, QWORD PTR [r8]
        ; Limb 1. The store of each finished limb sits between the next
        ; load and its ADC; MOV does not touch the flags, so CF survives.
        mov	r10, QWORD PTR [rdx+8]
        mov	QWORD PTR [rcx], r9
        adc	r10, QWORD PTR [r8+8]
        ; Limbs 2..23, two per expansion, alternating between r9 and r10.
        FOR	limb_off, <16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176>
        mov	r9, QWORD PTR [rdx+limb_off]
        mov	QWORD PTR [rcx+limb_off-8], r10
        adc	r9, QWORD PTR [r8+limb_off]
        mov	r10, QWORD PTR [rdx+limb_off+8]
        mov	QWORD PTR [rcx+limb_off], r9
        adc	r10, QWORD PTR [r8+limb_off+8]
        ENDM
        ; Write the top limb, then fold the final carry into the result.
        mov	QWORD PTR [rcx+184], r10
        adc	rax, 0
        ret
sp_3072_add_24 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_24 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 616
        mov	QWORD PTR [rsp+576], rcx
        mov	QWORD PTR [rsp+584], rdx
        mov	QWORD PTR [rsp+592], r8
        lea	r12, QWORD PTR [rsp+384]
        lea	r14, QWORD PTR [rdx+96]
        ; Add
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	QWORD PTR [r12+88], r10
        adc	r15, 0
        mov	QWORD PTR [rsp+600], r15
        lea	r13, QWORD PTR [rsp+480]
        lea	r14, QWORD PTR [r8+96]
        ; Add
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	QWORD PTR [r13+88], r10
        adc	rdi, 0
        mov	QWORD PTR [rsp+608], rdi
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_3072_mul_12
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        lea	rcx, QWORD PTR [rsp+192]
        add	r8, 96
        add	rdx, 96
        call	sp_3072_mul_12
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        mov	rcx, QWORD PTR [rsp+576]
        call	sp_3072_mul_12
IFDEF _WIN64
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        mov	rcx, QWORD PTR [rsp+576]
ENDIF
        mov	r15, QWORD PTR [rsp+600]
        mov	rdi, QWORD PTR [rsp+608]
        mov	rsi, QWORD PTR [rsp+576]
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+384]
        lea	r13, QWORD PTR [rsp+480]
        and	r11, rdi
        neg	r15
        neg	rdi
        add	rsi, 192
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12], rax
        mov	QWORD PTR [r13], r9
        mov	rax, QWORD PTR [r12+8]
        mov	r9, QWORD PTR [r13+8]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+8], rax
        mov	QWORD PTR [r13+8], r9
        mov	rax, QWORD PTR [r12+16]
        mov	r9, QWORD PTR [r13+16]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+16], rax
        mov	QWORD PTR [r13+16], r9
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+24], rax
        mov	QWORD PTR [r13+24], r9
        mov	rax, QWORD PTR [r12+32]
        mov	r9, QWORD PTR [r13+32]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+32], rax
        mov	QWORD PTR [r13+32], r9
        mov	rax, QWORD PTR [r12+40]
        mov	r9, QWORD PTR [r13+40]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+40], rax
        mov	QWORD PTR [r13+40], r9
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+48], rax
        mov	QWORD PTR [r13+48], r9
        mov	rax, QWORD PTR [r12+56]
        mov	r9, QWORD PTR [r13+56]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+56], rax
        mov	QWORD PTR [r13+56], r9
        mov	rax, QWORD PTR [r12+64]
        mov	r9, QWORD PTR [r13+64]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+64], rax
        mov	QWORD PTR [r13+64], r9
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+72], rax
        mov	QWORD PTR [r13+72], r9
        mov	rax, QWORD PTR [r12+80]
        mov	r9, QWORD PTR [r13+80]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+80], rax
        mov	QWORD PTR [r13+80], r9
        mov	rax, QWORD PTR [r12+88]
        mov	r9, QWORD PTR [r13+88]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+88], rax
        mov	QWORD PTR [r13+88], r9
        mov	rax, QWORD PTR [r12]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	QWORD PTR [rsi+88], r10
        adc	r11, 0
        lea	r13, QWORD PTR [rsp+192]
        mov	r12, rsp
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	QWORD PTR [r12+184], r10
        sbb	r11, 0
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	QWORD PTR [r12+184], r10
        sbb	r11, 0
        sub	rsi, 96
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [rsi+184], r10
        adc	r11, 0
        mov	QWORD PTR [rcx+288], r11
        add	rsi, 96
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	QWORD PTR [rsi+96], rax
        ; Add to zero
        mov	rax, QWORD PTR [r13+104]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+112]
        mov	QWORD PTR [rsi+104], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+120]
        mov	QWORD PTR [rsi+112], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+128]
        mov	QWORD PTR [rsi+120], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+136]
        mov	QWORD PTR [rsi+128], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+144]
        mov	QWORD PTR [rsi+136], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+152]
        mov	QWORD PTR [rsi+144], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+160]
        mov	QWORD PTR [rsi+152], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+168]
        mov	QWORD PTR [rsi+160], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+176]
        mov	QWORD PTR [rsi+168], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+184]
        mov	QWORD PTR [rsi+176], rax
        adc	r9, 0
        mov	QWORD PTR [rsi+184], r9
        add	rsp, 616
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mul_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  *
;  * a and b are each read as 24 64-bit words and 48 words are written to r.
;  * Each operand is split into a low and a high half of 12 words and the
;  * result is assembled Karatsuba-style from three calls to
;  * sp_3072_mul_avx2_12 (callee not shown here; each call is assumed to
;  * write a 24-word product to its first argument):
;  *   z1 = sa * sb       sa, sb = low 12 words of (a_lo + a_hi), (b_lo + b_hi)
;  *   z2 = a_hi * b_hi
;  *   z0 = a_lo * b_lo   written directly to r[0..23]
;  * The carry bits ca, cb of the two half sums are handled separately with
;  * masks, so that the middle term (z1 - z2 - z0 plus the carry products) is
;  * added at word 12 and z2 is added at word 24.
;  *
;  * Stack frame (616 bytes below the six saved registers):
;  *   [rsp+0   .. rsp+191]  z1
;  *   [rsp+192 .. rsp+383]  z2
;  *   [rsp+384 .. rsp+479]  sa
;  *   [rsp+480 .. rsp+575]  sb
;  *   [rsp+576]  r    [rsp+584]  a    [rsp+592]  b
;  *   [rsp+600]  ca   [rsp+608]  cb
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_24 PROC
        ; Save the registers this routine modifies and must restore
        ; (popped again in reverse order before ret).
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 616
        ; Keep r, a and b in the frame; they are reloaded after the calls.
        mov	QWORD PTR [rsp+576], rcx
        mov	QWORD PTR [rsp+584], rdx
        mov	QWORD PTR [rsp+592], r8
        ; r12 -> sa buffer, r14 -> high half of a.
        lea	r12, QWORD PTR [rsp+384]
        lea	r14, QWORD PTR [rdx+96]
        ; Add
        ; sa = a_lo + a_hi (12 words). The stores between the adc
        ; instructions are mov, which does not touch the carry flag.
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	QWORD PTR [r12+88], r10
        ; ca = carry out of the sum (0 or 1), saved across the calls.
        adc	r15, 0
        mov	QWORD PTR [rsp+600], r15
        ; r13 -> sb buffer, r14 -> high half of b.
        lea	r13, QWORD PTR [rsp+480]
        lea	r14, QWORD PTR [r8+96]
        ; Add
        ; sb = b_lo + b_hi (12 words), same pattern as above.
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	QWORD PTR [r13+88], r10
        ; cb = carry out of the sum (0 or 1), saved across the calls.
        adc	rdi, 0
        mov	QWORD PTR [rsp+608], rdi
        ; z1 = sa * sb, written to [rsp+0].
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_3072_mul_avx2_12
        ; z2 = a_hi * b_hi, written to [rsp+192].
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        lea	rcx, QWORD PTR [rsp+192]
        add	r8, 96
        add	rdx, 96
        call	sp_3072_mul_avx2_12
        ; z0 = a_lo * b_lo, written straight into r[0..23].
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        mov	rcx, QWORD PTR [rsp+576]
        call	sp_3072_mul_avx2_12
IFDEF _WIN64
        ; rcx, rdx and r8 are volatile under the Microsoft x64 convention, so
        ; the argument pointers are reloaded; rcx is used as r below.
        mov	r8, QWORD PTR [rsp+592]
        mov	rdx, QWORD PTR [rsp+584]
        mov	rcx, QWORD PTR [rsp+576]
ENDIF
        ; Reload ca (r15) and cb (rdi); rsi = r.
        mov	r15, QWORD PTR [rsp+600]
        mov	rdi, QWORD PTR [rsp+608]
        mov	rsi, QWORD PTR [rsp+576]
        ; r11 collects the top word of the middle term, starting at ca AND cb.
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+384]
        lea	r13, QWORD PTR [rsp+480]
        and	r11, rdi
        ; Turn the 0/1 carries into 0/all-ones masks.
        neg	r15
        neg	rdi
        ; rsi -> r[24].
        add	rsi, 192
        ; r[24..35] = (cb ? sa : 0) + (ca ? sb : 0).
        ; pext with an all-ones mask returns its source unchanged and with a
        ; zero mask returns zero, and it leaves the flags alone, so the
        ; add/adc carry chain is not disturbed.
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        add	rax, r9
        mov	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [r13+8]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [r13+16]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+8], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+16], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [r13+32]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+24], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [r13+40]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+32], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+40], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [r13+56]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+48], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [r13+64]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+56], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+64], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [r13+80]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+72], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [r13+88]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+80], r9
        adc	r10, rax
        mov	QWORD PTR [rsi+88], r10
        ; Fold the carry out of the masked sum into the top word.
        adc	r11, 0
        ; r13 -> z2, r12 -> z1.
        lea	r13, QWORD PTR [rsp+192]
        mov	r12, rsp
        ; z1 -= z2 (24 words, in place on the stack).
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	QWORD PTR [r12+184], r10
        ; Borrow out of the subtraction comes off the top word.
        sbb	r11, 0
        ; z1 -= z0 (24 words); z0 is read from r[0..23] through rcx.
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	QWORD PTR [r12+184], r10
        ; Borrow out of the second subtraction also comes off the top word.
        sbb	r11, 0
        ; rsi -> r[12].
        sub	rsi, 96
        ; Add
        ; r[12..35] += z1 (24 words): upper half of z0 plus the masked sums
        ; already stored in r[24..35].
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [rsi+184], r10
        ; The accumulated top word becomes r[36].
        adc	r11, 0
        mov	QWORD PTR [rcx+288], r11
        ; rsi -> r[24].
        add	rsi, 96
        ; Add
        ; r[24..36] += z2[0..12] (13 words, so that r[36] written just above
        ; is included).
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	QWORD PTR [rsi+96], rax
        ; Add to zero
        ; r[37..47] has not been written yet, so it is set to z2[13..23]
        ; plus the carry still propagating from the addition above.
        mov	rax, QWORD PTR [r13+104]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+112]
        mov	QWORD PTR [rsi+104], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+120]
        mov	QWORD PTR [rsi+112], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+128]
        mov	QWORD PTR [rsi+120], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+136]
        mov	QWORD PTR [rsi+128], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+144]
        mov	QWORD PTR [rsi+136], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+152]
        mov	QWORD PTR [rsi+144], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+160]
        mov	QWORD PTR [rsi+152], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+168]
        mov	QWORD PTR [rsi+160], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+176]
        mov	QWORD PTR [rsi+168], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+184]
        mov	QWORD PTR [rsi+176], rax
        adc	r9, 0
        mov	QWORD PTR [rsi+184], r9
        ; Release the frame and restore the saved registers.
        add	rsp, 616
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mul_avx2_24 ENDP
_text ENDS
ENDIF
; /* Subtract b from a, storing the difference back into a. (a -= b)
;  *
;  * Works through 48 64-bit words from least to most significant, with the
;  * borrow carried between words in the carry flag.
;  *
;  * a  A single precision integer and result.
;  * b  A single precision integer.
;  *
;  * On return rax is 0 when the top word produced no borrow and all ones
;  * when it did.
;  */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_48 PROC
        ; Word 0 opens the borrow chain with a plain sub.
        mov	r8, QWORD PTR [rcx]
        sub	r8, QWORD PTR [rdx]
        mov	QWORD PTR [rcx], r8
        ; Words 1..47: load, subtract with borrow, store. The mov
        ; instructions leave the carry flag untouched between the sbb steps.
        mov	r9, QWORD PTR [rcx+8]
        sbb	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [rcx+8], r9
        mov	r8, QWORD PTR [rcx+16]
        sbb	r8, QWORD PTR [rdx+16]
        mov	QWORD PTR [rcx+16], r8
        mov	r9, QWORD PTR [rcx+24]
        sbb	r9, QWORD PTR [rdx+24]
        mov	QWORD PTR [rcx+24], r9
        mov	r8, QWORD PTR [rcx+32]
        sbb	r8, QWORD PTR [rdx+32]
        mov	QWORD PTR [rcx+32], r8
        mov	r9, QWORD PTR [rcx+40]
        sbb	r9, QWORD PTR [rdx+40]
        mov	QWORD PTR [rcx+40], r9
        mov	r8, QWORD PTR [rcx+48]
        sbb	r8, QWORD PTR [rdx+48]
        mov	QWORD PTR [rcx+48], r8
        mov	r9, QWORD PTR [rcx+56]
        sbb	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [rcx+56], r9
        mov	r8, QWORD PTR [rcx+64]
        sbb	r8, QWORD PTR [rdx+64]
        mov	QWORD PTR [rcx+64], r8
        mov	r9, QWORD PTR [rcx+72]
        sbb	r9, QWORD PTR [rdx+72]
        mov	QWORD PTR [rcx+72], r9
        mov	r8, QWORD PTR [rcx+80]
        sbb	r8, QWORD PTR [rdx+80]
        mov	QWORD PTR [rcx+80], r8
        mov	r9, QWORD PTR [rcx+88]
        sbb	r9, QWORD PTR [rdx+88]
        mov	QWORD PTR [rcx+88], r9
        mov	r8, QWORD PTR [rcx+96]
        sbb	r8, QWORD PTR [rdx+96]
        mov	QWORD PTR [rcx+96], r8
        mov	r9, QWORD PTR [rcx+104]
        sbb	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [rcx+104], r9
        mov	r8, QWORD PTR [rcx+112]
        sbb	r8, QWORD PTR [rdx+112]
        mov	QWORD PTR [rcx+112], r8
        mov	r9, QWORD PTR [rcx+120]
        sbb	r9, QWORD PTR [rdx+120]
        mov	QWORD PTR [rcx+120], r9
        mov	r8, QWORD PTR [rcx+128]
        sbb	r8, QWORD PTR [rdx+128]
        mov	QWORD PTR [rcx+128], r8
        mov	r9, QWORD PTR [rcx+136]
        sbb	r9, QWORD PTR [rdx+136]
        mov	QWORD PTR [rcx+136], r9
        mov	r8, QWORD PTR [rcx+144]
        sbb	r8, QWORD PTR [rdx+144]
        mov	QWORD PTR [rcx+144], r8
        mov	r9, QWORD PTR [rcx+152]
        sbb	r9, QWORD PTR [rdx+152]
        mov	QWORD PTR [rcx+152], r9
        mov	r8, QWORD PTR [rcx+160]
        sbb	r8, QWORD PTR [rdx+160]
        mov	QWORD PTR [rcx+160], r8
        mov	r9, QWORD PTR [rcx+168]
        sbb	r9, QWORD PTR [rdx+168]
        mov	QWORD PTR [rcx+168], r9
        mov	r8, QWORD PTR [rcx+176]
        sbb	r8, QWORD PTR [rdx+176]
        mov	QWORD PTR [rcx+176], r8
        mov	r9, QWORD PTR [rcx+184]
        sbb	r9, QWORD PTR [rdx+184]
        mov	QWORD PTR [rcx+184], r9
        mov	r8, QWORD PTR [rcx+192]
        sbb	r8, QWORD PTR [rdx+192]
        mov	QWORD PTR [rcx+192], r8
        mov	r9, QWORD PTR [rcx+200]
        sbb	r9, QWORD PTR [rdx+200]
        mov	QWORD PTR [rcx+200], r9
        mov	r8, QWORD PTR [rcx+208]
        sbb	r8, QWORD PTR [rdx+208]
        mov	QWORD PTR [rcx+208], r8
        mov	r9, QWORD PTR [rcx+216]
        sbb	r9, QWORD PTR [rdx+216]
        mov	QWORD PTR [rcx+216], r9
        mov	r8, QWORD PTR [rcx+224]
        sbb	r8, QWORD PTR [rdx+224]
        mov	QWORD PTR [rcx+224], r8
        mov	r9, QWORD PTR [rcx+232]
        sbb	r9, QWORD PTR [rdx+232]
        mov	QWORD PTR [rcx+232], r9
        mov	r8, QWORD PTR [rcx+240]
        sbb	r8, QWORD PTR [rdx+240]
        mov	QWORD PTR [rcx+240], r8
        mov	r9, QWORD PTR [rcx+248]
        sbb	r9, QWORD PTR [rdx+248]
        mov	QWORD PTR [rcx+248], r9
        mov	r8, QWORD PTR [rcx+256]
        sbb	r8, QWORD PTR [rdx+256]
        mov	QWORD PTR [rcx+256], r8
        mov	r9, QWORD PTR [rcx+264]
        sbb	r9, QWORD PTR [rdx+264]
        mov	QWORD PTR [rcx+264], r9
        mov	r8, QWORD PTR [rcx+272]
        sbb	r8, QWORD PTR [rdx+272]
        mov	QWORD PTR [rcx+272], r8
        mov	r9, QWORD PTR [rcx+280]
        sbb	r9, QWORD PTR [rdx+280]
        mov	QWORD PTR [rcx+280], r9
        mov	r8, QWORD PTR [rcx+288]
        sbb	r8, QWORD PTR [rdx+288]
        mov	QWORD PTR [rcx+288], r8
        mov	r9, QWORD PTR [rcx+296]
        sbb	r9, QWORD PTR [rdx+296]
        mov	QWORD PTR [rcx+296], r9
        mov	r8, QWORD PTR [rcx+304]
        sbb	r8, QWORD PTR [rdx+304]
        mov	QWORD PTR [rcx+304], r8
        mov	r9, QWORD PTR [rcx+312]
        sbb	r9, QWORD PTR [rdx+312]
        mov	QWORD PTR [rcx+312], r9
        mov	r8, QWORD PTR [rcx+320]
        sbb	r8, QWORD PTR [rdx+320]
        mov	QWORD PTR [rcx+320], r8
        mov	r9, QWORD PTR [rcx+328]
        sbb	r9, QWORD PTR [rdx+328]
        mov	QWORD PTR [rcx+328], r9
        mov	r8, QWORD PTR [rcx+336]
        sbb	r8, QWORD PTR [rdx+336]
        mov	QWORD PTR [rcx+336], r8
        mov	r9, QWORD PTR [rcx+344]
        sbb	r9, QWORD PTR [rdx+344]
        mov	QWORD PTR [rcx+344], r9
        mov	r8, QWORD PTR [rcx+352]
        sbb	r8, QWORD PTR [rdx+352]
        mov	QWORD PTR [rcx+352], r8
        mov	r9, QWORD PTR [rcx+360]
        sbb	r9, QWORD PTR [rdx+360]
        mov	QWORD PTR [rcx+360], r9
        mov	r8, QWORD PTR [rcx+368]
        sbb	r8, QWORD PTR [rdx+368]
        mov	QWORD PTR [rcx+368], r8
        mov	r9, QWORD PTR [rcx+376]
        sbb	r9, QWORD PTR [rdx+376]
        mov	QWORD PTR [rcx+376], r9
        ; rax = rax - rax - borrow: 0 without a borrow, all ones with one.
        sbb	rax, rax
        ret
sp_3072_sub_in_place_48 ENDP
_text ENDS
; /* Add b to a into r. (r = a + b)
;  *
;  * Fully unrolled addition of two 48-word (48 x 64-bit = 3072-bit) values
;  * with carry propagation. Straight-line code: no branches, no calls and
;  * no stack usage.
;  *
;  * r  A single precision integer. Result, 48 words, addressed via rcx.
;  * a  A single precision integer. First operand, 48 words, read via rdx.
;  * b  A single precision integer. Second operand, 48 words, read via r8.
;  *
;  * returns the carry out of the most significant word (0 or 1) in rax.
;  *
;  * Clobbers rax, r9, r10 and the flags.
;  * Both source words of index i are read before r[i] is written, so calling
;  * with r equal to a or to b does not corrupt the operands.
;  */
_text SEGMENT READONLY PARA
sp_3072_add_48 PROC
        ; Add
        ; Word 0 uses add to start a fresh carry chain; words 1..47 use adc.
        mov	r9, QWORD PTR [rdx]
        ; Zero the return value now, before the chain begins: xor rewrites
        ; the flags, so it cannot be placed after any add/adc.
        xor	rax, rax
        add	r9, QWORD PTR [r8]
        ; From here on only mov (which leaves the flags untouched) sits
        ; between consecutive adc instructions, so CF flows from word to word.
        ; r9 and r10 alternate so that a[i+1] is loaded before r[i] is stored.
        mov	r10, QWORD PTR [rdx+8]
        mov	QWORD PTR [rcx], r9
        adc	r10, QWORD PTR [r8+8]
        mov	r9, QWORD PTR [rdx+16]
        mov	QWORD PTR [rcx+8], r10
        adc	r9, QWORD PTR [r8+16]
        mov	r10, QWORD PTR [rdx+24]
        mov	QWORD PTR [rcx+16], r9
        adc	r10, QWORD PTR [r8+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [rcx+24], r10
        adc	r9, QWORD PTR [r8+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [rcx+32], r9
        adc	r10, QWORD PTR [r8+40]
        mov	r9, QWORD PTR [rdx+48]
        mov	QWORD PTR [rcx+40], r10
        adc	r9, QWORD PTR [r8+48]
        mov	r10, QWORD PTR [rdx+56]
        mov	QWORD PTR [rcx+48], r9
        adc	r10, QWORD PTR [r8+56]
        mov	r9, QWORD PTR [rdx+64]
        mov	QWORD PTR [rcx+56], r10
        adc	r9, QWORD PTR [r8+64]
        mov	r10, QWORD PTR [rdx+72]
        mov	QWORD PTR [rcx+64], r9
        adc	r10, QWORD PTR [r8+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [rcx+72], r10
        adc	r9, QWORD PTR [r8+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [rcx+80], r9
        adc	r10, QWORD PTR [r8+88]
        mov	r9, QWORD PTR [rdx+96]
        mov	QWORD PTR [rcx+88], r10
        adc	r9, QWORD PTR [r8+96]
        mov	r10, QWORD PTR [rdx+104]
        mov	QWORD PTR [rcx+96], r9
        adc	r10, QWORD PTR [r8+104]
        mov	r9, QWORD PTR [rdx+112]
        mov	QWORD PTR [rcx+104], r10
        adc	r9, QWORD PTR [r8+112]
        mov	r10, QWORD PTR [rdx+120]
        mov	QWORD PTR [rcx+112], r9
        adc	r10, QWORD PTR [r8+120]
        mov	r9, QWORD PTR [rdx+128]
        mov	QWORD PTR [rcx+120], r10
        adc	r9, QWORD PTR [r8+128]
        mov	r10, QWORD PTR [rdx+136]
        mov	QWORD PTR [rcx+128], r9
        adc	r10, QWORD PTR [r8+136]
        mov	r9, QWORD PTR [rdx+144]
        mov	QWORD PTR [rcx+136], r10
        adc	r9, QWORD PTR [r8+144]
        mov	r10, QWORD PTR [rdx+152]
        mov	QWORD PTR [rcx+144], r9
        adc	r10, QWORD PTR [r8+152]
        mov	r9, QWORD PTR [rdx+160]
        mov	QWORD PTR [rcx+152], r10
        adc	r9, QWORD PTR [r8+160]
        mov	r10, QWORD PTR [rdx+168]
        mov	QWORD PTR [rcx+160], r9
        adc	r10, QWORD PTR [r8+168]
        mov	r9, QWORD PTR [rdx+176]
        mov	QWORD PTR [rcx+168], r10
        adc	r9, QWORD PTR [r8+176]
        mov	r10, QWORD PTR [rdx+184]
        mov	QWORD PTR [rcx+176], r9
        adc	r10, QWORD PTR [r8+184]
        mov	r9, QWORD PTR [rdx+192]
        mov	QWORD PTR [rcx+184], r10
        adc	r9, QWORD PTR [r8+192]
        mov	r10, QWORD PTR [rdx+200]
        mov	QWORD PTR [rcx+192], r9
        adc	r10, QWORD PTR [r8+200]
        mov	r9, QWORD PTR [rdx+208]
        mov	QWORD PTR [rcx+200], r10
        adc	r9, QWORD PTR [r8+208]
        mov	r10, QWORD PTR [rdx+216]
        mov	QWORD PTR [rcx+208], r9
        adc	r10, QWORD PTR [r8+216]
        mov	r9, QWORD PTR [rdx+224]
        mov	QWORD PTR [rcx+216], r10
        adc	r9, QWORD PTR [r8+224]
        mov	r10, QWORD PTR [rdx+232]
        mov	QWORD PTR [rcx+224], r9
        adc	r10, QWORD PTR [r8+232]
        mov	r9, QWORD PTR [rdx+240]
        mov	QWORD PTR [rcx+232], r10
        adc	r9, QWORD PTR [r8+240]
        mov	r10, QWORD PTR [rdx+248]
        mov	QWORD PTR [rcx+240], r9
        adc	r10, QWORD PTR [r8+248]
        mov	r9, QWORD PTR [rdx+256]
        mov	QWORD PTR [rcx+248], r10
        adc	r9, QWORD PTR [r8+256]
        mov	r10, QWORD PTR [rdx+264]
        mov	QWORD PTR [rcx+256], r9
        adc	r10, QWORD PTR [r8+264]
        mov	r9, QWORD PTR [rdx+272]
        mov	QWORD PTR [rcx+264], r10
        adc	r9, QWORD PTR [r8+272]
        mov	r10, QWORD PTR [rdx+280]
        mov	QWORD PTR [rcx+272], r9
        adc	r10, QWORD PTR [r8+280]
        mov	r9, QWORD PTR [rdx+288]
        mov	QWORD PTR [rcx+280], r10
        adc	r9, QWORD PTR [r8+288]
        mov	r10, QWORD PTR [rdx+296]
        mov	QWORD PTR [rcx+288], r9
        adc	r10, QWORD PTR [r8+296]
        mov	r9, QWORD PTR [rdx+304]
        mov	QWORD PTR [rcx+296], r10
        adc	r9, QWORD PTR [r8+304]
        mov	r10, QWORD PTR [rdx+312]
        mov	QWORD PTR [rcx+304], r9
        adc	r10, QWORD PTR [r8+312]
        mov	r9, QWORD PTR [rdx+320]
        mov	QWORD PTR [rcx+312], r10
        adc	r9, QWORD PTR [r8+320]
        mov	r10, QWORD PTR [rdx+328]
        mov	QWORD PTR [rcx+320], r9
        adc	r10, QWORD PTR [r8+328]
        mov	r9, QWORD PTR [rdx+336]
        mov	QWORD PTR [rcx+328], r10
        adc	r9, QWORD PTR [r8+336]
        mov	r10, QWORD PTR [rdx+344]
        mov	QWORD PTR [rcx+336], r9
        adc	r10, QWORD PTR [r8+344]
        mov	r9, QWORD PTR [rdx+352]
        mov	QWORD PTR [rcx+344], r10
        adc	r9, QWORD PTR [r8+352]
        mov	r10, QWORD PTR [rdx+360]
        mov	QWORD PTR [rcx+352], r9
        adc	r10, QWORD PTR [r8+360]
        mov	r9, QWORD PTR [rdx+368]
        mov	QWORD PTR [rcx+360], r10
        adc	r9, QWORD PTR [r8+368]
        mov	r10, QWORD PTR [rdx+376]
        mov	QWORD PTR [rcx+368], r9
        adc	r10, QWORD PTR [r8+376]
        mov	QWORD PTR [rcx+376], r10
        ; Fold the carry out of the top word into rax (zeroed above), giving
        ; a return value of 0 or 1.
        adc	rax, 0
        ret
sp_3072_add_48 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_48 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 1192
        mov	QWORD PTR [rsp+1152], rcx
        mov	QWORD PTR [rsp+1160], rdx
        mov	QWORD PTR [rsp+1168], r8
        lea	r12, QWORD PTR [rsp+768]
        lea	r14, QWORD PTR [rdx+192]
        ; Add
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r12+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [r12+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [rdx+112]
        mov	QWORD PTR [r12+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r12+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	r9, QWORD PTR [rdx+128]
        mov	QWORD PTR [r12+120], rax
        adc	r9, QWORD PTR [r14+128]
        mov	r10, QWORD PTR [rdx+136]
        mov	QWORD PTR [r12+128], r9
        adc	r10, QWORD PTR [r14+136]
        mov	rax, QWORD PTR [rdx+144]
        mov	QWORD PTR [r12+136], r10
        adc	rax, QWORD PTR [r14+144]
        mov	r9, QWORD PTR [rdx+152]
        mov	QWORD PTR [r12+144], rax
        adc	r9, QWORD PTR [r14+152]
        mov	r10, QWORD PTR [rdx+160]
        mov	QWORD PTR [r12+152], r9
        adc	r10, QWORD PTR [r14+160]
        mov	rax, QWORD PTR [rdx+168]
        mov	QWORD PTR [r12+160], r10
        adc	rax, QWORD PTR [r14+168]
        mov	r9, QWORD PTR [rdx+176]
        mov	QWORD PTR [r12+168], rax
        adc	r9, QWORD PTR [r14+176]
        mov	r10, QWORD PTR [rdx+184]
        mov	QWORD PTR [r12+176], r9
        adc	r10, QWORD PTR [r14+184]
        mov	QWORD PTR [r12+184], r10
        adc	r15, 0
        mov	QWORD PTR [rsp+1176], r15
        lea	r13, QWORD PTR [rsp+960]
        lea	r14, QWORD PTR [r8+192]
        ; Add
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [r8+96]
        mov	QWORD PTR [r13+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [r8+104]
        mov	QWORD PTR [r13+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [r8+112]
        mov	QWORD PTR [r13+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [r8+120]
        mov	QWORD PTR [r13+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	r9, QWORD PTR [r8+128]
        mov	QWORD PTR [r13+120], rax
        adc	r9, QWORD PTR [r14+128]
        mov	r10, QWORD PTR [r8+136]
        mov	QWORD PTR [r13+128], r9
        adc	r10, QWORD PTR [r14+136]
        mov	rax, QWORD PTR [r8+144]
        mov	QWORD PTR [r13+136], r10
        adc	rax, QWORD PTR [r14+144]
        mov	r9, QWORD PTR [r8+152]
        mov	QWORD PTR [r13+144], rax
        adc	r9, QWORD PTR [r14+152]
        mov	r10, QWORD PTR [r8+160]
        mov	QWORD PTR [r13+152], r9
        adc	r10, QWORD PTR [r14+160]
        mov	rax, QWORD PTR [r8+168]
        mov	QWORD PTR [r13+160], r10
        adc	rax, QWORD PTR [r14+168]
        mov	r9, QWORD PTR [r8+176]
        mov	QWORD PTR [r13+168], rax
        adc	r9, QWORD PTR [r14+176]
        mov	r10, QWORD PTR [r8+184]
        mov	QWORD PTR [r13+176], r9
        adc	r10, QWORD PTR [r14+184]
        mov	QWORD PTR [r13+184], r10
        adc	rdi, 0
        mov	QWORD PTR [rsp+1184], rdi
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_3072_mul_24
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        lea	rcx, QWORD PTR [rsp+384]
        add	r8, 192
        add	rdx, 192
        call	sp_3072_mul_24
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        mov	rcx, QWORD PTR [rsp+1152]
        call	sp_3072_mul_24
IFDEF _WIN64
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        mov	rcx, QWORD PTR [rsp+1152]
ENDIF
        mov	r15, QWORD PTR [rsp+1176]
        mov	rdi, QWORD PTR [rsp+1184]
        mov	rsi, QWORD PTR [rsp+1152]
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+768]
        lea	r13, QWORD PTR [rsp+960]
        and	r11, rdi
        neg	r15
        neg	rdi
        add	rsi, 384
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12], rax
        mov	QWORD PTR [r13], r9
        mov	rax, QWORD PTR [r12+8]
        mov	r9, QWORD PTR [r13+8]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+8], rax
        mov	QWORD PTR [r13+8], r9
        mov	rax, QWORD PTR [r12+16]
        mov	r9, QWORD PTR [r13+16]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+16], rax
        mov	QWORD PTR [r13+16], r9
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+24], rax
        mov	QWORD PTR [r13+24], r9
        mov	rax, QWORD PTR [r12+32]
        mov	r9, QWORD PTR [r13+32]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+32], rax
        mov	QWORD PTR [r13+32], r9
        mov	rax, QWORD PTR [r12+40]
        mov	r9, QWORD PTR [r13+40]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+40], rax
        mov	QWORD PTR [r13+40], r9
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+48], rax
        mov	QWORD PTR [r13+48], r9
        mov	rax, QWORD PTR [r12+56]
        mov	r9, QWORD PTR [r13+56]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+56], rax
        mov	QWORD PTR [r13+56], r9
        mov	rax, QWORD PTR [r12+64]
        mov	r9, QWORD PTR [r13+64]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+64], rax
        mov	QWORD PTR [r13+64], r9
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+72], rax
        mov	QWORD PTR [r13+72], r9
        mov	rax, QWORD PTR [r12+80]
        mov	r9, QWORD PTR [r13+80]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+80], rax
        mov	QWORD PTR [r13+80], r9
        mov	rax, QWORD PTR [r12+88]
        mov	r9, QWORD PTR [r13+88]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+88], rax
        mov	QWORD PTR [r13+88], r9
        mov	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [r13+96]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+96], rax
        mov	QWORD PTR [r13+96], r9
        mov	rax, QWORD PTR [r12+104]
        mov	r9, QWORD PTR [r13+104]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+104], rax
        mov	QWORD PTR [r13+104], r9
        mov	rax, QWORD PTR [r12+112]
        mov	r9, QWORD PTR [r13+112]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+112], rax
        mov	QWORD PTR [r13+112], r9
        mov	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [r13+120]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+120], rax
        mov	QWORD PTR [r13+120], r9
        mov	rax, QWORD PTR [r12+128]
        mov	r9, QWORD PTR [r13+128]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+128], rax
        mov	QWORD PTR [r13+128], r9
        mov	rax, QWORD PTR [r12+136]
        mov	r9, QWORD PTR [r13+136]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+136], rax
        mov	QWORD PTR [r13+136], r9
        mov	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [r13+144]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+144], rax
        mov	QWORD PTR [r13+144], r9
        mov	rax, QWORD PTR [r12+152]
        mov	r9, QWORD PTR [r13+152]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+152], rax
        mov	QWORD PTR [r13+152], r9
        mov	rax, QWORD PTR [r12+160]
        mov	r9, QWORD PTR [r13+160]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+160], rax
        mov	QWORD PTR [r13+160], r9
        mov	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [r13+168]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+168], rax
        mov	QWORD PTR [r13+168], r9
        mov	rax, QWORD PTR [r12+176]
        mov	r9, QWORD PTR [r13+176]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+176], rax
        mov	QWORD PTR [r13+176], r9
        mov	rax, QWORD PTR [r12+184]
        mov	r9, QWORD PTR [r13+184]
        and	rax, rdi
        and	r9, r15
        mov	QWORD PTR [r12+184], rax
        mov	QWORD PTR [r13+184], r9
        mov	rax, QWORD PTR [r12]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r13+184]
        mov	QWORD PTR [rsi+184], r10
        adc	r11, 0
        lea	r13, QWORD PTR [rsp+384]
        mov	r12, rsp
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [r13+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [r13+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [r13+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [r13+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [r13+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [r13+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [r13+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [r13+248]
        mov	r10, QWORD PTR [r12+256]
        mov	QWORD PTR [r12+248], r9
        sbb	r10, QWORD PTR [r13+256]
        mov	rax, QWORD PTR [r12+264]
        mov	QWORD PTR [r12+256], r10
        sbb	rax, QWORD PTR [r13+264]
        mov	r9, QWORD PTR [r12+272]
        mov	QWORD PTR [r12+264], rax
        sbb	r9, QWORD PTR [r13+272]
        mov	r10, QWORD PTR [r12+280]
        mov	QWORD PTR [r12+272], r9
        sbb	r10, QWORD PTR [r13+280]
        mov	rax, QWORD PTR [r12+288]
        mov	QWORD PTR [r12+280], r10
        sbb	rax, QWORD PTR [r13+288]
        mov	r9, QWORD PTR [r12+296]
        mov	QWORD PTR [r12+288], rax
        sbb	r9, QWORD PTR [r13+296]
        mov	r10, QWORD PTR [r12+304]
        mov	QWORD PTR [r12+296], r9
        sbb	r10, QWORD PTR [r13+304]
        mov	rax, QWORD PTR [r12+312]
        mov	QWORD PTR [r12+304], r10
        sbb	rax, QWORD PTR [r13+312]
        mov	r9, QWORD PTR [r12+320]
        mov	QWORD PTR [r12+312], rax
        sbb	r9, QWORD PTR [r13+320]
        mov	r10, QWORD PTR [r12+328]
        mov	QWORD PTR [r12+320], r9
        sbb	r10, QWORD PTR [r13+328]
        mov	rax, QWORD PTR [r12+336]
        mov	QWORD PTR [r12+328], r10
        sbb	rax, QWORD PTR [r13+336]
        mov	r9, QWORD PTR [r12+344]
        mov	QWORD PTR [r12+336], rax
        sbb	r9, QWORD PTR [r13+344]
        mov	r10, QWORD PTR [r12+352]
        mov	QWORD PTR [r12+344], r9
        sbb	r10, QWORD PTR [r13+352]
        mov	rax, QWORD PTR [r12+360]
        mov	QWORD PTR [r12+352], r10
        sbb	rax, QWORD PTR [r13+360]
        mov	r9, QWORD PTR [r12+368]
        mov	QWORD PTR [r12+360], rax
        sbb	r9, QWORD PTR [r13+368]
        mov	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [r12+368], r9
        sbb	r10, QWORD PTR [r13+376]
        mov	QWORD PTR [r12+376], r10
        sbb	r11, 0
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [rcx+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [rcx+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [rcx+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [rcx+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [rcx+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [rcx+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [rcx+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [rcx+248]
        mov	r10, QWORD PTR [r12+256]
        mov	QWORD PTR [r12+248], r9
        sbb	r10, QWORD PTR [rcx+256]
        mov	rax, QWORD PTR [r12+264]
        mov	QWORD PTR [r12+256], r10
        sbb	rax, QWORD PTR [rcx+264]
        mov	r9, QWORD PTR [r12+272]
        mov	QWORD PTR [r12+264], rax
        sbb	r9, QWORD PTR [rcx+272]
        mov	r10, QWORD PTR [r12+280]
        mov	QWORD PTR [r12+272], r9
        sbb	r10, QWORD PTR [rcx+280]
        mov	rax, QWORD PTR [r12+288]
        mov	QWORD PTR [r12+280], r10
        sbb	rax, QWORD PTR [rcx+288]
        mov	r9, QWORD PTR [r12+296]
        mov	QWORD PTR [r12+288], rax
        sbb	r9, QWORD PTR [rcx+296]
        mov	r10, QWORD PTR [r12+304]
        mov	QWORD PTR [r12+296], r9
        sbb	r10, QWORD PTR [rcx+304]
        mov	rax, QWORD PTR [r12+312]
        mov	QWORD PTR [r12+304], r10
        sbb	rax, QWORD PTR [rcx+312]
        mov	r9, QWORD PTR [r12+320]
        mov	QWORD PTR [r12+312], rax
        sbb	r9, QWORD PTR [rcx+320]
        mov	r10, QWORD PTR [r12+328]
        mov	QWORD PTR [r12+320], r9
        sbb	r10, QWORD PTR [rcx+328]
        mov	rax, QWORD PTR [r12+336]
        mov	QWORD PTR [r12+328], r10
        sbb	rax, QWORD PTR [rcx+336]
        mov	r9, QWORD PTR [r12+344]
        mov	QWORD PTR [r12+336], rax
        sbb	r9, QWORD PTR [rcx+344]
        mov	r10, QWORD PTR [r12+352]
        mov	QWORD PTR [r12+344], r9
        sbb	r10, QWORD PTR [rcx+352]
        mov	rax, QWORD PTR [r12+360]
        mov	QWORD PTR [r12+352], r10
        sbb	rax, QWORD PTR [rcx+360]
        mov	r9, QWORD PTR [r12+368]
        mov	QWORD PTR [r12+360], rax
        sbb	r9, QWORD PTR [rcx+368]
        mov	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [r12+368], r9
        sbb	r10, QWORD PTR [rcx+376]
        mov	QWORD PTR [r12+376], r10
        sbb	r11, 0
        sub	rsi, 192
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r12+192]
        mov	r9, QWORD PTR [rsi+200]
        mov	QWORD PTR [rsi+192], rax
        adc	r9, QWORD PTR [r12+200]
        mov	r10, QWORD PTR [rsi+208]
        mov	QWORD PTR [rsi+200], r9
        adc	r10, QWORD PTR [r12+208]
        mov	rax, QWORD PTR [rsi+216]
        mov	QWORD PTR [rsi+208], r10
        adc	rax, QWORD PTR [r12+216]
        mov	r9, QWORD PTR [rsi+224]
        mov	QWORD PTR [rsi+216], rax
        adc	r9, QWORD PTR [r12+224]
        mov	r10, QWORD PTR [rsi+232]
        mov	QWORD PTR [rsi+224], r9
        adc	r10, QWORD PTR [r12+232]
        mov	rax, QWORD PTR [rsi+240]
        mov	QWORD PTR [rsi+232], r10
        adc	rax, QWORD PTR [r12+240]
        mov	r9, QWORD PTR [rsi+248]
        mov	QWORD PTR [rsi+240], rax
        adc	r9, QWORD PTR [r12+248]
        mov	r10, QWORD PTR [rsi+256]
        mov	QWORD PTR [rsi+248], r9
        adc	r10, QWORD PTR [r12+256]
        mov	rax, QWORD PTR [rsi+264]
        mov	QWORD PTR [rsi+256], r10
        adc	rax, QWORD PTR [r12+264]
        mov	r9, QWORD PTR [rsi+272]
        mov	QWORD PTR [rsi+264], rax
        adc	r9, QWORD PTR [r12+272]
        mov	r10, QWORD PTR [rsi+280]
        mov	QWORD PTR [rsi+272], r9
        adc	r10, QWORD PTR [r12+280]
        mov	rax, QWORD PTR [rsi+288]
        mov	QWORD PTR [rsi+280], r10
        adc	rax, QWORD PTR [r12+288]
        mov	r9, QWORD PTR [rsi+296]
        mov	QWORD PTR [rsi+288], rax
        adc	r9, QWORD PTR [r12+296]
        mov	r10, QWORD PTR [rsi+304]
        mov	QWORD PTR [rsi+296], r9
        adc	r10, QWORD PTR [r12+304]
        mov	rax, QWORD PTR [rsi+312]
        mov	QWORD PTR [rsi+304], r10
        adc	rax, QWORD PTR [r12+312]
        mov	r9, QWORD PTR [rsi+320]
        mov	QWORD PTR [rsi+312], rax
        adc	r9, QWORD PTR [r12+320]
        mov	r10, QWORD PTR [rsi+328]
        mov	QWORD PTR [rsi+320], r9
        adc	r10, QWORD PTR [r12+328]
        mov	rax, QWORD PTR [rsi+336]
        mov	QWORD PTR [rsi+328], r10
        adc	rax, QWORD PTR [r12+336]
        mov	r9, QWORD PTR [rsi+344]
        mov	QWORD PTR [rsi+336], rax
        adc	r9, QWORD PTR [r12+344]
        mov	r10, QWORD PTR [rsi+352]
        mov	QWORD PTR [rsi+344], r9
        adc	r10, QWORD PTR [r12+352]
        mov	rax, QWORD PTR [rsi+360]
        mov	QWORD PTR [rsi+352], r10
        adc	rax, QWORD PTR [r12+360]
        mov	r9, QWORD PTR [rsi+368]
        mov	QWORD PTR [rsi+360], rax
        adc	r9, QWORD PTR [r12+368]
        mov	r10, QWORD PTR [rsi+376]
        mov	QWORD PTR [rsi+368], r9
        adc	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [rsi+376], r10
        adc	r11, 0
        mov	QWORD PTR [rcx+576], r11
        add	rsi, 192
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r13+192]
        mov	QWORD PTR [rsi+192], rax
        ; Add to zero
        mov	rax, QWORD PTR [r13+200]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+208]
        mov	QWORD PTR [rsi+200], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+216]
        mov	QWORD PTR [rsi+208], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+224]
        mov	QWORD PTR [rsi+216], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+232]
        mov	QWORD PTR [rsi+224], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+240]
        mov	QWORD PTR [rsi+232], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+248]
        mov	QWORD PTR [rsi+240], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+256]
        mov	QWORD PTR [rsi+248], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+264]
        mov	QWORD PTR [rsi+256], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+272]
        mov	QWORD PTR [rsi+264], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+280]
        mov	QWORD PTR [rsi+272], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+288]
        mov	QWORD PTR [rsi+280], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+296]
        mov	QWORD PTR [rsi+288], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+304]
        mov	QWORD PTR [rsi+296], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+312]
        mov	QWORD PTR [rsi+304], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+320]
        mov	QWORD PTR [rsi+312], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+328]
        mov	QWORD PTR [rsi+320], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+336]
        mov	QWORD PTR [rsi+328], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+344]
        mov	QWORD PTR [rsi+336], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+352]
        mov	QWORD PTR [rsi+344], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+360]
        mov	QWORD PTR [rsi+352], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+368]
        mov	QWORD PTR [rsi+360], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+376]
        mov	QWORD PTR [rsi+368], rax
        adc	r9, 0
        mov	QWORD PTR [rsi+376], r9
        add	rsp, 1192
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mul_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
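; /* Multiply a and b into r. (r = a * b)
;  *
;  * Only assembled when HAVE_INTEL_AVX2 is defined. Each operand is split
;  * into two 24-word halves (at byte offset 192) and the result is built
;  * from three calls to sp_3072_mul_avx2_24.
;  *
;  * r  A single precision integer. Receives the product.
;  * a  A single precision integer. First operand; 48 64-bit words are read.
;  * b  A single precision integer. Second operand; 48 64-bit words are read.
;  */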
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_48 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        sub	rsp, 1192
        mov	QWORD PTR [rsp+1152], rcx
        mov	QWORD PTR [rsp+1160], rdx
        mov	QWORD PTR [rsp+1168], r8
        lea	r12, QWORD PTR [rsp+768]
        lea	r14, QWORD PTR [rdx+192]
        ; Add
        mov	rax, QWORD PTR [rdx]
        xor	r15, r15
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [rdx+8]
        mov	QWORD PTR [r12], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [rdx+16]
        mov	QWORD PTR [r12+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [rdx+24]
        mov	QWORD PTR [r12+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [rdx+32]
        mov	QWORD PTR [r12+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [rdx+40]
        mov	QWORD PTR [r12+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r12+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [rdx+56]
        mov	QWORD PTR [r12+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [rdx+64]
        mov	QWORD PTR [r12+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [rdx+72]
        mov	QWORD PTR [r12+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [rdx+80]
        mov	QWORD PTR [r12+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [rdx+88]
        mov	QWORD PTR [r12+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r12+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [rdx+104]
        mov	QWORD PTR [r12+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [rdx+112]
        mov	QWORD PTR [r12+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [rdx+120]
        mov	QWORD PTR [r12+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	r9, QWORD PTR [rdx+128]
        mov	QWORD PTR [r12+120], rax
        adc	r9, QWORD PTR [r14+128]
        mov	r10, QWORD PTR [rdx+136]
        mov	QWORD PTR [r12+128], r9
        adc	r10, QWORD PTR [r14+136]
        mov	rax, QWORD PTR [rdx+144]
        mov	QWORD PTR [r12+136], r10
        adc	rax, QWORD PTR [r14+144]
        mov	r9, QWORD PTR [rdx+152]
        mov	QWORD PTR [r12+144], rax
        adc	r9, QWORD PTR [r14+152]
        mov	r10, QWORD PTR [rdx+160]
        mov	QWORD PTR [r12+152], r9
        adc	r10, QWORD PTR [r14+160]
        mov	rax, QWORD PTR [rdx+168]
        mov	QWORD PTR [r12+160], r10
        adc	rax, QWORD PTR [r14+168]
        mov	r9, QWORD PTR [rdx+176]
        mov	QWORD PTR [r12+168], rax
        adc	r9, QWORD PTR [r14+176]
        mov	r10, QWORD PTR [rdx+184]
        mov	QWORD PTR [r12+176], r9
        adc	r10, QWORD PTR [r14+184]
        mov	QWORD PTR [r12+184], r10
        adc	r15, 0
        mov	QWORD PTR [rsp+1176], r15
        lea	r13, QWORD PTR [rsp+960]
        lea	r14, QWORD PTR [r8+192]
        ; Add
        mov	rax, QWORD PTR [r8]
        xor	rdi, rdi
        add	rax, QWORD PTR [r14]
        mov	r9, QWORD PTR [r8+8]
        mov	QWORD PTR [r13], rax
        adc	r9, QWORD PTR [r14+8]
        mov	r10, QWORD PTR [r8+16]
        mov	QWORD PTR [r13+8], r9
        adc	r10, QWORD PTR [r14+16]
        mov	rax, QWORD PTR [r8+24]
        mov	QWORD PTR [r13+16], r10
        adc	rax, QWORD PTR [r14+24]
        mov	r9, QWORD PTR [r8+32]
        mov	QWORD PTR [r13+24], rax
        adc	r9, QWORD PTR [r14+32]
        mov	r10, QWORD PTR [r8+40]
        mov	QWORD PTR [r13+32], r9
        adc	r10, QWORD PTR [r14+40]
        mov	rax, QWORD PTR [r8+48]
        mov	QWORD PTR [r13+40], r10
        adc	rax, QWORD PTR [r14+48]
        mov	r9, QWORD PTR [r8+56]
        mov	QWORD PTR [r13+48], rax
        adc	r9, QWORD PTR [r14+56]
        mov	r10, QWORD PTR [r8+64]
        mov	QWORD PTR [r13+56], r9
        adc	r10, QWORD PTR [r14+64]
        mov	rax, QWORD PTR [r8+72]
        mov	QWORD PTR [r13+64], r10
        adc	rax, QWORD PTR [r14+72]
        mov	r9, QWORD PTR [r8+80]
        mov	QWORD PTR [r13+72], rax
        adc	r9, QWORD PTR [r14+80]
        mov	r10, QWORD PTR [r8+88]
        mov	QWORD PTR [r13+80], r9
        adc	r10, QWORD PTR [r14+88]
        mov	rax, QWORD PTR [r8+96]
        mov	QWORD PTR [r13+88], r10
        adc	rax, QWORD PTR [r14+96]
        mov	r9, QWORD PTR [r8+104]
        mov	QWORD PTR [r13+96], rax
        adc	r9, QWORD PTR [r14+104]
        mov	r10, QWORD PTR [r8+112]
        mov	QWORD PTR [r13+104], r9
        adc	r10, QWORD PTR [r14+112]
        mov	rax, QWORD PTR [r8+120]
        mov	QWORD PTR [r13+112], r10
        adc	rax, QWORD PTR [r14+120]
        mov	r9, QWORD PTR [r8+128]
        mov	QWORD PTR [r13+120], rax
        adc	r9, QWORD PTR [r14+128]
        mov	r10, QWORD PTR [r8+136]
        mov	QWORD PTR [r13+128], r9
        adc	r10, QWORD PTR [r14+136]
        mov	rax, QWORD PTR [r8+144]
        mov	QWORD PTR [r13+136], r10
        adc	rax, QWORD PTR [r14+144]
        mov	r9, QWORD PTR [r8+152]
        mov	QWORD PTR [r13+144], rax
        adc	r9, QWORD PTR [r14+152]
        mov	r10, QWORD PTR [r8+160]
        mov	QWORD PTR [r13+152], r9
        adc	r10, QWORD PTR [r14+160]
        mov	rax, QWORD PTR [r8+168]
        mov	QWORD PTR [r13+160], r10
        adc	rax, QWORD PTR [r14+168]
        mov	r9, QWORD PTR [r8+176]
        mov	QWORD PTR [r13+168], rax
        adc	r9, QWORD PTR [r14+176]
        mov	r10, QWORD PTR [r8+184]
        mov	QWORD PTR [r13+176], r9
        adc	r10, QWORD PTR [r14+184]
        mov	QWORD PTR [r13+184], r10
        adc	rdi, 0
        mov	QWORD PTR [rsp+1184], rdi
        mov	r8, r13
        mov	rdx, r12
        mov	rcx, rsp
        call	sp_3072_mul_avx2_24
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        lea	rcx, QWORD PTR [rsp+384]
        add	r8, 192
        add	rdx, 192
        call	sp_3072_mul_avx2_24
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        mov	rcx, QWORD PTR [rsp+1152]
        call	sp_3072_mul_avx2_24
IFDEF _WIN64
        mov	r8, QWORD PTR [rsp+1168]
        mov	rdx, QWORD PTR [rsp+1160]
        mov	rcx, QWORD PTR [rsp+1152]
ENDIF
        mov	r15, QWORD PTR [rsp+1176]
        mov	rdi, QWORD PTR [rsp+1184]
        mov	rsi, QWORD PTR [rsp+1152]
        mov	r11, r15
        lea	r12, QWORD PTR [rsp+768]
        lea	r13, QWORD PTR [rsp+960]
        and	r11, rdi
        neg	r15
        neg	rdi
        add	rsi, 384
        mov	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [r13]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        add	rax, r9
        mov	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [r13+8]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [r13+16]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+8], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [r13+24]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+16], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [r13+32]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+24], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [r13+40]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+32], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [r13+48]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+40], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [r13+56]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+48], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [r13+64]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+56], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [r13+72]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+64], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [r13+80]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+72], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [r13+88]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+80], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [r13+96]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+88], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [r13+104]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+96], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [r13+112]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+104], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [r13+120]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+112], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [r13+128]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+120], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [r13+136]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+128], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [r13+144]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+136], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [r13+152]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+144], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [r13+160]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+152], r9
        adc	r10, rax
        mov	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [r13+168]
        pext	rax, rax, rdi
        pext	r9, r9, r15
        mov	QWORD PTR [rsi+160], r10
        adc	rax, r9
        mov	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [r13+176]
        pext	r9, r9, rdi
        pext	r10, r10, r15
        mov	QWORD PTR [rsi+168], rax
        adc	r9, r10
        mov	r10, QWORD PTR [r12+184]
        mov	rax, QWORD PTR [r13+184]
        pext	r10, r10, rdi
        pext	rax, rax, r15
        mov	QWORD PTR [rsi+176], r9
        adc	r10, rax
        mov	QWORD PTR [rsi+184], r10
        adc	r11, 0
        lea	r13, QWORD PTR [rsp+384]
        mov	r12, rsp
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [r13+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [r13+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [r13+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [r13+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [r13+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [r13+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [r13+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [r13+248]
        mov	r10, QWORD PTR [r12+256]
        mov	QWORD PTR [r12+248], r9
        sbb	r10, QWORD PTR [r13+256]
        mov	rax, QWORD PTR [r12+264]
        mov	QWORD PTR [r12+256], r10
        sbb	rax, QWORD PTR [r13+264]
        mov	r9, QWORD PTR [r12+272]
        mov	QWORD PTR [r12+264], rax
        sbb	r9, QWORD PTR [r13+272]
        mov	r10, QWORD PTR [r12+280]
        mov	QWORD PTR [r12+272], r9
        sbb	r10, QWORD PTR [r13+280]
        mov	rax, QWORD PTR [r12+288]
        mov	QWORD PTR [r12+280], r10
        sbb	rax, QWORD PTR [r13+288]
        mov	r9, QWORD PTR [r12+296]
        mov	QWORD PTR [r12+288], rax
        sbb	r9, QWORD PTR [r13+296]
        mov	r10, QWORD PTR [r12+304]
        mov	QWORD PTR [r12+296], r9
        sbb	r10, QWORD PTR [r13+304]
        mov	rax, QWORD PTR [r12+312]
        mov	QWORD PTR [r12+304], r10
        sbb	rax, QWORD PTR [r13+312]
        mov	r9, QWORD PTR [r12+320]
        mov	QWORD PTR [r12+312], rax
        sbb	r9, QWORD PTR [r13+320]
        mov	r10, QWORD PTR [r12+328]
        mov	QWORD PTR [r12+320], r9
        sbb	r10, QWORD PTR [r13+328]
        mov	rax, QWORD PTR [r12+336]
        mov	QWORD PTR [r12+328], r10
        sbb	rax, QWORD PTR [r13+336]
        mov	r9, QWORD PTR [r12+344]
        mov	QWORD PTR [r12+336], rax
        sbb	r9, QWORD PTR [r13+344]
        mov	r10, QWORD PTR [r12+352]
        mov	QWORD PTR [r12+344], r9
        sbb	r10, QWORD PTR [r13+352]
        mov	rax, QWORD PTR [r12+360]
        mov	QWORD PTR [r12+352], r10
        sbb	rax, QWORD PTR [r13+360]
        mov	r9, QWORD PTR [r12+368]
        mov	QWORD PTR [r12+360], rax
        sbb	r9, QWORD PTR [r13+368]
        mov	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [r12+368], r9
        sbb	r10, QWORD PTR [r13+376]
        mov	QWORD PTR [r12+376], r10
        sbb	r11, 0
        mov	rax, QWORD PTR [r12]
        sub	rax, QWORD PTR [rcx]
        mov	r9, QWORD PTR [r12+8]
        mov	QWORD PTR [r12], rax
        sbb	r9, QWORD PTR [rcx+8]
        mov	r10, QWORD PTR [r12+16]
        mov	QWORD PTR [r12+8], r9
        sbb	r10, QWORD PTR [rcx+16]
        mov	rax, QWORD PTR [r12+24]
        mov	QWORD PTR [r12+16], r10
        sbb	rax, QWORD PTR [rcx+24]
        mov	r9, QWORD PTR [r12+32]
        mov	QWORD PTR [r12+24], rax
        sbb	r9, QWORD PTR [rcx+32]
        mov	r10, QWORD PTR [r12+40]
        mov	QWORD PTR [r12+32], r9
        sbb	r10, QWORD PTR [rcx+40]
        mov	rax, QWORD PTR [r12+48]
        mov	QWORD PTR [r12+40], r10
        sbb	rax, QWORD PTR [rcx+48]
        mov	r9, QWORD PTR [r12+56]
        mov	QWORD PTR [r12+48], rax
        sbb	r9, QWORD PTR [rcx+56]
        mov	r10, QWORD PTR [r12+64]
        mov	QWORD PTR [r12+56], r9
        sbb	r10, QWORD PTR [rcx+64]
        mov	rax, QWORD PTR [r12+72]
        mov	QWORD PTR [r12+64], r10
        sbb	rax, QWORD PTR [rcx+72]
        mov	r9, QWORD PTR [r12+80]
        mov	QWORD PTR [r12+72], rax
        sbb	r9, QWORD PTR [rcx+80]
        mov	r10, QWORD PTR [r12+88]
        mov	QWORD PTR [r12+80], r9
        sbb	r10, QWORD PTR [rcx+88]
        mov	rax, QWORD PTR [r12+96]
        mov	QWORD PTR [r12+88], r10
        sbb	rax, QWORD PTR [rcx+96]
        mov	r9, QWORD PTR [r12+104]
        mov	QWORD PTR [r12+96], rax
        sbb	r9, QWORD PTR [rcx+104]
        mov	r10, QWORD PTR [r12+112]
        mov	QWORD PTR [r12+104], r9
        sbb	r10, QWORD PTR [rcx+112]
        mov	rax, QWORD PTR [r12+120]
        mov	QWORD PTR [r12+112], r10
        sbb	rax, QWORD PTR [rcx+120]
        mov	r9, QWORD PTR [r12+128]
        mov	QWORD PTR [r12+120], rax
        sbb	r9, QWORD PTR [rcx+128]
        mov	r10, QWORD PTR [r12+136]
        mov	QWORD PTR [r12+128], r9
        sbb	r10, QWORD PTR [rcx+136]
        mov	rax, QWORD PTR [r12+144]
        mov	QWORD PTR [r12+136], r10
        sbb	rax, QWORD PTR [rcx+144]
        mov	r9, QWORD PTR [r12+152]
        mov	QWORD PTR [r12+144], rax
        sbb	r9, QWORD PTR [rcx+152]
        mov	r10, QWORD PTR [r12+160]
        mov	QWORD PTR [r12+152], r9
        sbb	r10, QWORD PTR [rcx+160]
        mov	rax, QWORD PTR [r12+168]
        mov	QWORD PTR [r12+160], r10
        sbb	rax, QWORD PTR [rcx+168]
        mov	r9, QWORD PTR [r12+176]
        mov	QWORD PTR [r12+168], rax
        sbb	r9, QWORD PTR [rcx+176]
        mov	r10, QWORD PTR [r12+184]
        mov	QWORD PTR [r12+176], r9
        sbb	r10, QWORD PTR [rcx+184]
        mov	rax, QWORD PTR [r12+192]
        mov	QWORD PTR [r12+184], r10
        sbb	rax, QWORD PTR [rcx+192]
        mov	r9, QWORD PTR [r12+200]
        mov	QWORD PTR [r12+192], rax
        sbb	r9, QWORD PTR [rcx+200]
        mov	r10, QWORD PTR [r12+208]
        mov	QWORD PTR [r12+200], r9
        sbb	r10, QWORD PTR [rcx+208]
        mov	rax, QWORD PTR [r12+216]
        mov	QWORD PTR [r12+208], r10
        sbb	rax, QWORD PTR [rcx+216]
        mov	r9, QWORD PTR [r12+224]
        mov	QWORD PTR [r12+216], rax
        sbb	r9, QWORD PTR [rcx+224]
        mov	r10, QWORD PTR [r12+232]
        mov	QWORD PTR [r12+224], r9
        sbb	r10, QWORD PTR [rcx+232]
        mov	rax, QWORD PTR [r12+240]
        mov	QWORD PTR [r12+232], r10
        sbb	rax, QWORD PTR [rcx+240]
        mov	r9, QWORD PTR [r12+248]
        mov	QWORD PTR [r12+240], rax
        sbb	r9, QWORD PTR [rcx+248]
        mov	r10, QWORD PTR [r12+256]
        mov	QWORD PTR [r12+248], r9
        sbb	r10, QWORD PTR [rcx+256]
        mov	rax, QWORD PTR [r12+264]
        mov	QWORD PTR [r12+256], r10
        sbb	rax, QWORD PTR [rcx+264]
        mov	r9, QWORD PTR [r12+272]
        mov	QWORD PTR [r12+264], rax
        sbb	r9, QWORD PTR [rcx+272]
        mov	r10, QWORD PTR [r12+280]
        mov	QWORD PTR [r12+272], r9
        sbb	r10, QWORD PTR [rcx+280]
        mov	rax, QWORD PTR [r12+288]
        mov	QWORD PTR [r12+280], r10
        sbb	rax, QWORD PTR [rcx+288]
        mov	r9, QWORD PTR [r12+296]
        mov	QWORD PTR [r12+288], rax
        sbb	r9, QWORD PTR [rcx+296]
        mov	r10, QWORD PTR [r12+304]
        mov	QWORD PTR [r12+296], r9
        sbb	r10, QWORD PTR [rcx+304]
        mov	rax, QWORD PTR [r12+312]
        mov	QWORD PTR [r12+304], r10
        sbb	rax, QWORD PTR [rcx+312]
        mov	r9, QWORD PTR [r12+320]
        mov	QWORD PTR [r12+312], rax
        sbb	r9, QWORD PTR [rcx+320]
        mov	r10, QWORD PTR [r12+328]
        mov	QWORD PTR [r12+320], r9
        sbb	r10, QWORD PTR [rcx+328]
        mov	rax, QWORD PTR [r12+336]
        mov	QWORD PTR [r12+328], r10
        sbb	rax, QWORD PTR [rcx+336]
        mov	r9, QWORD PTR [r12+344]
        mov	QWORD PTR [r12+336], rax
        sbb	r9, QWORD PTR [rcx+344]
        mov	r10, QWORD PTR [r12+352]
        mov	QWORD PTR [r12+344], r9
        sbb	r10, QWORD PTR [rcx+352]
        mov	rax, QWORD PTR [r12+360]
        mov	QWORD PTR [r12+352], r10
        sbb	rax, QWORD PTR [rcx+360]
        mov	r9, QWORD PTR [r12+368]
        mov	QWORD PTR [r12+360], rax
        sbb	r9, QWORD PTR [rcx+368]
        mov	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [r12+368], r9
        sbb	r10, QWORD PTR [rcx+376]
        mov	QWORD PTR [r12+376], r10
        sbb	r11, 0
        sub	rsi, 192
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r12]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r12+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r12+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r12+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r12+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r12+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r12+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r12+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r12+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r12+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r12+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r12+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r12+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r12+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r12+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r12+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r12+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r12+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r12+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r12+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r12+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r12+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r12+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r12+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r12+192]
        mov	r9, QWORD PTR [rsi+200]
        mov	QWORD PTR [rsi+192], rax
        adc	r9, QWORD PTR [r12+200]
        mov	r10, QWORD PTR [rsi+208]
        mov	QWORD PTR [rsi+200], r9
        adc	r10, QWORD PTR [r12+208]
        mov	rax, QWORD PTR [rsi+216]
        mov	QWORD PTR [rsi+208], r10
        adc	rax, QWORD PTR [r12+216]
        mov	r9, QWORD PTR [rsi+224]
        mov	QWORD PTR [rsi+216], rax
        adc	r9, QWORD PTR [r12+224]
        mov	r10, QWORD PTR [rsi+232]
        mov	QWORD PTR [rsi+224], r9
        adc	r10, QWORD PTR [r12+232]
        mov	rax, QWORD PTR [rsi+240]
        mov	QWORD PTR [rsi+232], r10
        adc	rax, QWORD PTR [r12+240]
        mov	r9, QWORD PTR [rsi+248]
        mov	QWORD PTR [rsi+240], rax
        adc	r9, QWORD PTR [r12+248]
        mov	r10, QWORD PTR [rsi+256]
        mov	QWORD PTR [rsi+248], r9
        adc	r10, QWORD PTR [r12+256]
        mov	rax, QWORD PTR [rsi+264]
        mov	QWORD PTR [rsi+256], r10
        adc	rax, QWORD PTR [r12+264]
        mov	r9, QWORD PTR [rsi+272]
        mov	QWORD PTR [rsi+264], rax
        adc	r9, QWORD PTR [r12+272]
        mov	r10, QWORD PTR [rsi+280]
        mov	QWORD PTR [rsi+272], r9
        adc	r10, QWORD PTR [r12+280]
        mov	rax, QWORD PTR [rsi+288]
        mov	QWORD PTR [rsi+280], r10
        adc	rax, QWORD PTR [r12+288]
        mov	r9, QWORD PTR [rsi+296]
        mov	QWORD PTR [rsi+288], rax
        adc	r9, QWORD PTR [r12+296]
        mov	r10, QWORD PTR [rsi+304]
        mov	QWORD PTR [rsi+296], r9
        adc	r10, QWORD PTR [r12+304]
        mov	rax, QWORD PTR [rsi+312]
        mov	QWORD PTR [rsi+304], r10
        adc	rax, QWORD PTR [r12+312]
        mov	r9, QWORD PTR [rsi+320]
        mov	QWORD PTR [rsi+312], rax
        adc	r9, QWORD PTR [r12+320]
        mov	r10, QWORD PTR [rsi+328]
        mov	QWORD PTR [rsi+320], r9
        adc	r10, QWORD PTR [r12+328]
        mov	rax, QWORD PTR [rsi+336]
        mov	QWORD PTR [rsi+328], r10
        adc	rax, QWORD PTR [r12+336]
        mov	r9, QWORD PTR [rsi+344]
        mov	QWORD PTR [rsi+336], rax
        adc	r9, QWORD PTR [r12+344]
        mov	r10, QWORD PTR [rsi+352]
        mov	QWORD PTR [rsi+344], r9
        adc	r10, QWORD PTR [r12+352]
        mov	rax, QWORD PTR [rsi+360]
        mov	QWORD PTR [rsi+352], r10
        adc	rax, QWORD PTR [r12+360]
        mov	r9, QWORD PTR [rsi+368]
        mov	QWORD PTR [rsi+360], rax
        adc	r9, QWORD PTR [r12+368]
        mov	r10, QWORD PTR [rsi+376]
        mov	QWORD PTR [rsi+368], r9
        adc	r10, QWORD PTR [r12+376]
        mov	QWORD PTR [rsi+376], r10
        adc	r11, 0
        mov	QWORD PTR [rcx+576], r11
        add	rsi, 192
        ; Add
        mov	rax, QWORD PTR [rsi]
        add	rax, QWORD PTR [r13]
        mov	r9, QWORD PTR [rsi+8]
        mov	QWORD PTR [rsi], rax
        adc	r9, QWORD PTR [r13+8]
        mov	r10, QWORD PTR [rsi+16]
        mov	QWORD PTR [rsi+8], r9
        adc	r10, QWORD PTR [r13+16]
        mov	rax, QWORD PTR [rsi+24]
        mov	QWORD PTR [rsi+16], r10
        adc	rax, QWORD PTR [r13+24]
        mov	r9, QWORD PTR [rsi+32]
        mov	QWORD PTR [rsi+24], rax
        adc	r9, QWORD PTR [r13+32]
        mov	r10, QWORD PTR [rsi+40]
        mov	QWORD PTR [rsi+32], r9
        adc	r10, QWORD PTR [r13+40]
        mov	rax, QWORD PTR [rsi+48]
        mov	QWORD PTR [rsi+40], r10
        adc	rax, QWORD PTR [r13+48]
        mov	r9, QWORD PTR [rsi+56]
        mov	QWORD PTR [rsi+48], rax
        adc	r9, QWORD PTR [r13+56]
        mov	r10, QWORD PTR [rsi+64]
        mov	QWORD PTR [rsi+56], r9
        adc	r10, QWORD PTR [r13+64]
        mov	rax, QWORD PTR [rsi+72]
        mov	QWORD PTR [rsi+64], r10
        adc	rax, QWORD PTR [r13+72]
        mov	r9, QWORD PTR [rsi+80]
        mov	QWORD PTR [rsi+72], rax
        adc	r9, QWORD PTR [r13+80]
        mov	r10, QWORD PTR [rsi+88]
        mov	QWORD PTR [rsi+80], r9
        adc	r10, QWORD PTR [r13+88]
        mov	rax, QWORD PTR [rsi+96]
        mov	QWORD PTR [rsi+88], r10
        adc	rax, QWORD PTR [r13+96]
        mov	r9, QWORD PTR [rsi+104]
        mov	QWORD PTR [rsi+96], rax
        adc	r9, QWORD PTR [r13+104]
        mov	r10, QWORD PTR [rsi+112]
        mov	QWORD PTR [rsi+104], r9
        adc	r10, QWORD PTR [r13+112]
        mov	rax, QWORD PTR [rsi+120]
        mov	QWORD PTR [rsi+112], r10
        adc	rax, QWORD PTR [r13+120]
        mov	r9, QWORD PTR [rsi+128]
        mov	QWORD PTR [rsi+120], rax
        adc	r9, QWORD PTR [r13+128]
        mov	r10, QWORD PTR [rsi+136]
        mov	QWORD PTR [rsi+128], r9
        adc	r10, QWORD PTR [r13+136]
        mov	rax, QWORD PTR [rsi+144]
        mov	QWORD PTR [rsi+136], r10
        adc	rax, QWORD PTR [r13+144]
        mov	r9, QWORD PTR [rsi+152]
        mov	QWORD PTR [rsi+144], rax
        adc	r9, QWORD PTR [r13+152]
        mov	r10, QWORD PTR [rsi+160]
        mov	QWORD PTR [rsi+152], r9
        adc	r10, QWORD PTR [r13+160]
        mov	rax, QWORD PTR [rsi+168]
        mov	QWORD PTR [rsi+160], r10
        adc	rax, QWORD PTR [r13+168]
        mov	r9, QWORD PTR [rsi+176]
        mov	QWORD PTR [rsi+168], rax
        adc	r9, QWORD PTR [r13+176]
        mov	r10, QWORD PTR [rsi+184]
        mov	QWORD PTR [rsi+176], r9
        adc	r10, QWORD PTR [r13+184]
        mov	rax, QWORD PTR [rsi+192]
        mov	QWORD PTR [rsi+184], r10
        adc	rax, QWORD PTR [r13+192]
        mov	QWORD PTR [rsi+192], rax
        ; Add to zero
        mov	rax, QWORD PTR [r13+200]
        adc	rax, 0
        mov	r9, QWORD PTR [r13+208]
        mov	QWORD PTR [rsi+200], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+216]
        mov	QWORD PTR [rsi+208], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+224]
        mov	QWORD PTR [rsi+216], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+232]
        mov	QWORD PTR [rsi+224], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+240]
        mov	QWORD PTR [rsi+232], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+248]
        mov	QWORD PTR [rsi+240], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+256]
        mov	QWORD PTR [rsi+248], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+264]
        mov	QWORD PTR [rsi+256], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+272]
        mov	QWORD PTR [rsi+264], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+280]
        mov	QWORD PTR [rsi+272], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+288]
        mov	QWORD PTR [rsi+280], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+296]
        mov	QWORD PTR [rsi+288], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+304]
        mov	QWORD PTR [rsi+296], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+312]
        mov	QWORD PTR [rsi+304], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+320]
        mov	QWORD PTR [rsi+312], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+328]
        mov	QWORD PTR [rsi+320], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+336]
        mov	QWORD PTR [rsi+328], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+344]
        mov	QWORD PTR [rsi+336], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+352]
        mov	QWORD PTR [rsi+344], rax
        adc	r9, 0
        mov	r10, QWORD PTR [r13+360]
        mov	QWORD PTR [rsi+352], r9
        adc	r10, 0
        mov	rax, QWORD PTR [r13+368]
        mov	QWORD PTR [rsi+360], r10
        adc	rax, 0
        mov	r9, QWORD PTR [r13+376]
        mov	QWORD PTR [rsi+368], rax
        adc	r9, 0
        mov	QWORD PTR [rsi+376], r9
        add	rsp, 1192
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mul_avx2_48 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * Fully unrolled, column-by-column square of a 12-word (12 x 64-bit) value
;  * into a 24-word result. For each result column the off-diagonal products
;  * A[i] * A[j] (i < j) are counted twice and the diagonal product
;  * A[i] * A[i] once.
;  *
;  * r  A single precision integer. Words r[0]..r[23] are written (via rcx).
;  * a  A single precision integer. Words a[0]..a[11] are read (via rdx).
;  *
;  * r[0]..r[11] are staged in a 96-byte stack buffer and copied to r only
;  * after the last read of a; r[12]..r[23] are stored to r directly.
;  * NOTE(review): presumably this staging is what allows r == a - confirm
;  * against callers.
;  *
;  * r12, r13 and r14 are saved and restored. rax, rdx, r8-r11 and the flags
;  * are overwritten; no return value is set up in rax.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_12 PROC
        ; Save the non-volatile registers used as scratch below.
        push	r12
        push	r13
        push	r14
        ; mul overwrites rdx with the high product word, so the pointer to a
        ; is kept in r8 for the rest of the routine.
        mov	r8, rdx
        ; 96 bytes = 12 qwords of staging space for r[0]..r[11].
        sub	rsp, 96
        ; Register roles:
        ;   r9, r10, r11  rotating three-word column accumulator; the role of
        ;                 low/middle/high word shifts by one register per column
        ;   r12:r13:r14   (columns 5..17) sum of a column's off-diagonal products
        ;   rdx:rax       128-bit result of each mul
        ; Column 0: low word goes to r[0], high word seeds column 1.
        ; A[0] * A[0]
        mov	rax, QWORD PTR [r8]
        mul	rax
        xor	r11, r11
        mov	QWORD PTR [rsp], rax
        mov	r10, rdx
        ; Column 1 (accumulator r10:r11:r9). The product is added twice
        ; because A[0]*A[1] and A[1]*A[0] both land in this column.
        ; A[0] * A[1]
        mov	rax, QWORD PTR [r8+8]
        mul	QWORD PTR [r8]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rsp+8], r10
        ; Column 2 (accumulator r11:r9:r10).
        ; A[0] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r8]
        xor	r10, r10
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        ; Diagonal term: added once only.
        ; A[1] * A[1]
        mov	rax, QWORD PTR [r8+8]
        mul	rax
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        mov	QWORD PTR [rsp+16], r11
        ; Column 3 (accumulator r9:r10:r11).
        ; A[0] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8]
        xor	r11, r11
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[1] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	QWORD PTR [r8+8]
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rsp+24], r9
        ; Column 4 (accumulator r10:r11:r9).
        ; A[0] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[1] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8+8]
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[2] * A[2]
        mov	rax, QWORD PTR [r8+16]
        mul	rax
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rsp+32], r10
        ; Column 5 (accumulator r11:r9:r10). From here to column 17 the
        ; off-diagonal products are first summed in r12:r13:r14, that sum is
        ; doubled once, and the result is added to the running accumulator.
        ; A[0] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum (r12:r13:r14 *= 2).
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        ; Fold the column sum into the running accumulator.
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+40], r11
        ; Column 6 (accumulator r9:r10:r11).
        ; A[0] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; The diagonal square is computed before the doubling; the doubling
        ; only touches r12-r14, so rdx:rax still holds the square when it is
        ; added (once) afterwards.
        ; A[3] * A[3]
        mov	rax, QWORD PTR [r8+24]
        mul	rax
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+48], r9
        ; Column 7 (accumulator r10:r11:r9).
        ; A[0] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rsp+56], r10
        ; Column 8 (accumulator r11:r9:r10).
        ; A[0] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[4]
        mov	rax, QWORD PTR [r8+32]
        mul	rax
        ; Double the off-diagonal sum, then add the diagonal square once.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+64], r11
        ; Column 9 (accumulator r9:r10:r11).
        ; A[0] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rsp+72], r9
        ; Column 10 (accumulator r10:r11:r9).
        ; A[0] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[5]
        mov	rax, QWORD PTR [r8+40]
        mul	rax
        ; Double the off-diagonal sum, then add the diagonal square once.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rsp+80], r10
        ; Column 11 (accumulator r11:r9:r10). Last column staged on the stack.
        ; A[0] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[1] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+8]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[2] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rsp+88], r11
        ; Column 12 (accumulator r9:r10:r11). From here on results are stored
        ; straight to r (rcx+96 and up) instead of the stack buffer.
        ; A[1] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+8]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[2] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+16]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[3] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[6]
        mov	rax, QWORD PTR [r8+48]
        mul	rax
        ; Double the off-diagonal sum, then add the diagonal square once.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rcx+96], r9
        ; Column 13 (accumulator r10:r11:r9).
        ; A[2] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+16]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[3] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+24]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[4] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+104], r10
        ; Column 14 (accumulator r11:r9:r10).
        ; A[3] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+24]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[4] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+32]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[5] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[7]
        mov	rax, QWORD PTR [r8+56]
        mul	rax
        ; Double the off-diagonal sum, then add the diagonal square once.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rcx+112], r11
        ; Column 15 (accumulator r9:r10:r11).
        ; A[4] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+32]
        xor	r11, r11
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[5] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+40]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[6] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r9, r12
        adc	r10, r13
        adc	r11, r14
        mov	QWORD PTR [rcx+120], r9
        ; Column 16 (accumulator r10:r11:r9).
        ; A[5] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+40]
        xor	r9, r9
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[6] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+48]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[7] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[8]
        mov	rax, QWORD PTR [r8+64]
        mul	rax
        ; Double the off-diagonal sum, then add the diagonal square once.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        add	r10, r12
        adc	r11, r13
        adc	r9, r14
        mov	QWORD PTR [rcx+128], r10
        ; Column 17 (accumulator r11:r9:r10). Last column that uses the
        ; r12:r13:r14 side accumulator.
        ; A[6] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+48]
        xor	r10, r10
        xor	r14, r14
        mov	r12, rax
        mov	r13, rdx
        ; A[7] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+56]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; A[8] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	QWORD PTR [r8+64]
        add	r12, rax
        adc	r13, rdx
        adc	r14, 0
        ; Double the off-diagonal sum, then fold into the accumulator.
        add	r12, r12
        adc	r13, r13
        adc	r14, r14
        add	r11, r12
        adc	r9, r13
        adc	r10, r14
        mov	QWORD PTR [rcx+136], r11
        ; Column 18 (accumulator r9:r10:r11). The remaining columns go back to
        ; adding each off-diagonal product twice directly into the accumulator.
        ; A[7] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+56]
        xor	r11, r11
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[8] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+64]
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        ; A[9] * A[9]
        mov	rax, QWORD PTR [r8+72]
        mul	rax
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+144], r9
        ; Column 19 (accumulator r10:r11:r9).
        ; A[8] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+64]
        xor	r9, r9
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        ; A[9] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	QWORD PTR [r8+72]
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        add	r10, rax
        adc	r11, rdx
        adc	r9, 0
        mov	QWORD PTR [rcx+152], r10
        ; Column 20 (accumulator r11:r9:r10).
        ; A[9] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+72]
        xor	r10, r10
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        ; A[10] * A[10]
        mov	rax, QWORD PTR [r8+80]
        mul	rax
        add	r11, rax
        adc	r9, rdx
        adc	r10, 0
        mov	QWORD PTR [rcx+160], r11
        ; Column 21 (accumulator r9:r10:r11).
        ; A[10] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	QWORD PTR [r8+80]
        xor	r11, r11
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        add	r9, rax
        adc	r10, rdx
        adc	r11, 0
        mov	QWORD PTR [rcx+168], r9
        ; Column 22 and the final word: only two accumulator words remain
        ; (r10:r11); no carry out of r11 is recorded, and the two words are
        ; stored as r[22] and r[23].
        ; A[11] * A[11]
        mov	rax, QWORD PTR [r8+88]
        mul	rax
        add	r10, rax
        adc	r11, rdx
        mov	QWORD PTR [rcx+176], r10
        mov	QWORD PTR [rcx+184], r11
        ; All reads of a are done. Copy the staged low half from the stack
        ; buffer into r[0]..r[11], four words at a time, using rax, rdx, r12
        ; and r13 as temporaries.
        mov	rax, QWORD PTR [rsp]
        mov	rdx, QWORD PTR [rsp+8]
        mov	r12, QWORD PTR [rsp+16]
        mov	r13, QWORD PTR [rsp+24]
        mov	QWORD PTR [rcx], rax
        mov	QWORD PTR [rcx+8], rdx
        mov	QWORD PTR [rcx+16], r12
        mov	QWORD PTR [rcx+24], r13
        mov	rax, QWORD PTR [rsp+32]
        mov	rdx, QWORD PTR [rsp+40]
        mov	r12, QWORD PTR [rsp+48]
        mov	r13, QWORD PTR [rsp+56]
        mov	QWORD PTR [rcx+32], rax
        mov	QWORD PTR [rcx+40], rdx
        mov	QWORD PTR [rcx+48], r12
        mov	QWORD PTR [rcx+56], r13
        mov	rax, QWORD PTR [rsp+64]
        mov	rdx, QWORD PTR [rsp+72]
        mov	r12, QWORD PTR [rsp+80]
        mov	r13, QWORD PTR [rsp+88]
        mov	QWORD PTR [rcx+64], rax
        mov	QWORD PTR [rcx+72], rdx
        mov	QWORD PTR [rcx+80], r12
        mov	QWORD PTR [rcx+88], r13
        ; Release the staging buffer and restore the saved registers in
        ; reverse push order.
        add	rsp, 96
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_sqr_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_12 PROC
        push	rbp
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        push	rbx
        mov	r8, rcx
        mov	r9, rdx
        sub	rsp, 96
        cmp	r9, r8
        mov	rbp, rsp
        cmovne	rbp, r8
        add	r8, 96
        xor	r12, r12
        ; Diagonal 1
        ; Zero into %r9
        ; A[1] x A[0]
        mov	rdx, QWORD PTR [r9]
        mulx	r11, r10, QWORD PTR [r9+8]
        mov	QWORD PTR [rbp+8], r10
        ; Zero into %r8
        ; A[2] x A[0]
        mulx	r10, rax, QWORD PTR [r9+16]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [rbp+16], r11
        ; Zero into %r9
        ; A[3] x A[0]
        mulx	r11, rax, QWORD PTR [r9+24]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [rbp+24], r10
        ; Zero into %r8
        ; A[4] x A[0]
        mulx	r10, rax, QWORD PTR [r9+32]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [rbp+32], r11
        ; Zero into %r9
        ; A[5] x A[0]
        mulx	r11, rax, QWORD PTR [r9+40]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [rbp+40], r10
        ; No load %r12 - %r8
        ; A[6] x A[0]
        mulx	r14, rax, QWORD PTR [r9+48]
        adcx	r11, rax
        adox	r14, r12
        mov	QWORD PTR [rbp+48], r11
        ; No load %r13 - %r9
        ; A[7] x A[0]
        mulx	r15, rax, QWORD PTR [r9+56]
        adcx	r14, rax
        adox	r15, r12
        ; No store %r12 - %r8
        ; No load %r14 - %r8
        ; A[8] x A[0]
        mulx	rdi, rax, QWORD PTR [r9+64]
        adcx	r15, rax
        adox	rdi, r12
        ; No store %r13 - %r9
        ; No load %r15 - %r9
        ; A[9] x A[0]
        mulx	rsi, rax, QWORD PTR [r9+72]
        adcx	rdi, rax
        adox	rsi, r12
        ; No store %r14 - %r8
        ; No load %rbx - %r8
        ; A[10] x A[0]
        mulx	rbx, rax, QWORD PTR [r9+80]
        adcx	rsi, rax
        adox	rbx, r12
        ; No store %r15 - %r9
        ; Zero into %r9
        ; A[11] x A[0]
        mulx	r11, rax, QWORD PTR [r9+88]
        adcx	rbx, rax
        adox	r11, r12
        ; No store %rbx - %r8
        ;  Carry
        adcx	r11, r12
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8], r11
        ; Diagonal 2
        mov	r11, QWORD PTR [rbp+24]
        mov	r10, QWORD PTR [rbp+32]
        ; A[2] x A[1]
        mov	rdx, QWORD PTR [r9+8]
        mulx	rcx, rax, QWORD PTR [r9+16]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [rbp+24], r11
        mov	r11, QWORD PTR [rbp+40]
        ; A[3] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+24]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbp+32], r10
        mov	r10, QWORD PTR [rbp+48]
        ; A[4] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [rbp+40], r11
        ; No load %r12 - %r9
        ; A[5] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r10, rax
        adox	r14, rcx
        mov	QWORD PTR [rbp+48], r10
        ; No load %r13 - %r8
        ; A[6] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r14, rax
        adox	r15, rcx
        ; No store %r12 - %r9
        ; No load %r14 - %r9
        ; A[7] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r15, rax
        adox	rdi, rcx
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[8] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rdi, rax
        adox	rsi, rcx
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[9] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r15 - %r8
        mov	r10, QWORD PTR [r8]
        ; A[10] x A[1]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	rbx, rax
        adox	r10, rcx
        ; No store %rbx - %r9
        ; Zero into %r9
        ; A[11] x A[1]
        mulx	r11, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [r8], r10
        ; Zero into %r8
        ; A[11] x A[2]
        mov	rdx, QWORD PTR [r9+16]
        mulx	r10, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [r8+8], r11
        ;  Carry
        adcx	r10, r13
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8+16], r10
        ; Diagonal 3
        mov	r10, QWORD PTR [rbp+40]
        mov	r11, QWORD PTR [rbp+48]
        ; A[3] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+24]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [rbp+40], r10
        ; No load %r12 - %r8
        ; A[4] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r11, rax
        adox	r14, rcx
        mov	QWORD PTR [rbp+48], r11
        ; No load %r13 - %r9
        ; A[5] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r14, rax
        adox	r15, rcx
        ; No store %r12 - %r8
        ; No load %r14 - %r8
        ; A[6] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	r15, rax
        adox	rdi, rcx
        ; No store %r13 - %r9
        ; No load %r15 - %r9
        ; A[7] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	rdi, rax
        adox	rsi, rcx
        ; No store %r14 - %r8
        ; No load %rbx - %r8
        ; A[8] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r15 - %r9
        mov	r11, QWORD PTR [r8]
        ; A[9] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	rbx, rax
        adox	r11, rcx
        ; No store %rbx - %r8
        mov	r10, QWORD PTR [r8+8]
        ; A[10] x A[2]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8], r11
        mov	r11, QWORD PTR [r8+16]
        ; A[10] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+8], r10
        ; Zero into %r8
        ; A[10] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	r10, rax, QWORD PTR [r9+80]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [r8+16], r11
        ; Zero into %r9
        ; A[10] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	r11, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [r8+24], r10
        ;  Carry
        adcx	r11, r13
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8+32], r11
        ; Diagonal 4
        ; No load %r13 - %r8
        ; A[4] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+32]
        adcx	r14, rax
        adox	r15, rcx
        ; No store %r12 - %r9
        ; No load %r14 - %r9
        ; A[5] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	r15, rax
        adox	rdi, rcx
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[6] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	rdi, rax
        adox	rsi, rcx
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[7] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r15 - %r8
        mov	r10, QWORD PTR [r8]
        ; A[8] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	rbx, rax
        adox	r10, rcx
        ; No store %rbx - %r9
        mov	r11, QWORD PTR [r8+8]
        ; A[9] x A[3]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r10, QWORD PTR [r8+16]
        ; A[9] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r11, QWORD PTR [r8+24]
        ; A[9] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+16], r10
        mov	r10, QWORD PTR [r8+32]
        ; A[9] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+72]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r11
        ; Zero into %r9
        ; A[9] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	r11, rax, QWORD PTR [r9+72]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [r8+32], r10
        ; Zero into %r8
        ; A[9] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	r10, rax, QWORD PTR [r9+72]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [r8+40], r11
        ;  Carry
        adcx	r10, r13
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8+48], r10
        ; Diagonal 5
        ; No load %r15 - %r9
        ; A[5] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+40]
        adcx	rdi, rax
        adox	rsi, rcx
        ; No store %r14 - %r8
        ; No load %rbx - %r8
        ; A[6] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	rsi, rax
        adox	rbx, rcx
        ; No store %r15 - %r9
        mov	r11, QWORD PTR [r8]
        ; A[7] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	rbx, rax
        adox	r11, rcx
        ; No store %rbx - %r8
        mov	r10, QWORD PTR [r8+8]
        ; A[8] x A[4]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8], r11
        mov	r11, QWORD PTR [r8+16]
        ; A[8] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+8], r10
        mov	r10, QWORD PTR [r8+24]
        ; A[8] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+16], r11
        mov	r11, QWORD PTR [r8+32]
        ; A[8] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+64]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+24], r10
        mov	r10, QWORD PTR [r8+40]
        ; A[10] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+32], r11
        mov	r11, QWORD PTR [r8+48]
        ; A[10] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+40], r10
        ; Zero into %r8
        ; A[10] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	r10, rax, QWORD PTR [r9+80]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [r8+48], r11
        ; Zero into %r9
        ; A[10] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	r11, rax, QWORD PTR [r9+80]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [r8+56], r10
        ;  Carry
        adcx	r11, r13
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8+64], r11
        ; Diagonal 6
        mov	r10, QWORD PTR [r8]
        ; A[6] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+48]
        adcx	rbx, rax
        adox	r10, rcx
        ; No store %rbx - %r9
        mov	r11, QWORD PTR [r8+8]
        ; A[7] x A[5]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	r10, QWORD PTR [r8+16]
        ; A[7] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+56]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+8], r11
        mov	r11, QWORD PTR [r8+24]
        ; A[11] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+16], r10
        mov	r10, QWORD PTR [r8+32]
        ; A[11] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+24], r11
        mov	r11, QWORD PTR [r8+40]
        ; A[11] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+32], r10
        mov	r10, QWORD PTR [r8+48]
        ; A[11] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+40], r11
        mov	r11, QWORD PTR [r8+56]
        ; A[11] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, rcx
        mov	QWORD PTR [r8+48], r10
        mov	r10, QWORD PTR [r8+64]
        ; A[11] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r10, rcx
        mov	QWORD PTR [r8+56], r11
        ; Zero into %r9
        ; A[11] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	r11, rax, QWORD PTR [r9+88]
        adcx	r10, rax
        adox	r11, r12
        mov	QWORD PTR [r8+64], r10
        ; Zero into %r8
        ; A[11] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	r10, rax, QWORD PTR [r9+88]
        adcx	r11, rax
        adox	r10, r12
        mov	QWORD PTR [r8+72], r11
        ;  Carry
        adcx	r10, r13
        mov	r13, r12
        adcx	r13, r12
        adox	r13, r12
        mov	QWORD PTR [r8+80], r10
        mov	QWORD PTR [r8+88], r13
        ; Double and Add in A[i] x A[i]
        mov	r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov	rdx, QWORD PTR [r9]
        mulx	rcx, rax, rdx
        mov	QWORD PTR [rbp], rax
        adox	r11, r11
        adcx	r11, rcx
        mov	QWORD PTR [rbp+8], r11
        mov	r10, QWORD PTR [rbp+16]
        mov	r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov	rdx, QWORD PTR [r9+8]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+16], r10
        mov	QWORD PTR [rbp+24], r11
        mov	r10, QWORD PTR [rbp+32]
        mov	r11, QWORD PTR [rbp+40]
        ; A[2] x A[2]
        mov	rdx, QWORD PTR [r9+16]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [rbp+32], r10
        mov	QWORD PTR [rbp+40], r11
        mov	r10, QWORD PTR [rbp+48]
        ; A[3] x A[3]
        mov	rdx, QWORD PTR [r9+24]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r14, r14
        adcx	r10, rax
        adcx	r14, rcx
        mov	QWORD PTR [rbp+48], r10
        ; A[4] x A[4]
        mov	rdx, QWORD PTR [r9+32]
        mulx	rcx, rax, rdx
        adox	r15, r15
        adox	rdi, rdi
        adcx	r15, rax
        adcx	rdi, rcx
        ; A[5] x A[5]
        mov	rdx, QWORD PTR [r9+40]
        mulx	rcx, rax, rdx
        adox	rsi, rsi
        adox	rbx, rbx
        adcx	rsi, rax
        adcx	rbx, rcx
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        ; A[6] x A[6]
        mov	rdx, QWORD PTR [r9+48]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8], r10
        mov	QWORD PTR [r8+8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        ; A[7] x A[7]
        mov	rdx, QWORD PTR [r9+56]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+16], r10
        mov	QWORD PTR [r8+24], r11
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        ; A[8] x A[8]
        mov	rdx, QWORD PTR [r9+64]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+32], r10
        mov	QWORD PTR [r8+40], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        ; A[9] x A[9]
        mov	rdx, QWORD PTR [r9+72]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+48], r10
        mov	QWORD PTR [r8+56], r11
        mov	r10, QWORD PTR [r8+64]
        mov	r11, QWORD PTR [r8+72]
        ; A[10] x A[10]
        mov	rdx, QWORD PTR [r9+80]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+64], r10
        mov	QWORD PTR [r8+72], r11
        mov	r10, QWORD PTR [r8+80]
        mov	r11, QWORD PTR [r8+88]
        ; A[11] x A[11]
        mov	rdx, QWORD PTR [r9+88]
        mulx	rcx, rax, rdx
        adox	r10, r10
        adox	r11, r11
        adcx	r10, rax
        adcx	r11, rcx
        mov	QWORD PTR [r8+80], r10
        mov	QWORD PTR [r8+88], r11
        mov	QWORD PTR [r8+-40], r14
        mov	QWORD PTR [r8+-32], r15
        mov	QWORD PTR [r8+-24], rdi
        mov	QWORD PTR [r8+-16], rsi
        mov	QWORD PTR [r8+-8], rbx
        sub	r8, 96
        cmp	r9, r8
        jne	L_end_3072_sqr_avx2_12
        vmovdqu	xmm0, OWORD PTR [rbp]
        vmovups	OWORD PTR [r8], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+16]
        vmovups	OWORD PTR [r8+16], xmm0
        vmovdqu	xmm0, OWORD PTR [rbp+32]
        vmovups	OWORD PTR [r8+32], xmm0
        mov	rax, QWORD PTR [rbp+48]
        mov	QWORD PTR [r8+48], rax
L_end_3072_sqr_avx2_12:
        add	rsp, 96
        pop	rbx
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        pop	rbp
        ret
sp_3072_sqr_avx2_12 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *   al is the 12 64-bit words at a+0, ah is the 12 64-bit words at a+96.
;  *   The three half-size squares are delegated to sp_3072_sqr_12 and the
;  *   results are combined here with straight-line borrow/carry chains; this
;  *   routine itself contains no branches.
;  *
;  * r  A single precision integer. Passed in rcx; accessed at byte offsets
;  *    0..383 (48 words).
;  * a  A single precision integer. Passed in rdx; read at byte offsets
;  *    0..191 (24 words).
;  *
;  * Stack frame (208 bytes):
;  *   [rsp+0   .. rsp+191]  scratch: al - ah, then its square, then the
;  *                         combined middle term
;  *   [rsp+192]             saved r
;  *   [rsp+200]             saved a
;  *
;  * Registers written directly by this routine: rax, rcx, rdx, r8, r9, r10,
;  * r11 and the flags. rcx and rdx are reloaded with the original r and a
;  * just before returning.
;  *
;  * NOTE(review): 208 is a multiple of 16 and nothing is pushed, so rsp at the
;  * three call sites has the same 16-byte alignment as on entry, and the
;  * scratch area starts at rsp itself (no separate home space for the callee).
;  * Presumably sp_3072_sqr_12 follows this file's internal convention and
;  * does not depend on either - confirm against its definition.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_24 PROC
        ; Allocate the frame and save both arguments above the scratch area.
        sub	rsp, 208
        mov	QWORD PTR [rsp+192], rcx
        mov	QWORD PTR [rsp+200], rdx
        ; r9 will receive the final borrow, r10 = scratch, r11 = a+96 (ah).
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+96]
        ; scratch[0..11] = al - ah: one sub followed by eleven sbb.
        ; rax and r8 alternate as the working word so that the load of the
        ; next word and the store of the previous one sit between the
        ; borrow-propagating instructions (mov does not modify the flags).
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	QWORD PTR [r10+88], r8
        ; r9 = 0 - borrow: zero when al >= ah, all ones when al < ah.
        sbb	r9, 0
        ; Cond Negate
        ; Replace scratch[0..11] with its two's complement when r9 is all ones
        ; and leave it unchanged when r9 is zero, giving |al - ah|.
        ; Each word is XORed with the mask r9 and the running carry is added.
        ; First word: "sub rax, r9" adds 1 when r9 = -1; r11 starts as 0 or 1
        ; (neg of the mask) and "sbb r11, 0" turns the borrow flag of that sub
        ; into the carry into the next word. Later words: "setc r11b" captures
        ; the carry of each add (the upper bits of r11 are already zero).
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+88], r8
        ; Three half-size squarings. sp_3072_sqr_12 is defined elsewhere in
        ; this file; it is called with rcx = destination and rdx = source.
        ; NOTE(review): presumably it writes a 24-word square to rcx and
        ; tolerates rcx == rdx (as in the first call) - confirm.
        ; 1) scratch = (al - ah)^2, source and destination both the scratch.
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_3072_sqr_12
        ; 2) r+192 = ah^2 (source a+96). Arguments are reloaded from the frame
        ;    rather than assumed to survive the previous call.
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        add	rdx, 96
        add	rcx, 192
        call	sp_3072_sqr_12
        ; 3) r+0 = al^2 (source a+0).
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        call	sp_3072_sqr_12
        ; NOTE(review): the two loads inside this IFDEF have no visible effect:
        ; rdx is overwritten by the very next instruction and rcx is reloaded
        ; from [rsp+192] before its next use further down.
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
ENDIF
        ; rdx = r+288 and r10 = scratch+96, i.e. both pointers are biased by
        ; 96 so the 24-word chains below use displacements -96..88.
        ; r9 is cleared and then decremented once per chain that borrows.
        mov	rdx, QWORD PTR [rsp+192]
        lea	r10, QWORD PTR [rsp+96]
        add	rdx, 288
        mov	r9, 0
        ; scratch[0..23] -= r[24..47]  (the 24 words at r+192, the ah^2 slot).
        mov	r8, QWORD PTR [r10+-96]
        sub	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+88], rax
        ; Fold this chain's borrow into r9 (r9 -= borrow).
        sbb	r9, 0
        ; rdx = r+96, so displacements -96..88 now address r[0..23] (the al^2
        ; slot). scratch[0..23] -= r[0..23].
        sub	rdx, 192
        mov	r8, QWORD PTR [r10+-96]
        sub	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+88], rax
        ; r9 -= borrow again; r9 is now minus the number of chains that
        ; borrowed (0, -1 or -2).
        sbb	r9, 0
        ; rcx = r+192 (biased like r10); negating r9 makes it the non-negative
        ; borrow count, which is owed back to the words above the middle part.
        mov	rcx, QWORD PTR [rsp+192]
        neg	r9
        add	rcx, 192
        ; r[12..35] -= scratch[0..23]  (the 24 words starting at r+96).
        mov	r8, QWORD PTR [rcx+-96]
        sub	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [rcx+88], rax
        ; r9 = borrow count from the two scratch chains minus the borrow of
        ; this last chain: the net word to propagate into r[36..47].
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+192]
        add	rcx, 288
        ; Add in word
        ; r[36..47] += r9: add the word into r[36] and ripple the carry
        ; through the remaining eleven words with adc 0.
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+88], rax
        ; Reload the original a and r into rdx/rcx, release the frame, return.
        ; NOTE(review): whether callers rely on rcx/rdx being restored is not
        ; visible here - keep these loads unless all callers are checked.
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        add	rsp, 208
        ret
sp_3072_sqr_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_24 PROC
        sub	rsp, 208
        mov	QWORD PTR [rsp+192], rcx
        mov	QWORD PTR [rsp+200], rdx
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	QWORD PTR [r10+88], r8
        sbb	r9, 0
        ; Cond Negate
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+88], r8
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_3072_sqr_avx2_12
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        add	rdx, 96
        add	rcx, 192
        call	sp_3072_sqr_avx2_12
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        call	sp_3072_sqr_avx2_12
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
ENDIF
        mov	rdx, QWORD PTR [rsp+192]
        lea	r10, QWORD PTR [rsp+96]
        add	rdx, 288
        mov	r9, 0
        mov	r8, QWORD PTR [r10+-96]
        sub	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+88], rax
        sbb	r9, 0
        sub	rdx, 192
        mov	r8, QWORD PTR [r10+-96]
        sub	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+88], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+192]
        neg	r9
        add	rcx, 192
        mov	r8, QWORD PTR [rcx+-96]
        sub	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [rcx+88], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+192]
        add	rcx, 288
        ; Add in word
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+88], rax
        mov	rdx, QWORD PTR [rsp+200]
        mov	rcx, QWORD PTR [rsp+192]
        add	rsp, 208
        ret
sp_3072_sqr_avx2_24 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * a is treated as two 24-word (192-byte) halves: al at a, ah at a+192.
;  * Three calls to sp_3072_sqr_24 (defined elsewhere in this file) are made
;  * and their outputs are combined as
;  *   r = al^2 + (al^2 + ah^2 - (al - ah)^2) * 2^(64*24) + ah^2 * 2^(64*48)
;  * The combination below assumes each call leaves a 48-word square at the
;  * address passed in rcx.
;  *
;  * r  A single precision integer. (rcx; words r[0..95] are produced)
;  * a  A single precision integer. (rdx; words a[0..47] are read)
;  *
;  * Registers: rax, r8 alternate as the working word; r9 is the borrow/carry
;  * accumulator; r10, r11 are pointers (r11 is also the carry word during the
;  * conditional negate). rcx and rdx are reloaded with r and a before return.
;  *
;  * Stack frame (400 bytes):
;  *   [rsp+0   .. rsp+383]  48-word temporary t
;  *   [rsp+384]             saved r
;  *   [rsp+392]             saved a
;  *
;  * NOTE(review): no 32-byte shadow space is reserved and rsp is not
;  * re-aligned before the calls; this appears to rely on sp_3072_sqr_24
;  * needing neither - confirm against its definition.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_48 PROC
        ; Allocate the frame and save both arguments for use after the calls.
        sub	rsp, 400
        mov	QWORD PTR [rsp+384], rcx
        mov	QWORD PTR [rsp+392], rdx
        ; r9 = borrow mask (starts at 0), r10 -> t, r11 -> ah (a + 192).
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+192]
        ; t[0..23] = al - ah.
        ; The loads and stores between each sub/sbb are mov instructions,
        ; which leave CF untouched, so the borrow chain runs unbroken through
        ; all 24 words.
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r10+88], r8
        sbb	rax, QWORD PTR [r11+96]
        mov	r8, QWORD PTR [rdx+104]
        mov	QWORD PTR [r10+96], rax
        sbb	r8, QWORD PTR [r11+104]
        mov	rax, QWORD PTR [rdx+112]
        mov	QWORD PTR [r10+104], r8
        sbb	rax, QWORD PTR [r11+112]
        mov	r8, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+112], rax
        sbb	r8, QWORD PTR [r11+120]
        mov	rax, QWORD PTR [rdx+128]
        mov	QWORD PTR [r10+120], r8
        sbb	rax, QWORD PTR [r11+128]
        mov	r8, QWORD PTR [rdx+136]
        mov	QWORD PTR [r10+128], rax
        sbb	r8, QWORD PTR [r11+136]
        mov	rax, QWORD PTR [rdx+144]
        mov	QWORD PTR [r10+136], r8
        sbb	rax, QWORD PTR [r11+144]
        mov	r8, QWORD PTR [rdx+152]
        mov	QWORD PTR [r10+144], rax
        sbb	r8, QWORD PTR [r11+152]
        mov	rax, QWORD PTR [rdx+160]
        mov	QWORD PTR [r10+152], r8
        sbb	rax, QWORD PTR [r11+160]
        mov	r8, QWORD PTR [rdx+168]
        mov	QWORD PTR [r10+160], rax
        sbb	r8, QWORD PTR [r11+168]
        mov	rax, QWORD PTR [rdx+176]
        mov	QWORD PTR [r10+168], r8
        sbb	rax, QWORD PTR [r11+176]
        mov	r8, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+176], rax
        sbb	r8, QWORD PTR [r11+184]
        mov	QWORD PTR [r10+184], r8
        ; r9 = 0 - borrow: all ones if the subtraction borrowed (al < ah),
        ; zero otherwise.
        sbb	r9, 0
        ; Cond Negate
        ; Branch-free: when r9 is all ones, t[0..23] is replaced by its two's
        ; complement (xor every word with r9, then add 1), i.e. ah - al; when
        ; r9 is zero every xor/add below is a no-op.
        ; First word: "sub rax, r9" adds 1 when r9 = -1. neg leaves r11 = 1
        ; in that case, and the following sbb removes the borrow flag of that
        ; sub, so r11 ends up 1 only if the +1 carried out of word 0.
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        ; Remaining words: xor with the mask, add the incoming carry held in
        ; r11 (0 or 1), then setc r11b records the outgoing carry. Only the
        ; low byte is written; the upper bits of r11 are already zero.
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+96]
        setc	r11b
        mov	QWORD PTR [r10+88], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+104]
        setc	r11b
        mov	QWORD PTR [r10+96], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+112]
        setc	r11b
        mov	QWORD PTR [r10+104], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+120]
        setc	r11b
        mov	QWORD PTR [r10+112], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+128]
        setc	r11b
        mov	QWORD PTR [r10+120], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+136]
        setc	r11b
        mov	QWORD PTR [r10+128], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+144]
        setc	r11b
        mov	QWORD PTR [r10+136], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+152]
        setc	r11b
        mov	QWORD PTR [r10+144], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+160]
        setc	r11b
        mov	QWORD PTR [r10+152], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+168]
        setc	r11b
        mov	QWORD PTR [r10+160], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+176]
        setc	r11b
        mov	QWORD PTR [r10+168], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+184]
        setc	r11b
        mov	QWORD PTR [r10+176], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+184], r8
        ; Call 1: source and destination are both the temporary (rdx = rcx =
        ; rsp), so t is expected to become the 48-word square (al - ah)^2.
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_3072_sqr_24
        ; Call 2: source ah (a + 192), destination r + 384 (words r[48..95]).
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        add	rdx, 192
        add	rcx, 384
        call	sp_3072_sqr_24
        ; Call 3: source al (a), destination r (words r[0..47]).
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        call	sp_3072_sqr_24
        ; NOTE(review): the rdx loaded inside this IFDEF is overwritten by the
        ; very next instruction and rcx is reloaded before its next use, so
        ; this reload has no visible effect in this routine.
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
ENDIF
        ; rdx = r + 576 and r10 = t + 192: both bases sit 24 words into their
        ; 48-word operand, so displacements -192..184 cover all 48 words.
        ; r9 is cleared to collect the borrows of the next two subtractions.
        mov	rdx, QWORD PTR [rsp+384]
        lea	r10, QWORD PTR [rsp+192]
        add	rdx, 576
        mov	r9, 0
        ; t[0..47] -= r[48..95]   (the output of call 2)
        mov	r8, QWORD PTR [r10+-192]
        sub	r8, QWORD PTR [rdx+-192]
        mov	rax, QWORD PTR [r10+-184]
        mov	QWORD PTR [r10+-192], r8
        sbb	rax, QWORD PTR [rdx+-184]
        mov	r8, QWORD PTR [r10+-176]
        mov	QWORD PTR [r10+-184], rax
        sbb	r8, QWORD PTR [rdx+-176]
        mov	rax, QWORD PTR [r10+-168]
        mov	QWORD PTR [r10+-176], r8
        sbb	rax, QWORD PTR [rdx+-168]
        mov	r8, QWORD PTR [r10+-160]
        mov	QWORD PTR [r10+-168], rax
        sbb	r8, QWORD PTR [rdx+-160]
        mov	rax, QWORD PTR [r10+-152]
        mov	QWORD PTR [r10+-160], r8
        sbb	rax, QWORD PTR [rdx+-152]
        mov	r8, QWORD PTR [r10+-144]
        mov	QWORD PTR [r10+-152], rax
        sbb	r8, QWORD PTR [rdx+-144]
        mov	rax, QWORD PTR [r10+-136]
        mov	QWORD PTR [r10+-144], r8
        sbb	rax, QWORD PTR [rdx+-136]
        mov	r8, QWORD PTR [r10+-128]
        mov	QWORD PTR [r10+-136], rax
        sbb	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [r10+128]
        mov	QWORD PTR [r10+120], rax
        sbb	r8, QWORD PTR [rdx+128]
        mov	rax, QWORD PTR [r10+136]
        mov	QWORD PTR [r10+128], r8
        sbb	rax, QWORD PTR [rdx+136]
        mov	r8, QWORD PTR [r10+144]
        mov	QWORD PTR [r10+136], rax
        sbb	r8, QWORD PTR [rdx+144]
        mov	rax, QWORD PTR [r10+152]
        mov	QWORD PTR [r10+144], r8
        sbb	rax, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [r10+160]
        mov	QWORD PTR [r10+152], rax
        sbb	r8, QWORD PTR [rdx+160]
        mov	rax, QWORD PTR [r10+168]
        mov	QWORD PTR [r10+160], r8
        sbb	rax, QWORD PTR [rdx+168]
        mov	r8, QWORD PTR [r10+176]
        mov	QWORD PTR [r10+168], rax
        sbb	r8, QWORD PTR [rdx+176]
        mov	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [r10+176], r8
        sbb	rax, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+184], rax
        ; r9 -= borrow of the subtraction above.
        sbb	r9, 0
        ; rdx = r + 192, so the same displacements now address r[0..47].
        sub	rdx, 384
        ; t[0..47] -= r[0..47]   (the output of call 3)
        mov	r8, QWORD PTR [r10+-192]
        sub	r8, QWORD PTR [rdx+-192]
        mov	rax, QWORD PTR [r10+-184]
        mov	QWORD PTR [r10+-192], r8
        sbb	rax, QWORD PTR [rdx+-184]
        mov	r8, QWORD PTR [r10+-176]
        mov	QWORD PTR [r10+-184], rax
        sbb	r8, QWORD PTR [rdx+-176]
        mov	rax, QWORD PTR [r10+-168]
        mov	QWORD PTR [r10+-176], r8
        sbb	rax, QWORD PTR [rdx+-168]
        mov	r8, QWORD PTR [r10+-160]
        mov	QWORD PTR [r10+-168], rax
        sbb	r8, QWORD PTR [rdx+-160]
        mov	rax, QWORD PTR [r10+-152]
        mov	QWORD PTR [r10+-160], r8
        sbb	rax, QWORD PTR [rdx+-152]
        mov	r8, QWORD PTR [r10+-144]
        mov	QWORD PTR [r10+-152], rax
        sbb	r8, QWORD PTR [rdx+-144]
        mov	rax, QWORD PTR [r10+-136]
        mov	QWORD PTR [r10+-144], r8
        sbb	rax, QWORD PTR [rdx+-136]
        mov	r8, QWORD PTR [r10+-128]
        mov	QWORD PTR [r10+-136], rax
        sbb	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [r10+128]
        mov	QWORD PTR [r10+120], rax
        sbb	r8, QWORD PTR [rdx+128]
        mov	rax, QWORD PTR [r10+136]
        mov	QWORD PTR [r10+128], r8
        sbb	rax, QWORD PTR [rdx+136]
        mov	r8, QWORD PTR [r10+144]
        mov	QWORD PTR [r10+136], rax
        sbb	r8, QWORD PTR [rdx+144]
        mov	rax, QWORD PTR [r10+152]
        mov	QWORD PTR [r10+144], r8
        sbb	rax, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [r10+160]
        mov	QWORD PTR [r10+152], rax
        sbb	r8, QWORD PTR [rdx+160]
        mov	rax, QWORD PTR [r10+168]
        mov	QWORD PTR [r10+160], r8
        sbb	rax, QWORD PTR [rdx+168]
        mov	r8, QWORD PTR [r10+176]
        mov	QWORD PTR [r10+168], rax
        sbb	r8, QWORD PTR [rdx+176]
        mov	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [r10+176], r8
        sbb	rax, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+184], rax
        ; r9 -= borrow of the subtraction above; r9 now holds minus the
        ; number of borrows (0, -1 or -2) from the two subtractions.
        sbb	r9, 0
        ; rcx = r + 384, so displacements -192..184 address r[24..71].
        ; neg turns r9 into the positive borrow count; its flags are
        ; overwritten by the add/sub that follow.
        mov	rcx, QWORD PTR [rsp+384]
        neg	r9
        add	rcx, 384
        ; r[24..71] -= t[0..47].
        ; Mathematically (al - ah)^2 - ah^2 - al^2 = -2*al*ah, so this
        ; subtraction adds the Karatsuba cross term at word offset 24.
        mov	r8, QWORD PTR [rcx+-192]
        sub	r8, QWORD PTR [r10+-192]
        mov	rax, QWORD PTR [rcx+-184]
        mov	QWORD PTR [rcx+-192], r8
        sbb	rax, QWORD PTR [r10+-184]
        mov	r8, QWORD PTR [rcx+-176]
        mov	QWORD PTR [rcx+-184], rax
        sbb	r8, QWORD PTR [r10+-176]
        mov	rax, QWORD PTR [rcx+-168]
        mov	QWORD PTR [rcx+-176], r8
        sbb	rax, QWORD PTR [r10+-168]
        mov	r8, QWORD PTR [rcx+-160]
        mov	QWORD PTR [rcx+-168], rax
        sbb	r8, QWORD PTR [r10+-160]
        mov	rax, QWORD PTR [rcx+-152]
        mov	QWORD PTR [rcx+-160], r8
        sbb	rax, QWORD PTR [r10+-152]
        mov	r8, QWORD PTR [rcx+-144]
        mov	QWORD PTR [rcx+-152], rax
        sbb	r8, QWORD PTR [r10+-144]
        mov	rax, QWORD PTR [rcx+-136]
        mov	QWORD PTR [rcx+-144], r8
        sbb	rax, QWORD PTR [r10+-136]
        mov	r8, QWORD PTR [rcx+-128]
        mov	QWORD PTR [rcx+-136], rax
        sbb	r8, QWORD PTR [r10+-128]
        mov	rax, QWORD PTR [rcx+-120]
        mov	QWORD PTR [rcx+-128], r8
        sbb	rax, QWORD PTR [r10+-120]
        mov	r8, QWORD PTR [rcx+-112]
        mov	QWORD PTR [rcx+-120], rax
        sbb	r8, QWORD PTR [r10+-112]
        mov	rax, QWORD PTR [rcx+-104]
        mov	QWORD PTR [rcx+-112], r8
        sbb	rax, QWORD PTR [r10+-104]
        mov	r8, QWORD PTR [rcx+-96]
        mov	QWORD PTR [rcx+-104], rax
        sbb	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        sbb	r8, QWORD PTR [r10+96]
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        sbb	rax, QWORD PTR [r10+104]
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        sbb	r8, QWORD PTR [r10+112]
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        sbb	rax, QWORD PTR [r10+120]
        mov	r8, QWORD PTR [rcx+128]
        mov	QWORD PTR [rcx+120], rax
        sbb	r8, QWORD PTR [r10+128]
        mov	rax, QWORD PTR [rcx+136]
        mov	QWORD PTR [rcx+128], r8
        sbb	rax, QWORD PTR [r10+136]
        mov	r8, QWORD PTR [rcx+144]
        mov	QWORD PTR [rcx+136], rax
        sbb	r8, QWORD PTR [r10+144]
        mov	rax, QWORD PTR [rcx+152]
        mov	QWORD PTR [rcx+144], r8
        sbb	rax, QWORD PTR [r10+152]
        mov	r8, QWORD PTR [rcx+160]
        mov	QWORD PTR [rcx+152], rax
        sbb	r8, QWORD PTR [r10+160]
        mov	rax, QWORD PTR [rcx+168]
        mov	QWORD PTR [rcx+160], r8
        sbb	rax, QWORD PTR [r10+168]
        mov	r8, QWORD PTR [rcx+176]
        mov	QWORD PTR [rcx+168], rax
        sbb	r8, QWORD PTR [r10+176]
        mov	rax, QWORD PTR [rcx+184]
        mov	QWORD PTR [rcx+176], r8
        sbb	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [rcx+184], rax
        ; r9 = (borrow count from forming t) - (borrow of this subtraction):
        ; the net word still owed to the top of the result.
        sbb	r9, 0
        ; rcx = r + 576, i.e. word r[72], the first word above the range just
        ; updated.
        mov	rcx, QWORD PTR [rsp+384]
        add	rcx, 576
        ; Add in word
        ; r[72] += r9, then the carry is rippled with adc 0 through r[95].
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+128]
        mov	QWORD PTR [rcx+120], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+136]
        mov	QWORD PTR [rcx+128], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+144]
        mov	QWORD PTR [rcx+136], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+152]
        mov	QWORD PTR [rcx+144], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+160]
        mov	QWORD PTR [rcx+152], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+168]
        mov	QWORD PTR [rcx+160], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+176]
        mov	QWORD PTR [rcx+168], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+184]
        mov	QWORD PTR [rcx+176], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+184], rax
        ; Reload the original a and r into rdx and rcx, release the frame and
        ; return.
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        add	rsp, 400
        ret
sp_3072_sqr_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_48 PROC
        sub	rsp, 400
        mov	QWORD PTR [rsp+384], rcx
        mov	QWORD PTR [rsp+392], rdx
        mov	r9, 0
        mov	r10, rsp
        lea	r11, QWORD PTR [rdx+192]
        mov	rax, QWORD PTR [rdx]
        sub	rax, QWORD PTR [r11]
        mov	r8, QWORD PTR [rdx+8]
        mov	QWORD PTR [r10], rax
        sbb	r8, QWORD PTR [r11+8]
        mov	rax, QWORD PTR [rdx+16]
        mov	QWORD PTR [r10+8], r8
        sbb	rax, QWORD PTR [r11+16]
        mov	r8, QWORD PTR [rdx+24]
        mov	QWORD PTR [r10+16], rax
        sbb	r8, QWORD PTR [r11+24]
        mov	rax, QWORD PTR [rdx+32]
        mov	QWORD PTR [r10+24], r8
        sbb	rax, QWORD PTR [r11+32]
        mov	r8, QWORD PTR [rdx+40]
        mov	QWORD PTR [r10+32], rax
        sbb	r8, QWORD PTR [r11+40]
        mov	rax, QWORD PTR [rdx+48]
        mov	QWORD PTR [r10+40], r8
        sbb	rax, QWORD PTR [r11+48]
        mov	r8, QWORD PTR [rdx+56]
        mov	QWORD PTR [r10+48], rax
        sbb	r8, QWORD PTR [r11+56]
        mov	rax, QWORD PTR [rdx+64]
        mov	QWORD PTR [r10+56], r8
        sbb	rax, QWORD PTR [r11+64]
        mov	r8, QWORD PTR [rdx+72]
        mov	QWORD PTR [r10+64], rax
        sbb	r8, QWORD PTR [r11+72]
        mov	rax, QWORD PTR [rdx+80]
        mov	QWORD PTR [r10+72], r8
        sbb	rax, QWORD PTR [r11+80]
        mov	r8, QWORD PTR [rdx+88]
        mov	QWORD PTR [r10+80], rax
        sbb	r8, QWORD PTR [r11+88]
        mov	rax, QWORD PTR [rdx+96]
        mov	QWORD PTR [r10+88], r8
        sbb	rax, QWORD PTR [r11+96]
        mov	r8, QWORD PTR [rdx+104]
        mov	QWORD PTR [r10+96], rax
        sbb	r8, QWORD PTR [r11+104]
        mov	rax, QWORD PTR [rdx+112]
        mov	QWORD PTR [r10+104], r8
        sbb	rax, QWORD PTR [r11+112]
        mov	r8, QWORD PTR [rdx+120]
        mov	QWORD PTR [r10+112], rax
        sbb	r8, QWORD PTR [r11+120]
        mov	rax, QWORD PTR [rdx+128]
        mov	QWORD PTR [r10+120], r8
        sbb	rax, QWORD PTR [r11+128]
        mov	r8, QWORD PTR [rdx+136]
        mov	QWORD PTR [r10+128], rax
        sbb	r8, QWORD PTR [r11+136]
        mov	rax, QWORD PTR [rdx+144]
        mov	QWORD PTR [r10+136], r8
        sbb	rax, QWORD PTR [r11+144]
        mov	r8, QWORD PTR [rdx+152]
        mov	QWORD PTR [r10+144], rax
        sbb	r8, QWORD PTR [r11+152]
        mov	rax, QWORD PTR [rdx+160]
        mov	QWORD PTR [r10+152], r8
        sbb	rax, QWORD PTR [r11+160]
        mov	r8, QWORD PTR [rdx+168]
        mov	QWORD PTR [r10+160], rax
        sbb	r8, QWORD PTR [r11+168]
        mov	rax, QWORD PTR [rdx+176]
        mov	QWORD PTR [r10+168], r8
        sbb	rax, QWORD PTR [r11+176]
        mov	r8, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+176], rax
        sbb	r8, QWORD PTR [r11+184]
        mov	QWORD PTR [r10+184], r8
        sbb	r9, 0
        ; Cond Negate
        mov	rax, QWORD PTR [r10]
        mov	r11, r9
        xor	rax, r9
        neg	r11
        sub	rax, r9
        mov	r8, QWORD PTR [r10+8]
        sbb	r11, 0
        mov	QWORD PTR [r10], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+16]
        setc	r11b
        mov	QWORD PTR [r10+8], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+24]
        setc	r11b
        mov	QWORD PTR [r10+16], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+32]
        setc	r11b
        mov	QWORD PTR [r10+24], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+40]
        setc	r11b
        mov	QWORD PTR [r10+32], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+48]
        setc	r11b
        mov	QWORD PTR [r10+40], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+56]
        setc	r11b
        mov	QWORD PTR [r10+48], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+64]
        setc	r11b
        mov	QWORD PTR [r10+56], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+72]
        setc	r11b
        mov	QWORD PTR [r10+64], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+80]
        setc	r11b
        mov	QWORD PTR [r10+72], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+88]
        setc	r11b
        mov	QWORD PTR [r10+80], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+96]
        setc	r11b
        mov	QWORD PTR [r10+88], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+104]
        setc	r11b
        mov	QWORD PTR [r10+96], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+112]
        setc	r11b
        mov	QWORD PTR [r10+104], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+120]
        setc	r11b
        mov	QWORD PTR [r10+112], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+128]
        setc	r11b
        mov	QWORD PTR [r10+120], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+136]
        setc	r11b
        mov	QWORD PTR [r10+128], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+144]
        setc	r11b
        mov	QWORD PTR [r10+136], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+152]
        setc	r11b
        mov	QWORD PTR [r10+144], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+160]
        setc	r11b
        mov	QWORD PTR [r10+152], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+168]
        setc	r11b
        mov	QWORD PTR [r10+160], rax
        xor	r8, r9
        add	r8, r11
        mov	rax, QWORD PTR [r10+176]
        setc	r11b
        mov	QWORD PTR [r10+168], r8
        xor	rax, r9
        add	rax, r11
        mov	r8, QWORD PTR [r10+184]
        setc	r11b
        mov	QWORD PTR [r10+176], rax
        xor	r8, r9
        add	r8, r11
        mov	QWORD PTR [r10+184], r8
        mov	rdx, r10
        mov	rcx, rsp
        call	sp_3072_sqr_avx2_24
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        add	rdx, 192
        add	rcx, 384
        call	sp_3072_sqr_avx2_24
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        call	sp_3072_sqr_avx2_24
IFDEF _WIN64
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
ENDIF
        mov	rdx, QWORD PTR [rsp+384]
        lea	r10, QWORD PTR [rsp+192]
        add	rdx, 576
        mov	r9, 0
        mov	r8, QWORD PTR [r10+-192]
        sub	r8, QWORD PTR [rdx+-192]
        mov	rax, QWORD PTR [r10+-184]
        mov	QWORD PTR [r10+-192], r8
        sbb	rax, QWORD PTR [rdx+-184]
        mov	r8, QWORD PTR [r10+-176]
        mov	QWORD PTR [r10+-184], rax
        sbb	r8, QWORD PTR [rdx+-176]
        mov	rax, QWORD PTR [r10+-168]
        mov	QWORD PTR [r10+-176], r8
        sbb	rax, QWORD PTR [rdx+-168]
        mov	r8, QWORD PTR [r10+-160]
        mov	QWORD PTR [r10+-168], rax
        sbb	r8, QWORD PTR [rdx+-160]
        mov	rax, QWORD PTR [r10+-152]
        mov	QWORD PTR [r10+-160], r8
        sbb	rax, QWORD PTR [rdx+-152]
        mov	r8, QWORD PTR [r10+-144]
        mov	QWORD PTR [r10+-152], rax
        sbb	r8, QWORD PTR [rdx+-144]
        mov	rax, QWORD PTR [r10+-136]
        mov	QWORD PTR [r10+-144], r8
        sbb	rax, QWORD PTR [rdx+-136]
        mov	r8, QWORD PTR [r10+-128]
        mov	QWORD PTR [r10+-136], rax
        sbb	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [r10+128]
        mov	QWORD PTR [r10+120], rax
        sbb	r8, QWORD PTR [rdx+128]
        mov	rax, QWORD PTR [r10+136]
        mov	QWORD PTR [r10+128], r8
        sbb	rax, QWORD PTR [rdx+136]
        mov	r8, QWORD PTR [r10+144]
        mov	QWORD PTR [r10+136], rax
        sbb	r8, QWORD PTR [rdx+144]
        mov	rax, QWORD PTR [r10+152]
        mov	QWORD PTR [r10+144], r8
        sbb	rax, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [r10+160]
        mov	QWORD PTR [r10+152], rax
        sbb	r8, QWORD PTR [rdx+160]
        mov	rax, QWORD PTR [r10+168]
        mov	QWORD PTR [r10+160], r8
        sbb	rax, QWORD PTR [rdx+168]
        mov	r8, QWORD PTR [r10+176]
        mov	QWORD PTR [r10+168], rax
        sbb	r8, QWORD PTR [rdx+176]
        mov	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [r10+176], r8
        sbb	rax, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+184], rax
        sbb	r9, 0
        sub	rdx, 384
        mov	r8, QWORD PTR [r10+-192]
        sub	r8, QWORD PTR [rdx+-192]
        mov	rax, QWORD PTR [r10+-184]
        mov	QWORD PTR [r10+-192], r8
        sbb	rax, QWORD PTR [rdx+-184]
        mov	r8, QWORD PTR [r10+-176]
        mov	QWORD PTR [r10+-184], rax
        sbb	r8, QWORD PTR [rdx+-176]
        mov	rax, QWORD PTR [r10+-168]
        mov	QWORD PTR [r10+-176], r8
        sbb	rax, QWORD PTR [rdx+-168]
        mov	r8, QWORD PTR [r10+-160]
        mov	QWORD PTR [r10+-168], rax
        sbb	r8, QWORD PTR [rdx+-160]
        mov	rax, QWORD PTR [r10+-152]
        mov	QWORD PTR [r10+-160], r8
        sbb	rax, QWORD PTR [rdx+-152]
        mov	r8, QWORD PTR [r10+-144]
        mov	QWORD PTR [r10+-152], rax
        sbb	r8, QWORD PTR [rdx+-144]
        mov	rax, QWORD PTR [r10+-136]
        mov	QWORD PTR [r10+-144], r8
        sbb	rax, QWORD PTR [rdx+-136]
        mov	r8, QWORD PTR [r10+-128]
        mov	QWORD PTR [r10+-136], rax
        sbb	r8, QWORD PTR [rdx+-128]
        mov	rax, QWORD PTR [r10+-120]
        mov	QWORD PTR [r10+-128], r8
        sbb	rax, QWORD PTR [rdx+-120]
        mov	r8, QWORD PTR [r10+-112]
        mov	QWORD PTR [r10+-120], rax
        sbb	r8, QWORD PTR [rdx+-112]
        mov	rax, QWORD PTR [r10+-104]
        mov	QWORD PTR [r10+-112], r8
        sbb	rax, QWORD PTR [rdx+-104]
        mov	r8, QWORD PTR [r10+-96]
        mov	QWORD PTR [r10+-104], rax
        sbb	r8, QWORD PTR [rdx+-96]
        mov	rax, QWORD PTR [r10+-88]
        mov	QWORD PTR [r10+-96], r8
        sbb	rax, QWORD PTR [rdx+-88]
        mov	r8, QWORD PTR [r10+-80]
        mov	QWORD PTR [r10+-88], rax
        sbb	r8, QWORD PTR [rdx+-80]
        mov	rax, QWORD PTR [r10+-72]
        mov	QWORD PTR [r10+-80], r8
        sbb	rax, QWORD PTR [rdx+-72]
        mov	r8, QWORD PTR [r10+-64]
        mov	QWORD PTR [r10+-72], rax
        sbb	r8, QWORD PTR [rdx+-64]
        mov	rax, QWORD PTR [r10+-56]
        mov	QWORD PTR [r10+-64], r8
        sbb	rax, QWORD PTR [rdx+-56]
        mov	r8, QWORD PTR [r10+-48]
        mov	QWORD PTR [r10+-56], rax
        sbb	r8, QWORD PTR [rdx+-48]
        mov	rax, QWORD PTR [r10+-40]
        mov	QWORD PTR [r10+-48], r8
        sbb	rax, QWORD PTR [rdx+-40]
        mov	r8, QWORD PTR [r10+-32]
        mov	QWORD PTR [r10+-40], rax
        sbb	r8, QWORD PTR [rdx+-32]
        mov	rax, QWORD PTR [r10+-24]
        mov	QWORD PTR [r10+-32], r8
        sbb	rax, QWORD PTR [rdx+-24]
        mov	r8, QWORD PTR [r10+-16]
        mov	QWORD PTR [r10+-24], rax
        sbb	r8, QWORD PTR [rdx+-16]
        mov	rax, QWORD PTR [r10+-8]
        mov	QWORD PTR [r10+-16], r8
        sbb	rax, QWORD PTR [rdx+-8]
        mov	r8, QWORD PTR [r10]
        mov	QWORD PTR [r10+-8], rax
        sbb	r8, QWORD PTR [rdx]
        mov	rax, QWORD PTR [r10+8]
        mov	QWORD PTR [r10], r8
        sbb	rax, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [r10+16]
        mov	QWORD PTR [r10+8], rax
        sbb	r8, QWORD PTR [rdx+16]
        mov	rax, QWORD PTR [r10+24]
        mov	QWORD PTR [r10+16], r8
        sbb	rax, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [r10+32]
        mov	QWORD PTR [r10+24], rax
        sbb	r8, QWORD PTR [rdx+32]
        mov	rax, QWORD PTR [r10+40]
        mov	QWORD PTR [r10+32], r8
        sbb	rax, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [r10+48]
        mov	QWORD PTR [r10+40], rax
        sbb	r8, QWORD PTR [rdx+48]
        mov	rax, QWORD PTR [r10+56]
        mov	QWORD PTR [r10+48], r8
        sbb	rax, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [r10+64]
        mov	QWORD PTR [r10+56], rax
        sbb	r8, QWORD PTR [rdx+64]
        mov	rax, QWORD PTR [r10+72]
        mov	QWORD PTR [r10+64], r8
        sbb	rax, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [r10+80]
        mov	QWORD PTR [r10+72], rax
        sbb	r8, QWORD PTR [rdx+80]
        mov	rax, QWORD PTR [r10+88]
        mov	QWORD PTR [r10+80], r8
        sbb	rax, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [r10+96]
        mov	QWORD PTR [r10+88], rax
        sbb	r8, QWORD PTR [rdx+96]
        mov	rax, QWORD PTR [r10+104]
        mov	QWORD PTR [r10+96], r8
        sbb	rax, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [r10+112]
        mov	QWORD PTR [r10+104], rax
        sbb	r8, QWORD PTR [rdx+112]
        mov	rax, QWORD PTR [r10+120]
        mov	QWORD PTR [r10+112], r8
        sbb	rax, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [r10+128]
        mov	QWORD PTR [r10+120], rax
        sbb	r8, QWORD PTR [rdx+128]
        mov	rax, QWORD PTR [r10+136]
        mov	QWORD PTR [r10+128], r8
        sbb	rax, QWORD PTR [rdx+136]
        mov	r8, QWORD PTR [r10+144]
        mov	QWORD PTR [r10+136], rax
        sbb	r8, QWORD PTR [rdx+144]
        mov	rax, QWORD PTR [r10+152]
        mov	QWORD PTR [r10+144], r8
        sbb	rax, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [r10+160]
        mov	QWORD PTR [r10+152], rax
        sbb	r8, QWORD PTR [rdx+160]
        mov	rax, QWORD PTR [r10+168]
        mov	QWORD PTR [r10+160], r8
        sbb	rax, QWORD PTR [rdx+168]
        mov	r8, QWORD PTR [r10+176]
        mov	QWORD PTR [r10+168], rax
        sbb	r8, QWORD PTR [rdx+176]
        mov	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [r10+176], r8
        sbb	rax, QWORD PTR [rdx+184]
        mov	QWORD PTR [r10+184], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+384]
        neg	r9
        add	rcx, 384
        mov	r8, QWORD PTR [rcx+-192]
        sub	r8, QWORD PTR [r10+-192]
        mov	rax, QWORD PTR [rcx+-184]
        mov	QWORD PTR [rcx+-192], r8
        sbb	rax, QWORD PTR [r10+-184]
        mov	r8, QWORD PTR [rcx+-176]
        mov	QWORD PTR [rcx+-184], rax
        sbb	r8, QWORD PTR [r10+-176]
        mov	rax, QWORD PTR [rcx+-168]
        mov	QWORD PTR [rcx+-176], r8
        sbb	rax, QWORD PTR [r10+-168]
        mov	r8, QWORD PTR [rcx+-160]
        mov	QWORD PTR [rcx+-168], rax
        sbb	r8, QWORD PTR [r10+-160]
        mov	rax, QWORD PTR [rcx+-152]
        mov	QWORD PTR [rcx+-160], r8
        sbb	rax, QWORD PTR [r10+-152]
        mov	r8, QWORD PTR [rcx+-144]
        mov	QWORD PTR [rcx+-152], rax
        sbb	r8, QWORD PTR [r10+-144]
        mov	rax, QWORD PTR [rcx+-136]
        mov	QWORD PTR [rcx+-144], r8
        sbb	rax, QWORD PTR [r10+-136]
        mov	r8, QWORD PTR [rcx+-128]
        mov	QWORD PTR [rcx+-136], rax
        sbb	r8, QWORD PTR [r10+-128]
        mov	rax, QWORD PTR [rcx+-120]
        mov	QWORD PTR [rcx+-128], r8
        sbb	rax, QWORD PTR [r10+-120]
        mov	r8, QWORD PTR [rcx+-112]
        mov	QWORD PTR [rcx+-120], rax
        sbb	r8, QWORD PTR [r10+-112]
        mov	rax, QWORD PTR [rcx+-104]
        mov	QWORD PTR [rcx+-112], r8
        sbb	rax, QWORD PTR [r10+-104]
        mov	r8, QWORD PTR [rcx+-96]
        mov	QWORD PTR [rcx+-104], rax
        sbb	r8, QWORD PTR [r10+-96]
        mov	rax, QWORD PTR [rcx+-88]
        mov	QWORD PTR [rcx+-96], r8
        sbb	rax, QWORD PTR [r10+-88]
        mov	r8, QWORD PTR [rcx+-80]
        mov	QWORD PTR [rcx+-88], rax
        sbb	r8, QWORD PTR [r10+-80]
        mov	rax, QWORD PTR [rcx+-72]
        mov	QWORD PTR [rcx+-80], r8
        sbb	rax, QWORD PTR [r10+-72]
        mov	r8, QWORD PTR [rcx+-64]
        mov	QWORD PTR [rcx+-72], rax
        sbb	r8, QWORD PTR [r10+-64]
        mov	rax, QWORD PTR [rcx+-56]
        mov	QWORD PTR [rcx+-64], r8
        sbb	rax, QWORD PTR [r10+-56]
        mov	r8, QWORD PTR [rcx+-48]
        mov	QWORD PTR [rcx+-56], rax
        sbb	r8, QWORD PTR [r10+-48]
        mov	rax, QWORD PTR [rcx+-40]
        mov	QWORD PTR [rcx+-48], r8
        sbb	rax, QWORD PTR [r10+-40]
        mov	r8, QWORD PTR [rcx+-32]
        mov	QWORD PTR [rcx+-40], rax
        sbb	r8, QWORD PTR [r10+-32]
        mov	rax, QWORD PTR [rcx+-24]
        mov	QWORD PTR [rcx+-32], r8
        sbb	rax, QWORD PTR [r10+-24]
        mov	r8, QWORD PTR [rcx+-16]
        mov	QWORD PTR [rcx+-24], rax
        sbb	r8, QWORD PTR [r10+-16]
        mov	rax, QWORD PTR [rcx+-8]
        mov	QWORD PTR [rcx+-16], r8
        sbb	rax, QWORD PTR [r10+-8]
        mov	r8, QWORD PTR [rcx]
        mov	QWORD PTR [rcx+-8], rax
        sbb	r8, QWORD PTR [r10]
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        sbb	rax, QWORD PTR [r10+8]
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        sbb	r8, QWORD PTR [r10+16]
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        sbb	rax, QWORD PTR [r10+24]
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        sbb	r8, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        sbb	rax, QWORD PTR [r10+40]
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        sbb	r8, QWORD PTR [r10+48]
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        sbb	rax, QWORD PTR [r10+56]
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        sbb	r8, QWORD PTR [r10+64]
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        sbb	rax, QWORD PTR [r10+72]
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        sbb	r8, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        sbb	rax, QWORD PTR [r10+88]
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        sbb	r8, QWORD PTR [r10+96]
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        sbb	rax, QWORD PTR [r10+104]
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        sbb	r8, QWORD PTR [r10+112]
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        sbb	rax, QWORD PTR [r10+120]
        mov	r8, QWORD PTR [rcx+128]
        mov	QWORD PTR [rcx+120], rax
        sbb	r8, QWORD PTR [r10+128]
        mov	rax, QWORD PTR [rcx+136]
        mov	QWORD PTR [rcx+128], r8
        sbb	rax, QWORD PTR [r10+136]
        mov	r8, QWORD PTR [rcx+144]
        mov	QWORD PTR [rcx+136], rax
        sbb	r8, QWORD PTR [r10+144]
        mov	rax, QWORD PTR [rcx+152]
        mov	QWORD PTR [rcx+144], r8
        sbb	rax, QWORD PTR [r10+152]
        mov	r8, QWORD PTR [rcx+160]
        mov	QWORD PTR [rcx+152], rax
        sbb	r8, QWORD PTR [r10+160]
        mov	rax, QWORD PTR [rcx+168]
        mov	QWORD PTR [rcx+160], r8
        sbb	rax, QWORD PTR [r10+168]
        mov	r8, QWORD PTR [rcx+176]
        mov	QWORD PTR [rcx+168], rax
        sbb	r8, QWORD PTR [r10+176]
        mov	rax, QWORD PTR [rcx+184]
        mov	QWORD PTR [rcx+176], r8
        sbb	rax, QWORD PTR [r10+184]
        mov	QWORD PTR [rcx+184], rax
        sbb	r9, 0
        mov	rcx, QWORD PTR [rsp+384]
        add	rcx, 576
        ; Add in word
        mov	r8, QWORD PTR [rcx]
        add	r8, r9
        mov	rax, QWORD PTR [rcx+8]
        mov	QWORD PTR [rcx], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+16]
        mov	QWORD PTR [rcx+8], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+24]
        mov	QWORD PTR [rcx+16], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+32]
        mov	QWORD PTR [rcx+24], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+40]
        mov	QWORD PTR [rcx+32], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+48]
        mov	QWORD PTR [rcx+40], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+56]
        mov	QWORD PTR [rcx+48], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+64]
        mov	QWORD PTR [rcx+56], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+72]
        mov	QWORD PTR [rcx+64], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+80]
        mov	QWORD PTR [rcx+72], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+88]
        mov	QWORD PTR [rcx+80], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+96]
        mov	QWORD PTR [rcx+88], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+104]
        mov	QWORD PTR [rcx+96], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+112]
        mov	QWORD PTR [rcx+104], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+120]
        mov	QWORD PTR [rcx+112], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+128]
        mov	QWORD PTR [rcx+120], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+136]
        mov	QWORD PTR [rcx+128], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+144]
        mov	QWORD PTR [rcx+136], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+152]
        mov	QWORD PTR [rcx+144], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+160]
        mov	QWORD PTR [rcx+152], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+168]
        mov	QWORD PTR [rcx+160], r8
        adc	rax, 0
        mov	r8, QWORD PTR [rcx+176]
        mov	QWORD PTR [rcx+168], rax
        adc	r8, 0
        mov	rax, QWORD PTR [rcx+184]
        mov	QWORD PTR [rcx+176], r8
        adc	rax, 0
        mov	QWORD PTR [rcx+184], rax
        mov	rdx, QWORD PTR [rsp+392]
        mov	rcx, QWORD PTR [rsp+384]
        add	rsp, 400
        ret
sp_3072_sqr_avx2_48 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * Leaf routine: only volatile registers (rax, rdx, r9, r10) are written and
;  * rsp is never adjusted, so no prologue, epilogue or unwind data is needed
;  * under the Microsoft x64 calling convention.
;  *
;  * r  A single precision integer. 49 words are written: r[0..48].
;  * a  A single precision integer. 48 words are read: a[0..47].
;  * b  A single precision digit.
;  *
;  * In:  rcx = r, rdx = a, r8 = b
;  * a[i] is always read before r[i] is written.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_d_48 PROC
        ; Every mul overwrites rdx, so keep the pointer to a in r9.
        mov	r9, rdx
        ; A[0] * B
        mov	rax, r8
        mul	QWORD PTR [r9]
        mov	QWORD PTR [rcx], rax
        ; r10 = carry word into the next column
        mov	r10, rdx
        ; A[1] * B .. A[46] * B, unrolled at assembly time.
        ; sp_3072_mul_d_48_off is the byte offset of the current word.
sp_3072_mul_d_48_off = 8
REPEAT 46
        mov	rax, r8
        mul	QWORD PTR [r9+sp_3072_mul_d_48_off]
        ; low word of product + carry from previous column
        add	r10, rax
        mov	QWORD PTR [rcx+sp_3072_mul_d_48_off], r10
        ; mov does not touch flags: CF from the add above is still live
        mov	r10, rdx
        ; high word of a 64x64 product is at most 2^64-2, so this cannot wrap
        adc	r10, 0
sp_3072_mul_d_48_off = sp_3072_mul_d_48_off + 8
ENDM
        ; A[47] * B
        mov	rax, r8
        mul	QWORD PTR [r9+376]
        add	r10, rax
        ; fold the final carry into the top word (cannot wrap, see above)
        adc	rdx, 0
        mov	QWORD PTR [rcx+376], r10
        mov	QWORD PTR [rcx+384], rdx
        ret
sp_3072_mul_d_48 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not subtracting.
;  *
;  * r  A single precision number representing condition subtract result.
;  * a  A single precision number to subtract from.
;  * b  A single precision number to subtract.
;  * m  Mask value to apply.
;  */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_24 PROC
        sub	rsp, 192
        mov	r10, QWORD PTR [r8]
        mov	r11, QWORD PTR [r8+8]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp], r10
        mov	QWORD PTR [rsp+8], r11
        mov	r10, QWORD PTR [r8+16]
        mov	r11, QWORD PTR [r8+24]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+16], r10
        mov	QWORD PTR [rsp+24], r11
        mov	r10, QWORD PTR [r8+32]
        mov	r11, QWORD PTR [r8+40]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+32], r10
        mov	QWORD PTR [rsp+40], r11
        mov	r10, QWORD PTR [r8+48]
        mov	r11, QWORD PTR [r8+56]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+48], r10
        mov	QWORD PTR [rsp+56], r11
        mov	r10, QWORD PTR [r8+64]
        mov	r11, QWORD PTR [r8+72]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+64], r10
        mov	QWORD PTR [rsp+72], r11
        mov	r10, QWORD PTR [r8+80]
        mov	r11, QWORD PTR [r8+88]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+80], r10
        mov	QWORD PTR [rsp+88], r11
        mov	r10, QWORD PTR [r8+96]
        mov	r11, QWORD PTR [r8+104]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+96], r10
        mov	QWORD PTR [rsp+104], r11
        mov	r10, QWORD PTR [r8+112]
        mov	r11, QWORD PTR [r8+120]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+112], r10
        mov	QWORD PTR [rsp+120], r11
        mov	r10, QWORD PTR [r8+128]
        mov	r11, QWORD PTR [r8+136]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+128], r10
        mov	QWORD PTR [rsp+136], r11
        mov	r10, QWORD PTR [r8+144]
        mov	r11, QWORD PTR [r8+152]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+144], r10
        mov	QWORD PTR [rsp+152], r11
        mov	r10, QWORD PTR [r8+160]
        mov	r11, QWORD PTR [r8+168]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+160], r10
        mov	QWORD PTR [rsp+168], r11
        mov	r10, QWORD PTR [r8+176]
        mov	r11, QWORD PTR [r8+184]
        and	r10, r9
        and	r11, r9
        mov	QWORD PTR [rsp+176], r10
        mov	QWORD PTR [rsp+184], r11
        mov	r10, QWORD PTR [rdx]
        mov	r8, QWORD PTR [rsp]
        sub	r10, r8
        mov	r11, QWORD PTR [rdx+8]
        mov	r8, QWORD PTR [rsp+8]
        sbb	r11, r8
        mov	QWORD PTR [rcx], r10
        mov	r10, QWORD PTR [rdx+16]
        mov	r8, QWORD PTR [rsp+16]
        sbb	r10, r8
        mov	QWORD PTR [rcx+8], r11
        mov	r11, QWORD PTR [rdx+24]
        mov	r8, QWORD PTR [rsp+24]
        sbb	r11, r8
        mov	QWORD PTR [rcx+16], r10
        mov	r10, QWORD PTR [rdx+32]
        mov	r8, QWORD PTR [rsp+32]
        sbb	r10, r8
        mov	QWORD PTR [rcx+24], r11
        mov	r11, QWORD PTR [rdx+40]
        mov	r8, QWORD PTR [rsp+40]
        sbb	r11, r8
        mov	QWORD PTR [rcx+32], r10
        mov	r10, QWORD PTR [rdx+48]
        mov	r8, QWORD PTR [rsp+48]
        sbb	r10, r8
        mov	QWORD PTR [rcx+40], r11
        mov	r11, QWORD PTR [rdx+56]
        mov	r8, QWORD PTR [rsp+56]
        sbb	r11, r8
        mov	QWORD PTR [rcx+48], r10
        mov	r10, QWORD PTR [rdx+64]
        mov	r8, QWORD PTR [rsp+64]
        sbb	r10, r8
        mov	QWORD PTR [rcx+56], r11
        mov	r11, QWORD PTR [rdx+72]
        mov	r8, QWORD PTR [rsp+72]
        sbb	r11, r8
        mov	QWORD PTR [rcx+64], r10
        mov	r10, QWORD PTR [rdx+80]
        mov	r8, QWORD PTR [rsp+80]
        sbb	r10, r8
        mov	QWORD PTR [rcx+72], r11
        mov	r11, QWORD PTR [rdx+88]
        mov	r8, QWORD PTR [rsp+88]
        sbb	r11, r8
        mov	QWORD PTR [rcx+80], r10
        mov	r10, QWORD PTR [rdx+96]
        mov	r8, QWORD PTR [rsp+96]
        sbb	r10, r8
        mov	QWORD PTR [rcx+88], r11
        mov	r11, QWORD PTR [rdx+104]
        mov	r8, QWORD PTR [rsp+104]
        sbb	r11, r8
        mov	QWORD PTR [rcx+96], r10
        mov	r10, QWORD PTR [rdx+112]
        mov	r8, QWORD PTR [rsp+112]
        sbb	r10, r8
        mov	QWORD PTR [rcx+104], r11
        mov	r11, QWORD PTR [rdx+120]
        mov	r8, QWORD PTR [rsp+120]
        sbb	r11, r8
        mov	QWORD PTR [rcx+112], r10
        mov	r10, QWORD PTR [rdx+128]
        mov	r8, QWORD PTR [rsp+128]
        sbb	r10, r8
        mov	QWORD PTR [rcx+120], r11
        mov	r11, QWORD PTR [rdx+136]
        mov	r8, QWORD PTR [rsp+136]
        sbb	r11, r8
        mov	QWORD PTR [rcx+128], r10
        mov	r10, QWORD PTR [rdx+144]
        mov	r8, QWORD PTR [rsp+144]
        sbb	r10, r8
        mov	QWORD PTR [rcx+136], r11
        mov	r11, QWORD PTR [rdx+152]
        mov	r8, QWORD PTR [rsp+152]
        sbb	r11, r8
        mov	QWORD PTR [rcx+144], r10
        mov	r10, QWORD PTR [rdx+160]
        mov	r8, QWORD PTR [rsp+160]
        sbb	r10, r8
        mov	QWORD PTR [rcx+152], r11
        mov	r11, QWORD PTR [rdx+168]
        mov	r8, QWORD PTR [rsp+168]
        sbb	r11, r8
        mov	QWORD PTR [rcx+160], r10
        mov	r10, QWORD PTR [rdx+176]
        mov	r8, QWORD PTR [rsp+176]
        sbb	r10, r8
        mov	QWORD PTR [rcx+168], r11
        mov	r11, QWORD PTR [rdx+184]
        mov	r8, QWORD PTR [rsp+184]
        sbb	r11, r8
        mov	QWORD PTR [rcx+176], r10
        mov	QWORD PTR [rcx+184], r11
        sbb	rax, rax
        add	rsp, 192
        ret
sp_3072_cond_sub_24 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
;  *
;  * a   (rcx) A single precision number to reduce in place. Digits
;  *           a[0]..a[48] are accessed (the last step touches [a+i*8+192]).
;  * m   (rdx) The single precision number representing the modulus (24 digits).
;  * mp  (r8)  The digit representing the negative inverse of m mod 2^n.
;  *
;  * Register roles inside the loop:
;  *   r9  = m              r10 = remaining iterations (24 .. 1)
;  *   r13 = mu             r11/r12 = alternating carry words
;  *   r15 = cached a[i]    rdi = cached a[i+1]
;  *   rsi = carry out of the previous iteration's top digit
;  *
;  * After the loop the carry in rsi is negated into a mask and
;  * sp_3072_cond_sub_24(r = a, a = a + 24 digits, b = m, mask) is called.
;  *
;  * NOTE(review): the call is made without allocating Win64 shadow space;
;  * sp_3072_cond_sub_24 as written in this file does not use it.
;  */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_24 PROC
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        ; rdx is clobbered by MUL, so keep the modulus pointer in r9.
        mov	r9, rdx
        xor	rsi, rsi
        ; i = 24
        mov	r10, 24
        mov	r15, QWORD PTR [rcx]
        mov	rdi, QWORD PTR [rcx+8]
L_3072_mont_reduce_24_loop:
        ; mu = a[i] * mp
        mov	r13, r15
        imul	r13, r8
        ; a[i+0] += m[0] * mu
        ; Only the carry (r12) is kept; r15 is overwritten just below.
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9]
        add	r15, rax
        adc	r12, rdx
        ; a[i+1] += m[1] * mu
        ; The sum becomes the cached a[i] of the next iteration.
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+8]
        mov	r15, rdi
        add	r15, rax
        adc	r11, rdx
        add	r15, r12
        adc	r11, 0
        ; a[i+2] += m[2] * mu
        ; The sum becomes the cached a[i+1] of the next iteration.
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+16]
        mov	rdi, QWORD PTR [rcx+16]
        add	rdi, rax
        adc	r12, rdx
        add	rdi, r11
        adc	r12, 0
        ; a[i+3] .. a[i+22]: two digits per expansion, carry alternating
        ; between r12 -> r11 (odd digit) and r11 -> r12 (even digit).
sp_3072_mont_reduce_24_off = 24
REPEAT 10
        ; a[i+odd] += m[odd] * mu
        mov	rax, r13
        xor	r11, r11
        mul	QWORD PTR [r9+sp_3072_mont_reduce_24_off]
        mov	r14, QWORD PTR [rcx+sp_3072_mont_reduce_24_off]
        add	r14, rax
        adc	r11, rdx
        add	r14, r12
        mov	QWORD PTR [rcx+sp_3072_mont_reduce_24_off], r14
        adc	r11, 0
        ; a[i+even] += m[even] * mu
        mov	rax, r13
        xor	r12, r12
        mul	QWORD PTR [r9+sp_3072_mont_reduce_24_off+8]
        mov	r14, QWORD PTR [rcx+sp_3072_mont_reduce_24_off+8]
        add	r14, rax
        adc	r12, rdx
        add	r14, r11
        mov	QWORD PTR [rcx+sp_3072_mont_reduce_24_off+8], r14
        adc	r12, 0
sp_3072_mont_reduce_24_off = sp_3072_mont_reduce_24_off + 16
ENDM
        ; a[i+23] += m[23] * mu
        ; The high word also absorbs the carry saved from the previous
        ; iteration (rsi) and is added into a[i+24].
        mov	rax, r13
        mul	QWORD PTR [r9+184]
        mov	r14, QWORD PTR [rcx+184]
        add	r12, rax
        adc	rdx, rsi
        ; MOV (not XOR) so the carry from the ADC above survives.
        mov	rsi, 0
        adc	rsi, 0
        add	r14, r12
        mov	QWORD PTR [rcx+184], r14
        adc	QWORD PTR [rcx+192], rdx
        adc	rsi, 0
        ; i -= 1
        add	rcx, 8
        dec	r10
        jnz	L_3072_mont_reduce_24_loop
        ; Write back the two cached digits; rcx now points at a + 24 digits.
        mov	QWORD PTR [rcx], r15
        mov	QWORD PTR [rcx+8], rdi
        ; Turn the final carry into the subtraction mask (0 - carry).
        neg	rsi
        ; Arguments for sp_3072_cond_sub_24. The modulus pointer must be
        ; copied out of r9 BEFORE r9 is overwritten with the mask.
        mov	r8, r9
        mov	r9, rsi
        mov	rdx, rcx
        sub	rcx, 192
        call	sp_3072_cond_sub_24
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mont_reduce_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a, selected by the mask m (PEXT variant).
;  * Each digit of b is filtered with PEXT using m as the bit mask, which
;  * yields the digit itself for m = -1 and 0 for m = 0.
;  * NOTE(review): for any other m, PEXT differs from a plain AND - assumes
;  * callers only pass 0 or -1.
;  *
;  * r  (rcx) Destination for the 24-digit result.
;  * a  (rdx) Number to subtract from.
;  * b  (r8)  Number to subtract.
;  * m  (r9)  Mask value to apply.
;  *
;  * Returns in rax 0 when the subtraction did not borrow, -1 when it did.
;  */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_24 PROC
        push	r12
        ; Digit 0 opens the borrow chain with SUB. MOV and PEXT leave the
        ; carry flag untouched, so the SBB chain below stays intact.
        mov	r12, QWORD PTR [r8]
        mov	r10, QWORD PTR [rdx]
        pext	r12, r12, r9
        sub	r10, r12
        ; Digits 1..21, three per expansion. r10/r11/r12 rotate roles and
        ; each result is stored while the next digit is being prepared.
sp_3072_cond_sub_avx2_24_off = 8
REPEAT 7
        ; b -> r12, a -> r11, store previous result from r10
        mov	r12, QWORD PTR [r8+sp_3072_cond_sub_avx2_24_off]
        mov	r11, QWORD PTR [rdx+sp_3072_cond_sub_avx2_24_off]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+sp_3072_cond_sub_avx2_24_off-8], r10
        sbb	r11, r12
        ; b -> r10, a -> r12, store previous result from r11
        mov	r10, QWORD PTR [r8+sp_3072_cond_sub_avx2_24_off+8]
        mov	r12, QWORD PTR [rdx+sp_3072_cond_sub_avx2_24_off+8]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+sp_3072_cond_sub_avx2_24_off], r11
        sbb	r12, r10
        ; b -> r11, a -> r10, store previous result from r12
        mov	r11, QWORD PTR [r8+sp_3072_cond_sub_avx2_24_off+16]
        mov	r10, QWORD PTR [rdx+sp_3072_cond_sub_avx2_24_off+16]
        pext	r11, r11, r9
        mov	QWORD PTR [rcx+sp_3072_cond_sub_avx2_24_off+8], r12
        sbb	r10, r11
sp_3072_cond_sub_avx2_24_off = sp_3072_cond_sub_avx2_24_off + 24
ENDM
        ; Digit 22
        mov	r12, QWORD PTR [r8+176]
        mov	r11, QWORD PTR [rdx+176]
        pext	r12, r12, r9
        mov	QWORD PTR [rcx+168], r10
        sbb	r11, r12
        ; Digit 23
        mov	r10, QWORD PTR [r8+184]
        mov	r12, QWORD PTR [rdx+184]
        pext	r10, r10, r9
        mov	QWORD PTR [rcx+176], r11
        sbb	r12, r10
        mov	QWORD PTR [rcx+184], r12
        ; rax = 0 - borrow: 0 or -1.
        sbb	rax, rax
        pop	r12
        ret
sp_3072_cond_sub_avx2_24 ENDP
_text ENDS
ENDIF
; /* Multiply a by the single digit b, writing the product to r. (r = a * b)
;  *
;  * r  (rcx) Destination; 25 digits are written (r[0]..r[24]).
;  * a  (rdx) Multiplicand; 24 digits are read.
;  * b  (r8)  Multiplier digit.
;  *
;  * r10, r11 and r12 rotate through three roles: the word being completed
;  * and stored, the word receiving the high half of the product, and a
;  * freshly zeroed word that catches the carry.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_d_24 PROC
        push	r12
        ; MUL overwrites rdx, so address a through r9.
        mov	r9, rdx
        ; A[0] * B
        mov	rax, r8
        xor	r12, r12
        mul	QWORD PTR [r9]
        mov	r10, rax
        mov	r11, rdx
        mov	QWORD PTR [rcx], r10
        ; A[1] .. A[21], three digits per expansion.
sp_3072_mul_d_24_off = 8
REPEAT 7
        ; low half into r11, high half into r12, carry into r10
        mov	rax, r8
        xor	r10, r10
        mul	QWORD PTR [r9+sp_3072_mul_d_24_off]
        add	r11, rax
        mov	QWORD PTR [rcx+sp_3072_mul_d_24_off], r11
        adc	r12, rdx
        adc	r10, 0
        ; low half into r12, high half into r10, carry into r11
        mov	rax, r8
        xor	r11, r11
        mul	QWORD PTR [r9+sp_3072_mul_d_24_off+8]
        add	r12, rax
        mov	QWORD PTR [rcx+sp_3072_mul_d_24_off+8], r12
        adc	r10, rdx
        adc	r11, 0
        ; low half into r10, high half into r11, carry into r12
        mov	rax, r8
        xor	r12, r12
        mul	QWORD PTR [r9+sp_3072_mul_d_24_off+16]
        add	r10, rax
        mov	QWORD PTR [rcx+sp_3072_mul_d_24_off+16], r10
        adc	r11, rdx
        adc	r12, 0
sp_3072_mul_d_24_off = sp_3072_mul_d_24_off + 24
ENDM
        ; A[22] * B
        mov	rax, r8
        xor	r10, r10
        mul	QWORD PTR [r9+176]
        add	r11, rax
        mov	QWORD PTR [rcx+176], r11
        adc	r12, rdx
        adc	r10, 0
        ; A[23] * B - the high half becomes the extra top digit r[24].
        mov	rax, r8
        mul	QWORD PTR [r9+184]
        add	r12, rax
        adc	r10, rdx
        mov	QWORD PTR [rcx+184], r12
        mov	QWORD PTR [rcx+192], r10
        pop	r12
        ret
sp_3072_mul_d_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a by the single digit b, writing the product to r. (r = a * b)
;  * MULX/ADCX/ADOX variant: low halves are accumulated on the CF chain and
;  * high halves on the OF chain.
;  *
;  * r  (rcx) Destination; 25 digits are written (r[0]..r[24]).
;  * a  (rdx) Multiplicand; 24 digits are read.
;  * b  (r8)  Multiplier digit.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_d_avx2_24 PROC
        push	r12
        push	r13
        ; MULX takes its implicit operand from rdx, so a moves to rax
        ; and b is loaded into rdx.
        mov	rax, rdx
        ; A[0] * B
        mov	rdx, r8
        ; r13 = 0 for the rest of the routine; the XOR also clears CF and OF.
        xor	r13, r13
        mulx	r12, r11, QWORD PTR [rax]
        mov	QWORD PTR [rcx], r11
        ; A[1] .. A[22], two digits per expansion; r12 and r11 swap roles
        ; between "word being completed" and "next word".
sp_3072_mul_d_avx2_24_off = 8
REPEAT 11
        ; complete r12, start r11
        mulx	r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_24_off]
        mov	r11, r13
        adcx	r12, r9
        adox	r11, r10
        mov	QWORD PTR [rcx+sp_3072_mul_d_avx2_24_off], r12
        ; complete r11, start r12
        mulx	r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_24_off+8]
        mov	r12, r13
        adcx	r11, r9
        adox	r12, r10
        mov	QWORD PTR [rcx+sp_3072_mul_d_avx2_24_off+8], r11
sp_3072_mul_d_avx2_24_off = sp_3072_mul_d_avx2_24_off + 16
ENDM
        ; A[23] * B - fold the remaining CF into the top digit r[24].
        mulx	r10, r9, QWORD PTR [rax+184]
        mov	r11, r13
        adcx	r12, r9
        adox	r11, r10
        adcx	r11, r13
        mov	QWORD PTR [rcx+184], r12
        mov	QWORD PTR [rcx+192], r11
        pop	r13
        pop	r12
        ret
sp_3072_mul_d_avx2_24 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * d1   (rcx) The high order half of the number to divide.
;  * d0   (rdx) The low order half of the number to divide.
;  * div  (r8)  The divisor.
;  * returns the quotient in rax (DIV also leaves the remainder in rdx).
;  *
;  * DIV raises a divide error when div is 0 or when the quotient does not
;  * fit in 64 bits (d1 >= div); no check is made here, so the caller is
;  * responsible for that precondition.
;  */
_text SEGMENT READONLY PARA
div_3072_word_asm_24 PROC
        ; DIV divides rdx:rax, so d0 goes to rax and d1 to rdx.
        ; rdx must be read before it is overwritten with d1.
        mov	rax, rdx
        mov	rdx, rcx
        div	r8
        ret
div_3072_word_asm_24 ENDP
_text ENDS
ENDIF
; /* Compare a with b without data-dependent branches.
;  * All 24 digits are always visited, most significant first.
;  *
;  * a  (rcx) A single precision integer (24 digits).
;  * b  (rdx) A single precision integer (24 digits).
;  * return (rax) -1, 0 or 1 when a is less than, equal to or greater than b
;  * respectively.
;  *
;  * Register roles:
;  *   r8  = "still equal" mask: -1 until the first differing digit, then 0
;  *   r9  = constant 0, r10 = constant 1
;  *   rax = running result, starts at -1
;  */
_text SEGMENT READONLY PARA
sp_3072_cmp_24 PROC
        push	r12
        xor	r9, r9
        mov	r8, -1
        mov	rax, -1
        mov	r10, 1
        ; Once r8 is 0 both operands are masked to 0, so the remaining
        ; digits compare equal and none of the CMOVs below fire.
sp_3072_cmp_24_off = 184
REPEAT 24
        mov	r11, QWORD PTR [rcx+sp_3072_cmp_24_off]
        mov	r12, QWORD PTR [rdx+sp_3072_cmp_24_off]
        and	r11, r8
        and	r12, r8
        sub	r11, r12
        ; a digit above b digit: result 1
        cmova	rax, r10
        ; a digit below b digit: result r8, which is still -1 here
        cmovc	rax, r8
        ; digits differ: drop the mask so lower digits are ignored
        cmovnz	r8, r9
sp_3072_cmp_24_off = sp_3072_cmp_24_off - 8
ENDM
        ; All digits equal: rax and r8 are both -1 and the XOR yields 0.
        ; Otherwise r8 is 0 and rax is returned unchanged.
        xor	rax, r8
        pop	r12
        ret
sp_3072_cmp_24 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_24 PROC
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        pxor	xmm13, xmm13
        pshufd	xmm11, xmm11, 0
        pshufd	xmm10, xmm10, 0
        ; START: 0-7
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 0-7
        ; START: 8-15
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 64
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        add	rcx, 64
        ; END: 8-15
        ; START: 16-23
        pxor	xmm13, xmm13
        pxor	xmm4, xmm4
        pxor	xmm5, xmm5
        pxor	xmm6, xmm6
        pxor	xmm7, xmm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        ; ENTRY: 31
        mov	r9, QWORD PTR [rdx+248]
        add	r9, 128
        movdqu	xmm12, xmm13
        pcmpeqd	xmm12, xmm10
        movdqu	xmm0, [r9]
        movdqu	xmm1, [r9+16]
        movdqu	xmm2, [r9+32]
        movdqu	xmm3, [r9+48]
        pand	xmm0, xmm12
        pand	xmm1, xmm12
        pand	xmm2, xmm12
        pand	xmm3, xmm12
        por	xmm4, xmm0
        por	xmm5, xmm1
        por	xmm6, xmm2
        por	xmm7, xmm3
        paddd	xmm13, xmm11
        movdqu	[rcx], xmm4
        movdqu	[rcx+16], xmm5
        movdqu	[rcx+32], xmm6
        movdqu	[rcx+48], xmm7
        ; END: 16-23
        vmovdqu	xmm6, OWORD PTR [rsp]
        vmovdqu	xmm7, OWORD PTR [rsp+16]
        vmovdqu	xmm8, OWORD PTR [rsp+32]
        vmovdqu	xmm9, OWORD PTR [rsp+48]
        vmovdqu	xmm10, OWORD PTR [rsp+64]
        vmovdqu	xmm11, OWORD PTR [rsp+80]
        vmovdqu	xmm12, OWORD PTR [rsp+96]
        vmovdqu	xmm13, OWORD PTR [rsp+112]
        add	rsp, 128
        ret
sp_3072_get_from_table_24 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 3072 bits using Montgomery reduction.
;  *
;  * Works on 64-bit words: 24 words of m and 48 words of a (a[0..47] are
;  * accessed). Each of the 24 loop passes computes mu = a[i] * mp and adds
;  * mu * m[0..23] into a[i..i+24]. Afterwards a[24..47] minus (m masked by
;  * the final carry) is written to a[0..23]; the masking is branch-free.
;  * Uses the MULX, ADCX, ADOX and PEXT instructions.
;  *
;  * a   A single precision number to reduce in place. (read from rcx)
;  * m   The single precision number representing the modulus. (read from rdx)
;  * mp  The digit representing the negative inverse of m mod 2^n. (read from r8)
;  *
;  * r12-r15, rdi, rsi, rbx and rbp are saved on entry and restored on exit.
;  * rax, rcx, rdx, r8, r9, r10 and r11 are overwritten as scratch.
;  */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_24 PROC
        ; Save every non-volatile register used below (popped in reverse order)
        push	r12
        push	r13
        push	r14
        push	r15
        push	rdi
        push	rsi
        push	rbx
        push	rbp
        ; r9 = a, r10 = m: frees rcx (MULX high result) and rdx (MULX implicit source)
        mov	r9, rcx
        mov	r10, rdx
        ; rbp = carry out of the previous row; none yet
        xor	rbp, rbp
        ; i = 24
        mov	r11, 24
        ; Keep the four lowest live words in registers:
        ; r14 = a[0], r15 = a[1], rdi = a[2], rsi = a[3]
        mov	r14, QWORD PTR [r9]
        mov	r15, QWORD PTR [r9+8]
        mov	rdi, QWORD PTR [r9+16]
        mov	rsi, QWORD PTR [r9+24]
        ; Bias r9 by 96 bytes (r9 = &a[12]) so that a[i+4]..a[i+24] are
        ; addressed with displacements -64..+96
        add	r9, 96
        ; NOTE(review): rbp was already cleared above and has not been written
        ; since, so this second clear is redundant
        xor	rbp, rbp
L_3072_mont_reduce_avx2_24_loop:
        ; One row: a[i..i+24] += m[0..23] * mu.
        ; On entry r14, r15, rdi, rsi = a[i], a[i+1], a[i+2], a[i+3] and
        ; r9 = &a[i+12]. Each step loads the next word of a, adds the low half
        ; of the product with ADCX (CF chain) and the high half into the next
        ; word with ADOX (OF chain); the two chains run independently.
        ; mu = a[i] * mp
        mov	rdx, r14
        mov	r12, r14
        imul	rdx, r8
        ; rbx = 0 (zero addend for the carry folding at the end of the row);
        ; the xor also clears CF and OF, starting both carry chains at zero
        xor	rbx, rbx
        ; a[i+0] += m[0] * mu
        ; r12 (a[i] + low half) is never stored, only the CF it produces is
        ; used; r14 shifts down to hold a[i+1]
        mulx	rcx, rax, QWORD PTR [r10]
        mov	r14, r15
        adcx	r12, rax
        adox	r14, rcx
        ; a[i+1] += m[1] * mu
        mulx	rcx, rax, QWORD PTR [r10+8]
        mov	r15, rdi
        adcx	r14, rax
        adox	r15, rcx
        ; a[i+2] += m[2] * mu
        mulx	rcx, rax, QWORD PTR [r10+16]
        mov	rdi, rsi
        adcx	r15, rax
        adox	rdi, rcx
        ; a[i+3] += m[3] * mu
        ; rsi is reloaded from memory with a[i+4]; from here on r14, r15, rdi,
        ; rsi hold a[i+1]..a[i+4], the register window for the next row
        mulx	rcx, rax, QWORD PTR [r10+24]
        mov	rsi, QWORD PTR [r9+-64]
        adcx	rdi, rax
        adox	rsi, rcx
        ; a[i+4] += m[4] * mu
        ; Remaining words alternate between r13 and r12 and live in memory
        mulx	rcx, rax, QWORD PTR [r10+32]
        mov	r13, QWORD PTR [r9+-56]
        adcx	rsi, rax
        adox	r13, rcx
        ; a[i+5] += m[5] * mu
        ; a[i+5] is the first word written back to memory in this row
        mulx	rcx, rax, QWORD PTR [r10+40]
        mov	r12, QWORD PTR [r9+-48]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-56], r13
        ; a[i+6] += m[6] * mu
        mulx	rcx, rax, QWORD PTR [r10+48]
        mov	r13, QWORD PTR [r9+-40]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-48], r12
        ; a[i+7] += m[7] * mu
        mulx	rcx, rax, QWORD PTR [r10+56]
        mov	r12, QWORD PTR [r9+-32]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-40], r13
        ; a[i+8] += m[8] * mu
        mulx	rcx, rax, QWORD PTR [r10+64]
        mov	r13, QWORD PTR [r9+-24]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-32], r12
        ; a[i+9] += m[9] * mu
        mulx	rcx, rax, QWORD PTR [r10+72]
        mov	r12, QWORD PTR [r9+-16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-24], r13
        ; a[i+10] += m[10] * mu
        mulx	rcx, rax, QWORD PTR [r10+80]
        mov	r13, QWORD PTR [r9+-8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+-16], r12
        ; a[i+11] += m[11] * mu
        mulx	rcx, rax, QWORD PTR [r10+88]
        mov	r12, QWORD PTR [r9]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+-8], r13
        ; a[i+12] += m[12] * mu
        mulx	rcx, rax, QWORD PTR [r10+96]
        mov	r13, QWORD PTR [r9+8]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9], r12
        ; a[i+13] += m[13] * mu
        mulx	rcx, rax, QWORD PTR [r10+104]
        mov	r12, QWORD PTR [r9+16]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+8], r13
        ; a[i+14] += m[14] * mu
        mulx	rcx, rax, QWORD PTR [r10+112]
        mov	r13, QWORD PTR [r9+24]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+16], r12
        ; a[i+15] += m[15] * mu
        mulx	rcx, rax, QWORD PTR [r10+120]
        mov	r12, QWORD PTR [r9+32]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+24], r13
        ; a[i+16] += m[16] * mu
        mulx	rcx, rax, QWORD PTR [r10+128]
        mov	r13, QWORD PTR [r9+40]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+32], r12
        ; a[i+17] += m[17] * mu
        mulx	rcx, rax, QWORD PTR [r10+136]
        mov	r12, QWORD PTR [r9+48]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+40], r13
        ; a[i+18] += m[18] * mu
        mulx	rcx, rax, QWORD PTR [r10+144]
        mov	r13, QWORD PTR [r9+56]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+48], r12
        ; a[i+19] += m[19] * mu
        mulx	rcx, rax, QWORD PTR [r10+152]
        mov	r12, QWORD PTR [r9+64]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+56], r13
        ; a[i+20] += m[20] * mu
        mulx	rcx, rax, QWORD PTR [r10+160]
        mov	r13, QWORD PTR [r9+72]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+64], r12
        ; a[i+21] += m[21] * mu
        mulx	rcx, rax, QWORD PTR [r10+168]
        mov	r12, QWORD PTR [r9+80]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+72], r13
        ; a[i+22] += m[22] * mu
        mulx	rcx, rax, QWORD PTR [r10+176]
        mov	r13, QWORD PTR [r9+88]
        adcx	r12, rax
        adox	r13, rcx
        mov	QWORD PTR [r9+80], r12
        ; a[i+23] += m[23] * mu
        ; r12 = a[i+24] receives the high half of the last product
        mulx	rcx, rax, QWORD PTR [r10+184]
        mov	r12, QWORD PTR [r9+96]
        adcx	r13, rax
        adox	r12, rcx
        mov	QWORD PTR [r9+88], r13
        ; a[i+24] also absorbs the carry kept from the previous row (rbp) and
        ; the pending CF
        adcx	r12, rbp
        ; rbp = 0 (MOV leaves CF/OF untouched), then rbp = OF + CF:
        ; the carry out of this row, consumed by the next row
        mov	rbp, rbx
        mov	QWORD PTR [r9+96], r12
        adox	rbp, rbx
        adcx	rbp, rbx
        ; a += 1
        add	r9, 8
        ; i -= 1
        sub	r11, 1
        jnz	L_3072_mont_reduce_avx2_24_loop
        ; Final step: a[0..23] = a[24..47] - (m AND mask), mask built from the
        ; carry left in rbp by the last row.
        ; After 24 rows r9 = &a[36]; remove the 96-byte bias to get &a[24]
        sub	r9, 96
        ; rbp = 0 - carry
        ; NOTE(review): PEXT below acts as a plain AND only when rbp is 0 or
        ; all ones, i.e. when the final carry is 0 or 1 - confirm the bound
        neg	rbp
        ; r8 = &a[24] (source, upper half); r9 = &a[0] (destination).
        ; mp in r8 is no longer needed.
        mov	r8, r9
        sub	r9, 192
        ; Words a[24]..a[27] are still in r14, r15, rdi, rsi from the loop;
        ; a[28]..a[47] are read from memory via r8.
        ; PEXT with mask rbp yields m[j] (all-ones mask) or 0 (zero mask) and,
        ; like MOV, does not modify flags, so the borrow of each SUB/SBB
        ; reaches the next SBB intact. rcx, rax and rdx rotate as temporaries.
        mov	rcx, QWORD PTR [r10]
        mov	rdx, r14
        pext	rcx, rcx, rbp
        sub	rdx, rcx
        mov	rcx, QWORD PTR [r10+8]
        mov	rax, r15
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+16]
        mov	rcx, rdi
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+8], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+24]
        mov	rdx, rsi
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+16], rcx
        sbb	rdx, rax
        ; From here the minuend words come from memory (a[28] onwards)
        mov	rcx, QWORD PTR [r10+32]
        mov	rax, QWORD PTR [r8+32]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+24], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+40]
        mov	rcx, QWORD PTR [r8+40]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+32], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+48]
        mov	rdx, QWORD PTR [r8+48]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+40], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+56]
        mov	rax, QWORD PTR [r8+56]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+48], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+64]
        mov	rcx, QWORD PTR [r8+64]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+56], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+72]
        mov	rdx, QWORD PTR [r8+72]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+64], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+80]
        mov	rax, QWORD PTR [r8+80]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+72], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+88]
        mov	rcx, QWORD PTR [r8+88]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+80], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+96]
        mov	rdx, QWORD PTR [r8+96]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+88], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+104]
        mov	rax, QWORD PTR [r8+104]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+96], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+112]
        mov	rcx, QWORD PTR [r8+112]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+104], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+120]
        mov	rdx, QWORD PTR [r8+120]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+112], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+128]
        mov	rax, QWORD PTR [r8+128]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+120], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+136]
        mov	rcx, QWORD PTR [r8+136]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+128], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+144]
        mov	rdx, QWORD PTR [r8+144]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+136], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+152]
        mov	rax, QWORD PTR [r8+152]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+144], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+160]
        mov	rcx, QWORD PTR [r8+160]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+152], rax
        sbb	rcx, rdx
        mov	rax, QWORD PTR [r10+168]
        mov	rdx, QWORD PTR [r8+168]
        pext	rax, rax, rbp
        mov	QWORD PTR [r9+160], rcx
        sbb	rdx, rax
        mov	rcx, QWORD PTR [r10+176]
        mov	rax, QWORD PTR [r8+176]
        pext	rcx, rcx, rbp
        mov	QWORD PTR [r9+168], rdx
        sbb	rax, rcx
        mov	rdx, QWORD PTR [r10+184]
        mov	rcx, QWORD PTR [r8+184]
        pext	rdx, rdx, rbp
        mov	QWORD PTR [r9+176], rax
        sbb	rcx, rdx
        ; Last word a[23]; the borrow out of this SBB is not used
        mov	QWORD PTR [r9+184], rcx
        ; Restore the saved registers in reverse push order
        pop	rbp
        pop	rbx
        pop	rsi
        pop	rdi
        pop	r15
        pop	r14
        pop	r13
        pop	r12
        ret
sp_3072_mont_reduce_avx2_24 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_avx2_24 PROC
        sub	rsp, 128
        vmovdqu	OWORD PTR [rsp], xmm6
        vmovdqu	OWORD PTR [rsp+16], xmm7
        vmovdqu	OWORD PTR [rsp+32], xmm8
        vmovdqu	OWORD PTR [rsp+48], xmm9
        vmovdqu	OWORD PTR [rsp+64], xmm10
        vmovdqu	OWORD PTR [rsp+80], xmm11
        vmovdqu	OWORD PTR [rsp+96], xmm12
        vmovdqu	OWORD PTR [rsp+112], xmm13
        mov	rax, 1
        movd	xmm10, r8
        movd	xmm11, rax
        vpxor	ymm13, ymm13, ymm13
        vpermd	ymm10, ymm13, ymm10
        vpermd	ymm11, ymm13, ymm11
        ; START: 0-15
        vpxor	ymm13, ymm13, ymm13
        vpxor	ymm4, ymm4, ymm4
        vpxor	ymm5, ymm5, ymm5
        vpxor	ymm6, ymm6, ymm6
        vpxor	ymm7, ymm7, ymm7
        ; ENTRY: 0
        mov	r9, QWORD PTR [rdx]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 1
        mov	r9, QWORD PTR [rdx+8]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 2
        mov	r9, QWORD PTR [rdx+16]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 3
        mov	r9, QWORD PTR [rdx+24]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 4
        mov	r9, QWORD PTR [rdx+32]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 5
        mov	r9, QWORD PTR [rdx+40]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 6
        mov	r9, QWORD PTR [rdx+48]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 7
        mov	r9, QWORD PTR [rdx+56]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 8
        mov	r9, QWORD PTR [rdx+64]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 9
        mov	r9, QWORD PTR [rdx+72]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 10
        mov	r9, QWORD PTR [rdx+80]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 11
        mov	r9, QWORD PTR [rdx+88]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 12
        mov	r9, QWORD PTR [rdx+96]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 13
        mov	r9, QWORD PTR [rdx+104]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 14
        mov	r9, QWORD PTR [rdx+112]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 15
        mov	r9, QWORD PTR [rdx+120]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 16
        mov	r9, QWORD PTR [rdx+128]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 17
        mov	r9, QWORD PTR [rdx+136]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 18
        mov	r9, QWORD PTR [rdx+144]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 19
        mov	r9, QWORD PTR [rdx+152]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 20
        mov	r9, QWORD PTR [rdx+160]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 21
        mov	r9, QWORD PTR [rdx+168]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 22
        mov	r9, QWORD PTR [rdx+176]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 23
        mov	r9, QWORD PTR [rdx+184]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 24
        mov	r9, QWORD PTR [rdx+192]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 25
        mov	r9, QWORD PTR [rdx+200]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 26
        mov	r9, QWORD PTR [rdx+208]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 27
        mov	r9, QWORD PTR [rdx+216]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 28
        mov	r9, QWORD PTR [rdx+224]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 29
        mov	r9, QWORD PTR [rdx+232]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ymm1, ymm1, ymm12
        vpand	ymm2, ymm2, ymm12
        vpand	ymm3, ymm3, ymm12
        vpor	ymm4, ymm4, ymm0
        vpor	ymm5, ymm5, ymm1
        vpor	ymm6, ymm6, ymm2
        vpor	ymm7, ymm7, ymm3
        vpaddd	ymm13, ymm13, ymm11
        ; ENTRY: 30
        mov	r9, QWORD PTR [rdx+240]
        vpcmpeqd	ymm12, ymm13, ymm10
        vmovdqu	ymm0, YMMWORD PTR [r9]
        vmovdqu	ymm1, YMMWORD PTR [r9+32]
        vmovdqu	ymm2, YMMWORD PTR [r9+64]
        vmovdqu	ymm3, YMMWORD PTR [r9+96]
        vpand	ymm0, ymm0, ymm12
        vpand	ym