/* sp.c
 *
 * Copyright (C) 2006-2019 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

/* Implementation by Sean Parkinson. */

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/cpuid.h>
#ifdef NO_INLINE
    #include <wolfssl/wolfcrypt/misc.h>
#else
    #define WOLFSSL_MISC_INCLUDED
    #include <wolfcrypt/src/misc.c>
#endif

#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
                                    defined(WOLFSSL_HAVE_SP_ECC)

#ifdef RSA_LOW_MEM
#ifndef WOLFSSL_SP_SMALL
#define WOLFSSL_SP_SMALL
#endif
#endif

#include <wolfssl/wolfcrypt/sp.h>

#ifdef WOLFSSL_SP_ARM32_ASM
#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
#ifndef WOLFSSL_SP_NO_2048
/* Load a big endian byte string into an array of 32-bit digits.
 *
 * The last byte of a becomes the least significant byte of r[0]. Reading
 * stops early once all size digits of r are full; any digits of r that are
 * not reached are set to zero.
 *
 * r     A single precision integer (array of size digits).
 * size  Number of digits available in r.
 * a     Byte array.
 * n     Number of bytes in array to read.
 */
static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int idx = n - 1;
    int word = 0;
    word32 shift = 0;

    r[0] = 0;
    while (idx >= 0) {
        sp_digit octet = (sp_digit)a[idx];

        r[word] |= octet << shift;
        if (shift < 24U) {
            /* Still room in the current digit for another byte. */
            shift += 8U;
        }
        else {
            /* Current digit is complete - move on to the next one. */
            r[word] &= 0xffffffff;
            shift = 32U - shift;
            if (word + 1 >= size) {
                break;
            }
            word++;
            /* Bits of this byte that did not fit start the next digit. */
            r[word] = octet >> shift;
            shift = 8U - shift;
        }
        idx--;
    }

    /* Clear every digit above the last one written. */
    word++;
    while (word < size) {
        r[word] = 0;
        word++;
    }
}

/* Convert an mp_int to an array of sp_digit.
 *
 * The digits of a are repacked into 32-bit digits in r, least significant
 * first. No more than size digits of r are written and digits of r above
 * the converted value are set to zero. size is expected to be at least 1.
 *
 * r     A single precision integer (array of size digits).
 * size  Number of digits available in r.
 * a     A multi-precision integer.
 */
static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    int j;
    int used = a->used;

    /* Digits map one to one. Clamp the count so that the copy can never run
     * past the end of r when a has more digits than r can hold. */
    if (used > size) {
        used = size;
    }

    XMEMCPY(r, a->dp, sizeof(sp_digit) * used);

    for (j = used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    /* Each mp digit is wider than an sp digit and is split across several. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* Each mp digit is narrower than an sp digit; several are packed into
     * one. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 256
 *
 * The 64 digits of r each supply exactly four bytes: r[0] fills the last
 * four bytes of a and r[63] the first four.
 *
 * r  A single precision integer (64 digits are read).
 * a  Byte array (256 bytes are written).
 */
static void sp_2048_to_bin(sp_digit* r, byte* a)
{
    int i;
    int j = 2048 / 8 - 1;

    /* Walk the digits from least significant upwards while filling the
     * output from its end, least significant byte of each digit first. */
    for (i = 0; i < 64; i++) {
        a[j--] = (byte)(r[i] >> 0);
        a[j--] = (byte)(r[i] >> 8);
        a[j--] = (byte)(r[i] >> 16);
        a[j--] = (byte)(r[i] >> 24);
    }
}

#ifndef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Both inputs are read as eight 32-bit words and all sixteen words of the
 * product are written. Each result word is formed by summing every
 * A[i] * B[j] that shares the same i + j into a three register accumulator;
 * the roles of r3, r4 and r5 rotate from one result word to the next.
 * Result words 0..7 are staged in a 32 byte scratch area on the stack and
 * are copied into r only after the last read of a and b. Result words 8..15
 * are stored directly into r.
 * NOTE(review): the staging presumably exists so that r may be the same
 * array as a or b - confirm against the callers.
 *
 * r  A single precision integer (written at byte offsets 0..60).
 * a  A single precision integer (read at byte offsets 0..28).
 * b  A single precision integer (read at byte offsets 0..28).
 */
static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Reserve 32 bytes of stack for the low eight result words. */
        "sub	sp, sp, #32\n\t"
        /* r10 stays zero; it is used to fold the carry flag into the top
         * accumulator register. */
        "mov	r10, #0\n\t"
        "#  A[0] * B[0]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r3, r4, r8, r9\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [sp]\n\t"
        "#  A[0] * B[1]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[0]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #4]\n\t"
        "#  A[0] * B[2]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[1]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[0]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #8]\n\t"
        "#  A[0] * B[3]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[2]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[1]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[0]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #12]\n\t"
        "#  A[0] * B[4]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[3]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[2]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[1]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[0]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #16]\n\t"
        "#  A[0] * B[5]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[4]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[3]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[2]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[1]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[0]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #20]\n\t"
        "#  A[0] * B[6]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[5]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[4]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[3]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[2]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[1]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[0]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #24]\n\t"
        "#  A[0] * B[7]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[6]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[5]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[4]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[3]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[2]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[1]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[0]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #28]\n\t"
        "#  A[1] * B[7]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[2] * B[6]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[5]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[4]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[3]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[2]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[1]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        /* From result word 8 upwards the words are stored straight into r
         * rather than into the stack scratch area. */
        "str	r5, [%[r], #32]\n\t"
        "#  A[2] * B[7]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[3] * B[6]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[5]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[4]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[3]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[2]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #36]\n\t"
        "#  A[3] * B[7]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[4] * B[6]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[5]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[4]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[3]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #40]\n\t"
        "#  A[4] * B[7]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[5] * B[6]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[5]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[4]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #44]\n\t"
        "#  A[5] * B[7]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[6] * B[6]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[5]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "#  A[6] * B[7]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[7] * B[6]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "#  A[7] * B[7]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r5, [%[r], #56]\n\t"
        "str	r3, [%[r], #60]\n\t"
        /* All reads of a and b are done: copy the staged low eight words
         * from the stack into r, four at a time. */
        "ldr	r3, [sp, #0]\n\t"
        "ldr	r4, [sp, #4]\n\t"
        "ldr	r5, [sp, #8]\n\t"
        "ldr	r6, [sp, #12]\n\t"
        "str	r3, [%[r], #0]\n\t"
        "str	r4, [%[r], #4]\n\t"
        "str	r5, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r3, [sp, #16]\n\t"
        "ldr	r4, [sp, #20]\n\t"
        "ldr	r5, [sp, #24]\n\t"
        "ldr	r6, [sp, #28]\n\t"
        "str	r3, [%[r], #16]\n\t"
        "str	r4, [%[r], #20]\n\t"
        "str	r5, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        /* Release the scratch area so sp leaves with its entry value. */
        "add	sp, sp, #32\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * The input is read as eight 32-bit words and all sixteen words of the
 * square are written. Each result word sums every A[i] * A[j] (i <= j) with
 * the same i + j into a three register accumulator whose roles rotate
 * through r2, r3 and r4. Products with i != j must count twice: for most
 * result words each such product is simply added twice, while for result
 * words 5..9 the cross products are first summed in r5:r6:r7 and that sum
 * is doubled once.
 * Result words 0..7 are staged in a 32 byte scratch area on the stack and
 * are copied into r only after the last read of a. Result words 8..15 are
 * stored directly into r.
 * NOTE(review): the staging presumably exists so that r may be the same
 * array as a - confirm against the callers.
 *
 * r  A single precision integer (written at byte offsets 0..60).
 * a  A single precision integer (read at byte offsets 0..28).
 */
static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve 32 bytes of stack for the low eight result words. */
        "sub	sp, sp, #32\n\t"
        /* r14 stays zero; it is used to fold the carry flag into the top
         * accumulator register. */
        "mov	r14, #0\n\t"
        "#  A[0] * A[0]\n\t"
        "ldr	r10, [%[a], #0]\n\t"
        "umull	r8, r3, r10, r10\n\t"
        "mov	r4, #0\n\t"
        "str	r8, [sp]\n\t"
        "#  A[0] * A[1]\n\t"
        "ldr	r10, [%[a], #4]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [sp, #4]\n\t"
        "#  A[0] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r14, r14\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "#  A[1] * A[1]\n\t"
        "ldr	r10, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "str	r4, [sp, #8]\n\t"
        "#  A[0] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r14, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "#  A[1] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "str	r2, [sp, #12]\n\t"
        "#  A[0] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[1] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[2] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [sp, #16]\n\t"
        /* Result words 5..9: the cross products are summed in r5:r6:r7,
         * the sum is doubled once, and (where there is one) the square term
         * is added before folding into the main accumulator. */
        "#  A[0] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [sp, #20]\n\t"
        "#  A[0] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [sp, #24]\n\t"
        "#  A[0] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r2, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r3, r3, r5\n\t"
        "adcs	r4, r4, r6\n\t"
        "adc	r2, r2, r7\n\t"
        "str	r3, [sp, #28]\n\t"
        "#  A[1] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[2] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        /* From result word 8 upwards the words are stored straight into r
         * rather than into the stack scratch area. */
        "str	r4, [%[r], #32]\n\t"
        "#  A[2] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[3] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [%[r], #36]\n\t"
        /* Remaining result words: each cross product is again added into
         * the accumulator twice. */
        "#  A[3] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[4] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[5] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [%[r], #40]\n\t"
        "#  A[4] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r14, r14\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "#  A[5] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "str	r4, [%[r], #44]\n\t"
        "#  A[5] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r14, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "#  A[6] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "str	r2, [%[r], #48]\n\t"
        "#  A[6] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [%[r], #52]\n\t"
        "#  A[7] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r4, r4, r8\n\t"
        "adc	r2, r2, r9\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r2, [%[r], #60]\n\t"
        /* All reads of a are done: copy the staged low eight words from the
         * stack into r, four at a time. */
        "ldr	r2, [sp, #0]\n\t"
        "ldr	r3, [sp, #4]\n\t"
        "ldr	r4, [sp, #8]\n\t"
        "ldr	r8, [sp, #12]\n\t"
        "str	r2, [%[r], #0]\n\t"
        "str	r3, [%[r], #4]\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r8, [%[r], #12]\n\t"
        "ldr	r2, [sp, #16]\n\t"
        "ldr	r3, [sp, #20]\n\t"
        "ldr	r4, [sp, #24]\n\t"
        "ldr	r8, [sp, #28]\n\t"
        "str	r2, [%[r], #16]\n\t"
        "str	r3, [%[r], #20]\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r8, [%[r], #28]\n\t"
        /* Release the scratch area so sp leaves with its entry value. */
        "add	sp, sp, #32\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
    );
}

/* Add b to a into r. (r = a + b)
 *
 * Eight 32-bit words of a and b are added in two groups of four. The carry
 * out of the first group is carried into the second through the processor
 * carry flag, which the loads and stores in between leave untouched.
 *
 * r  A single precision integer (written at byte offsets 0..28).
 * a  A single precision integer (read at byte offsets 0..28).
 * b  A single precision integer (read at byte offsets 0..28).
 *
 * Returns the carry out of the most significant word: 0 or 1.
 */
static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 stays zero so the final adc yields just the carry flag. */
        "mov	r12, #0\n\t"
        /* Words 0..3: load both operands, add, store. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Words 4..7: the first add is adcs so the carry from word 3 is
         * included. */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        /* c = 0 + 0 + carry flag. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}

/* Sub b from a into a. (a -= b)
 *
 * Sixteen words (byte offsets 0 to 60) are subtracted, four at a time, with
 * the borrow carried from one word to the next across the whole chain.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * returns 0 when the last word subtracted produced no borrow, otherwise a
 * value with every bit set.
 */
static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* a[0..3] -= b[0..3] */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* a[4..7] -= b[4..7] + borrow */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        /* a[8..11] -= b[8..11] + borrow */
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        /* a[12..15] -= b[12..15] + borrow */
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        /* r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        /* subs/sbcs rewrite the condition flags, hence the "cc" clobber. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Sixteen words (byte offsets 0 to 60) are added, four at a time, with the
 * carry flag carried from one word to the next across the whole chain.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the last word added (0 or 1).
 */
static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 stays zero so the final adc yields just the carry flag. */
        "mov	r12, #0\n\t"
        /* r[0..3] = a[0..3] + b[0..3] */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* r[4..7] = a[4..7] + b[4..7] + carry */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        /* r[8..11] = a[8..11] + b[8..11] + carry */
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        /* r[12..15] = a[12..15] + b[12..15] + carry */
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        /* c = 0 + 0 + carry flag */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* adds/adcs rewrite the condition flags, hence the "cc" clobber. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

/* Mask every digit of a with m and write the result to r.
 *
 * Eight digits are processed; each output digit depends only on the input
 * digit at the same index.
 *
 * r  A single precision integer that receives the masked digits.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    /* Compact build: one digit per iteration. */
    while (idx < 8) {
        r[idx] = m & a[idx];
        idx++;
    }
#else
    /* Default build: all eight digits written out without a loop. */
    r[0] = m & a[0];
    r[1] = m & a[1];
    r[2] = m & a[2];
    r[3] = m & a[3];
    r[4] = m & a[4];
    r[5] = m & a[5];
    r[6] = m & a[6];
    r[7] = m & a[7];
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * One Karatsuba step: the 16-digit operands are split into 8-digit halves
 * and three 8-digit multiplications are combined into the 32-digit result.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit sum_a[8];    /* low half of a plus high half of a */
    sp_digit sum_b[8];    /* low half of b plus high half of b */
    sp_digit mid[16];     /* product of the two half sums */
    sp_digit high[16];    /* product of the two high halves */
    sp_digit carry_a;
    sp_digit carry_b;
    sp_digit top;

    carry_a = sp_2048_add_8(sum_a, &a[0], &a[8]);
    carry_b = sp_2048_add_8(sum_b, &b[0], &b[8]);
    top = carry_a & carry_b;

    /* The low product goes straight into r and stays the last of the three
     * multiplications - presumably so a and b remain readable when r aliases
     * one of them (TODO confirm against callers). */
    sp_2048_mul_8(mid, sum_a, sum_b);
    sp_2048_mul_8(high, &a[8], &b[8]);
    sp_2048_mul_8(r, a, b);

    /* Account for the carries dropped from the half sums: add sum_a when
     * sum_b carried and sum_b when sum_a carried. */
    sp_2048_mask_8(&r[16], sum_a, 0 - carry_b);
    sp_2048_mask_8(sum_b, sum_b, 0 - carry_a);
    top += sp_2048_add_8(&r[16], &r[16], sum_b);

    /* mid -= high; mid -= low; then fold mid in at digit 8. */
    top += sp_2048_sub_in_place_16(mid, high);
    top += sp_2048_sub_in_place_16(mid, r);
    top += sp_2048_add_16(&r[8], &r[8], mid);

    r[24] = top;
    XMEMSET(&r[25], 0, sizeof(sp_digit) * 7);
    (void)sp_2048_add_16(&r[16], &r[16], high);
}

/* Square a and put result in r. (r = a * a)
 *
 * One Karatsuba step: the 16-digit operand is split into 8-digit halves and
 * three 8-digit squarings are combined into the 32-digit result.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
{
    sp_digit sum[8];      /* low half of a plus high half of a */
    sp_digit mid[16];     /* square of the half sum */
    sp_digit high[16];    /* square of the high half */
    sp_digit top;

    top = sp_2048_add_8(sum, &a[0], &a[8]);

    /* The low square goes straight into r and stays the last of the three
     * squarings - presumably so a remains readable when r aliases it
     * (TODO confirm against callers). */
    sp_2048_sqr_8(mid, sum);
    sp_2048_sqr_8(high, &a[8]);
    sp_2048_sqr_8(r, a);

    /* When the half sum carried, add it in twice (once doubled). */
    sp_2048_mask_8(&r[16], sum, 0 - top);
    top += sp_2048_add_8(&r[16], &r[16], &r[16]);

    /* mid -= high; mid -= low; then fold mid in at digit 8. */
    top += sp_2048_sub_in_place_16(mid, high);
    top += sp_2048_sub_in_place_16(mid, r);
    top += sp_2048_add_16(&r[8], &r[8], mid);

    r[24] = top;
    XMEMSET(&r[25], 0, sizeof(sp_digit) * 7);
    (void)sp_2048_add_16(&r[16], &r[16], high);
}

/* Sub b from a into a. (a -= b)
 *
 * Thirty-two words (byte offsets 0 to 124) are subtracted, four at a time,
 * with the borrow carried from one word to the next across the whole chain.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * returns 0 when the last word subtracted produced no borrow, otherwise a
 * value with every bit set.
 */
static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* a[0..3] -= b[0..3] */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* a[4..7] -= b[4..7] + borrow */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        /* a[8..11] -= b[8..11] + borrow */
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        /* a[12..15] -= b[12..15] + borrow */
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        /* a[16..19] -= b[16..19] + borrow */
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        /* a[20..23] -= b[20..23] + borrow */
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        /* a[24..27] -= b[24..27] + borrow */
        "ldr	r2, [%[a], #96]\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r5, [%[a], #108]\n\t"
        "ldr	r6, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "ldr	r8, [%[b], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #96]\n\t"
        "str	r3, [%[a], #100]\n\t"
        "str	r4, [%[a], #104]\n\t"
        "str	r5, [%[a], #108]\n\t"
        /* a[28..31] -= b[28..31] + borrow */
        "ldr	r2, [%[a], #112]\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r5, [%[a], #124]\n\t"
        "ldr	r6, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "ldr	r8, [%[b], #120]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #112]\n\t"
        "str	r3, [%[a], #116]\n\t"
        "str	r4, [%[a], #120]\n\t"
        "str	r5, [%[a], #124]\n\t"
        /* r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        /* subs/sbcs rewrite the condition flags, hence the "cc" clobber. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Thirty-two words (byte offsets 0 to 124) are added, four at a time, with
 * the carry flag carried from one word to the next across the whole chain.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the last word added (0 or 1).
 */
static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 stays zero so the final adc yields just the carry flag. */
        "mov	r12, #0\n\t"
        /* r[0..3] = a[0..3] + b[0..3] */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* r[4..7] = a[4..7] + b[4..7] + carry */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        /* r[8..11] = a[8..11] + b[8..11] + carry */
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        /* r[12..15] = a[12..15] + b[12..15] + carry */
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        /* r[16..19] = a[16..19] + b[16..19] + carry */
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        /* r[20..23] = a[20..23] + b[20..23] + carry */
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        /* r[24..27] = a[24..27] + b[24..27] + carry */
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        /* r[28..31] = a[28..31] + b[28..31] + carry */
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        /* c = 0 + 0 + carry flag */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* adds/adcs rewrite the condition flags, hence the "cc" clobber. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

/* Mask every digit of a with m and write the result to r.
 *
 * Sixteen digits are processed; each output digit depends only on the input
 * digit at the same index.
 *
 * r  A single precision integer that receives the masked digits.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    /* Compact build: one digit per iteration. */
    while (idx < 16) {
        r[idx] = m & a[idx];
        idx++;
    }
#else
    int base;

    /* Default build: eight digits per iteration, two iterations. */
    for (base = 0; base < 16; base += 8) {
        sp_digit* dst = &r[base];
        const sp_digit* src = &a[base];

        dst[0] = m & src[0];
        dst[1] = m & src[1];
        dst[2] = m & src[2];
        dst[3] = m & src[3];
        dst[4] = m & src[4];
        dst[5] = m & src[5];
        dst[6] = m & src[6];
        dst[7] = m & src[7];
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * One Karatsuba step: the 32-digit operands are split into 16-digit halves
 * and three 16-digit multiplications are combined into the 64-digit result.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit sum_a[16];   /* low half of a plus high half of a */
    sp_digit sum_b[16];   /* low half of b plus high half of b */
    sp_digit mid[32];     /* product of the two half sums */
    sp_digit high[32];    /* product of the two high halves */
    sp_digit carry_a;
    sp_digit carry_b;
    sp_digit top;

    carry_a = sp_2048_add_16(sum_a, &a[0], &a[16]);
    carry_b = sp_2048_add_16(sum_b, &b[0], &b[16]);
    top = carry_a & carry_b;

    /* The low product goes straight into r and stays the last of the three
     * multiplications - presumably so a and b remain readable when r aliases
     * one of them (TODO confirm against callers). */
    sp_2048_mul_16(mid, sum_a, sum_b);
    sp_2048_mul_16(high, &a[16], &b[16]);
    sp_2048_mul_16(r, a, b);

    /* Account for the carries dropped from the half sums: add sum_a when
     * sum_b carried and sum_b when sum_a carried. */
    sp_2048_mask_16(&r[32], sum_a, 0 - carry_b);
    sp_2048_mask_16(sum_b, sum_b, 0 - carry_a);
    top += sp_2048_add_16(&r[32], &r[32], sum_b);

    /* mid -= high; mid -= low; then fold mid in at digit 16. */
    top += sp_2048_sub_in_place_32(mid, high);
    top += sp_2048_sub_in_place_32(mid, r);
    top += sp_2048_add_32(&r[16], &r[16], mid);

    r[48] = top;
    XMEMSET(&r[49], 0, sizeof(sp_digit) * 15);
    (void)sp_2048_add_32(&r[32], &r[32], high);
}

/* Square a and put result in r. (r = a * a)
 *
 * One Karatsuba step: the 32-digit operand is split into 16-digit halves and
 * three 16-digit squarings are combined into the 64-digit result.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
{
    sp_digit sum[16];     /* low half of a plus high half of a */
    sp_digit mid[32];     /* square of the half sum */
    sp_digit high[32];    /* square of the high half */
    sp_digit top;

    top = sp_2048_add_16(sum, &a[0], &a[16]);

    /* The low square goes straight into r and stays the last of the three
     * squarings - presumably so a remains readable when r aliases it
     * (TODO confirm against callers). */
    sp_2048_sqr_16(mid, sum);
    sp_2048_sqr_16(high, &a[16]);
    sp_2048_sqr_16(r, a);

    /* When the half sum carried, add it in twice (once doubled). */
    sp_2048_mask_16(&r[32], sum, 0 - top);
    top += sp_2048_add_16(&r[32], &r[32], &r[32]);

    /* mid -= high; mid -= low; then fold mid in at digit 16. */
    top += sp_2048_sub_in_place_32(mid, high);
    top += sp_2048_sub_in_place_32(mid, r);
    top += sp_2048_add_32(&r[16], &r[16], mid);

    r[48] = top;
    XMEMSET(&r[49], 0, sizeof(sp_digit) * 15);
    (void)sp_2048_add_32(&r[32], &r[32], high);
}

/* Sub b from a into a. (a -= b)
 *
 * Sixty-four words (byte offsets 0 to 252) are subtracted, four at a time,
 * with the borrow carried from one word to the next across the whole chain.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * returns 0 when the last word subtracted produced no borrow, otherwise a
 * value with every bit set.
 */
static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* a[0..3] -= b[0..3] */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* a[4..7] -= b[4..7] + borrow */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        /* a[8..11] -= b[8..11] + borrow */
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        /* a[12..15] -= b[12..15] + borrow */
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        /* a[16..19] -= b[16..19] + borrow */
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        /* a[20..23] -= b[20..23] + borrow */
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        /* a[24..27] -= b[24..27] + borrow */
        "ldr	r2, [%[a], #96]\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r5, [%[a], #108]\n\t"
        "ldr	r6, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "ldr	r8, [%[b], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #96]\n\t"
        "str	r3, [%[a], #100]\n\t"
        "str	r4, [%[a], #104]\n\t"
        "str	r5, [%[a], #108]\n\t"
        /* a[28..31] -= b[28..31] + borrow */
        "ldr	r2, [%[a], #112]\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r5, [%[a], #124]\n\t"
        "ldr	r6, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "ldr	r8, [%[b], #120]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #112]\n\t"
        "str	r3, [%[a], #116]\n\t"
        "str	r4, [%[a], #120]\n\t"
        "str	r5, [%[a], #124]\n\t"
        /* a[32..35] -= b[32..35] + borrow */
        "ldr	r2, [%[a], #128]\n\t"
        "ldr	r3, [%[a], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r5, [%[a], #140]\n\t"
        "ldr	r6, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "ldr	r8, [%[b], #136]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #128]\n\t"
        "str	r3, [%[a], #132]\n\t"
        "str	r4, [%[a], #136]\n\t"
        "str	r5, [%[a], #140]\n\t"
        /* a[36..39] -= b[36..39] + borrow */
        "ldr	r2, [%[a], #144]\n\t"
        "ldr	r3, [%[a], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r5, [%[a], #156]\n\t"
        "ldr	r6, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "ldr	r8, [%[b], #152]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #144]\n\t"
        "str	r3, [%[a], #148]\n\t"
        "str	r4, [%[a], #152]\n\t"
        "str	r5, [%[a], #156]\n\t"
        /* a[40..43] -= b[40..43] + borrow */
        "ldr	r2, [%[a], #160]\n\t"
        "ldr	r3, [%[a], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r5, [%[a], #172]\n\t"
        "ldr	r6, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "ldr	r8, [%[b], #168]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #160]\n\t"
        "str	r3, [%[a], #164]\n\t"
        "str	r4, [%[a], #168]\n\t"
        "str	r5, [%[a], #172]\n\t"
        /* a[44..47] -= b[44..47] + borrow */
        "ldr	r2, [%[a], #176]\n\t"
        "ldr	r3, [%[a], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r5, [%[a], #188]\n\t"
        "ldr	r6, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "ldr	r8, [%[b], #184]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #176]\n\t"
        "str	r3, [%[a], #180]\n\t"
        "str	r4, [%[a], #184]\n\t"
        "str	r5, [%[a], #188]\n\t"
        /* a[48..51] -= b[48..51] + borrow */
        "ldr	r2, [%[a], #192]\n\t"
        "ldr	r3, [%[a], #196]\n\t"
        "ldr	r4, [%[a], #200]\n\t"
        "ldr	r5, [%[a], #204]\n\t"
        "ldr	r6, [%[b], #192]\n\t"
        "ldr	r7, [%[b], #196]\n\t"
        "ldr	r8, [%[b], #200]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #192]\n\t"
        "str	r3, [%[a], #196]\n\t"
        "str	r4, [%[a], #200]\n\t"
        "str	r5, [%[a], #204]\n\t"
        /* a[52..55] -= b[52..55] + borrow */
        "ldr	r2, [%[a], #208]\n\t"
        "ldr	r3, [%[a], #212]\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "ldr	r5, [%[a], #220]\n\t"
        "ldr	r6, [%[b], #208]\n\t"
        "ldr	r7, [%[b], #212]\n\t"
        "ldr	r8, [%[b], #216]\n\t"
        "ldr	r9, [%[b], #220]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #208]\n\t"
        "str	r3, [%[a], #212]\n\t"
        "str	r4, [%[a], #216]\n\t"
        "str	r5, [%[a], #220]\n\t"
        /* a[56..59] -= b[56..59] + borrow */
        "ldr	r2, [%[a], #224]\n\t"
        "ldr	r3, [%[a], #228]\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "ldr	r5, [%[a], #236]\n\t"
        "ldr	r6, [%[b], #224]\n\t"
        "ldr	r7, [%[b], #228]\n\t"
        "ldr	r8, [%[b], #232]\n\t"
        "ldr	r9, [%[b], #236]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #224]\n\t"
        "str	r3, [%[a], #228]\n\t"
        "str	r4, [%[a], #232]\n\t"
        "str	r5, [%[a], #236]\n\t"
        /* a[60..63] -= b[60..63] + borrow */
        "ldr	r2, [%[a], #240]\n\t"
        "ldr	r3, [%[a], #244]\n\t"
        "ldr	r4, [%[a], #248]\n\t"
        "ldr	r5, [%[a], #252]\n\t"
        "ldr	r6, [%[b], #240]\n\t"
        "ldr	r7, [%[b], #244]\n\t"
        "ldr	r8, [%[b], #248]\n\t"
        "ldr	r9, [%[b], #252]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #240]\n\t"
        "str	r3, [%[a], #244]\n\t"
        "str	r4, [%[a], #248]\n\t"
        "str	r5, [%[a], #252]\n\t"
        /* r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        /* subs/sbcs rewrite the condition flags, hence the "cc" clobber. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: 16 groups of four 32-bit words (byte offsets 0..252) are
 * loaded from a and b, added with a single carry chain and stored to r.
 * Each group loads all of its inputs before it stores, so r may be the same
 * buffer as a or b.
 *
 * r  A single precision integer; 64 words are written.
 * a  A single precision integer; 64 words are read.
 * b  A single precision integer; 64 words are read.
 *
 * returns the carry out of the top word (0 or 1).
 */
static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = 0; only used by the final adc to turn the carry into c. */
        "mov	r12, #0\n\t"
        /* Words 0..3: 'adds' starts the carry chain. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Every later group continues the chain with 'adcs'; the ldr/str
         * in between do not touch the flags. */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r5, [%[a], #132]\n\t"
        "ldr	r6, [%[a], #136]\n\t"
        "ldr	r7, [%[a], #140]\n\t"
        "ldr	r8, [%[b], #128]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "ldr	r10, [%[b], #136]\n\t"
        "ldr	r14, [%[b], #140]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r5, [%[r], #132]\n\t"
        "str	r6, [%[r], #136]\n\t"
        "str	r7, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r5, [%[a], #148]\n\t"
        "ldr	r6, [%[a], #152]\n\t"
        "ldr	r7, [%[a], #156]\n\t"
        "ldr	r8, [%[b], #144]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "ldr	r10, [%[b], #152]\n\t"
        "ldr	r14, [%[b], #156]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r5, [%[r], #148]\n\t"
        "str	r6, [%[r], #152]\n\t"
        "str	r7, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r5, [%[a], #164]\n\t"
        "ldr	r6, [%[a], #168]\n\t"
        "ldr	r7, [%[a], #172]\n\t"
        "ldr	r8, [%[b], #160]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "ldr	r10, [%[b], #168]\n\t"
        "ldr	r14, [%[b], #172]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r5, [%[r], #164]\n\t"
        "str	r6, [%[r], #168]\n\t"
        "str	r7, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r5, [%[a], #180]\n\t"
        "ldr	r6, [%[a], #184]\n\t"
        "ldr	r7, [%[a], #188]\n\t"
        "ldr	r8, [%[b], #176]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "ldr	r10, [%[b], #184]\n\t"
        "ldr	r14, [%[b], #188]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r5, [%[r], #180]\n\t"
        "str	r6, [%[r], #184]\n\t"
        "str	r7, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r5, [%[a], #196]\n\t"
        "ldr	r6, [%[a], #200]\n\t"
        "ldr	r7, [%[a], #204]\n\t"
        "ldr	r8, [%[b], #192]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "ldr	r10, [%[b], #200]\n\t"
        "ldr	r14, [%[b], #204]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r5, [%[r], #196]\n\t"
        "str	r6, [%[r], #200]\n\t"
        "str	r7, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r5, [%[a], #212]\n\t"
        "ldr	r6, [%[a], #216]\n\t"
        "ldr	r7, [%[a], #220]\n\t"
        "ldr	r8, [%[b], #208]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "ldr	r10, [%[b], #216]\n\t"
        "ldr	r14, [%[b], #220]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r5, [%[r], #212]\n\t"
        "str	r6, [%[r], #216]\n\t"
        "str	r7, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r5, [%[a], #228]\n\t"
        "ldr	r6, [%[a], #232]\n\t"
        "ldr	r7, [%[a], #236]\n\t"
        "ldr	r8, [%[b], #224]\n\t"
        "ldr	r9, [%[b], #228]\n\t"
        "ldr	r10, [%[b], #232]\n\t"
        "ldr	r14, [%[b], #236]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r5, [%[r], #228]\n\t"
        "str	r6, [%[r], #232]\n\t"
        "str	r7, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r5, [%[a], #244]\n\t"
        "ldr	r6, [%[a], #248]\n\t"
        "ldr	r7, [%[a], #252]\n\t"
        "ldr	r8, [%[b], #240]\n\t"
        "ldr	r9, [%[b], #244]\n\t"
        "ldr	r10, [%[b], #248]\n\t"
        "ldr	r14, [%[b], #252]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r5, [%[r], #244]\n\t"
        "str	r6, [%[r], #248]\n\t"
        "str	r7, [%[r], #252]\n\t"
        /* c = 0 + 0 + carry flag, i.e. the carry out of the top word. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}

/* Mask a 32-digit number: every digit of a is ANDed with m and stored in r.
 *
 * Digits are processed from index 0 upwards, one store per digit, so calling
 * this with r == a masks the number in place.
 *
 * NOTE(review): this definition appears to sit inside a region that is only
 * compiled when WOLFSSL_SP_SMALL is not defined (see the closing
 * "#endif / !WOLFSSL_SP_SMALL" further down), which would make the first
 * branch below unreachable - confirm before removing it.
 *
 * r  A single precision integer (32 digits written).
 * a  A single precision integer (32 digits read).
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
{
    sp_digit* out = r;
    const sp_digit* in = a;
#ifdef WOLFSSL_SP_SMALL
    int left;

    /* Compact form: one digit per pass. */
    for (left = 32; left > 0; left--) {
        *out++ = *in++ & m;
    }
#else
    int chunk;

    /* Unrolled form: four passes of eight digits each. */
    for (chunk = 0; chunk < 32 / 8; chunk++) {
        out[0] = in[0] & m;
        out[1] = in[1] & m;
        out[2] = in[2] & m;
        out[3] = in[3] & m;
        out[4] = in[4] & m;
        out[5] = in[5] & m;
        out[6] = in[6] & m;
        out[7] = in[7] & m;
        out += 8;
        in += 8;
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * One level of Karatsuba: the 64-digit operands are split into 32-digit
 * halves and the result is assembled from three 32-digit products
 * (low * low, high * high and sum-of-halves * sum-of-halves).
 *
 * The product of the low halves is written to r last of the three, after
 * the other two products have read a and b, and a and b are not read again
 * afterwards.
 *
 * r  A single precision integer; digits 0..127 are written.
 * a  A single precision integer (64 digits read).
 * b  A single precision integer (64 digits read).
 */
SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit mid[64];    /* (a_lo + a_hi) * (b_lo + b_hi), then middle term */
    sp_digit high[64];   /* a_hi * b_hi */
    sp_digit sum_a[32];  /* a_lo + a_hi without its carry */
    sp_digit sum_b[32];  /* b_lo + b_hi without its carry */
    sp_digit carry_a;
    sp_digit carry_b;
    sp_digit top;

    /* Half sums; the carries are handled separately below. */
    carry_a = sp_2048_add_32(sum_a, a, a + 32);
    carry_b = sp_2048_add_32(sum_b, b, b + 32);

    /* The three half-size products. */
    sp_2048_mul_32(mid, sum_a, sum_b);
    sp_2048_mul_32(high, a + 32, b + 32);
    sp_2048_mul_32(r, a, b);

    /* Contribution of the dropped carries: sum_a if b's sum carried plus
     * sum_b if a's sum carried, selected by all-ones/zero masks. */
    sp_2048_mask_32(r + 64, sum_a, 0 - carry_b);
    sp_2048_mask_32(sum_b, sum_b, 0 - carry_a);
    top = carry_a & carry_b;
    top += sp_2048_add_32(r + 64, r + 64, sum_b);

    /* mid -= high; mid -= low. Each return value is added to top as is. */
    top += sp_2048_sub_in_place_64(mid, high);
    top += sp_2048_sub_in_place_64(mid, r);

    /* Fold the middle term in at digit 32 and the high product at digit 64. */
    top += sp_2048_add_64(r + 32, r + 32, mid);
    r[96] = top;
    XMEMSET(&r[97], 0, sizeof(sp_digit) * 31);
    (void)sp_2048_add_64(r + 64, r + 64, high);
}

/* Square a and put result in r. (r = a * a)
 *
 * One level of Karatsuba using three 32-digit squarings: low half, high
 * half and the sum of the two halves.
 *
 * The square of the low half is written to r last of the three, and a is
 * not read again afterwards.
 *
 * r  A single precision integer; digits 0..127 are written.
 * a  A single precision integer (64 digits read).
 */
SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
{
    sp_digit mid[64];   /* (a_lo + a_hi)^2, then the middle term */
    sp_digit high[64];  /* a_hi^2 */
    sp_digit sum[32];   /* a_lo + a_hi without its carry */
    sp_digit carry;
    sp_digit top;

    carry = sp_2048_add_32(sum, a, a + 32);

    /* The three half-size squares. */
    sp_2048_sqr_32(mid, sum);
    sp_2048_sqr_32(high, a + 32);
    sp_2048_sqr_32(r, a);

    /* Contribution of the dropped carry: sum is kept only if the half sum
     * carried, then doubled in place. */
    sp_2048_mask_32(r + 64, sum, 0 - carry);
    top = carry + sp_2048_add_32(r + 64, r + 64, r + 64);

    /* mid -= high; mid -= low. Each return value is added to top as is. */
    top += sp_2048_sub_in_place_64(mid, high);
    top += sp_2048_sub_in_place_64(mid, r);

    /* Fold the middle term in at digit 32 and the high square at digit 64. */
    top += sp_2048_add_64(r + 32, r + 32, mid);
    r[96] = top;
    XMEMSET(&r[97], 0, sizeof(sp_digit) * 31);
    (void)sp_2048_add_64(r + 64, r + 64, high);
}

#endif /* !WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Looped variant: each pass adds four 32-bit words and the loop runs until
 * the a pointer has advanced 256 bytes (64 words). The carry is parked in c
 * between passes because the loop-end compare overwrites the flags.
 *
 * r  A single precision integer; 64 words are written.
 * a  A single precision integer; 64 words are read.
 * b  A single precision integer; 64 words are read.
 *
 * returns the carry out of the top word (0 or 1).
 */
static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = end of a (a + 64 words). */
        "add	r12, %[a], #256\n\t"
        "\n1:\n\t"
        /* Rebuild the carry flag from c: c + 0xffffffff carries only when
         * c is 1. */
        "adds	%[c], %[c], #-1\n\t"
        /* Post-indexed loads/stores advance a, b and r by one word each. */
        "ldr	r4, [%[a]], #4\n\t"
        "ldr	r5, [%[a]], #4\n\t"
        "ldr	r6, [%[a]], #4\n\t"
        "ldr	r7, [%[a]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "ldr	r14, [%[b]], #4\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r]], #4\n\t"
        "str	r5, [%[r]], #4\n\t"
        "str	r6, [%[r]], #4\n\t"
        "str	r7, [%[r]], #4\n\t"
        /* Save the carry (0 or 1) in c before cmp changes the flags. */
        "mov	r4, #0\n\t"
        "adc	%[c], r4, #0\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Looped variant: each pass subtracts four 32-bit words and the loop runs
 * until the a pointer has advanced 256 bytes (64 words). The borrow is
 * parked in c between passes because the loop-end compare overwrites the
 * flags.
 *
 * a  A single precision integer; 64 words are read and overwritten.
 * b  A single precision integer; 64 words are read.
 *
 * returns 0 when there is no borrow out of the top word, otherwise a word
 * with all bits set (-1).
 */
static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r14 = 0 constant; r12 = end of a (a + 64 words). */
        "mov	r14, #0\n\t"
        "add	r12, %[a], #256\n\t"
        "\n1:\n\t"
        /* Rebuild the flags from c: 0 - c leaves carry set (no borrow) when
         * c is 0 and carry clear (borrow) when c is -1. */
        "subs	%[c], r14, %[c]\n\t"
        /* a is read with fixed offsets and only advanced by the stores
         * below; b is advanced by its post-indexed loads. */
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r7, [%[b]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "sbcs	r6, r6, r10\n\t"
        "str	r3, [%[a]], #4\n\t"
        "str	r4, [%[a]], #4\n\t"
        "str	r5, [%[a]], #4\n\t"
        "str	r6, [%[a]], #4\n\t"
        /* Save the borrow as 0 / -1 before cmp changes the flags. */
        "sbc	%[c], r14, r14\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Column-by-column (product scanning) multiply of two 64-word numbers.
 * The 128-word result is built in a 512-byte scratch area carved out of
 * the stack and only copied to r once every column is done - presumably so
 * that r may alias a or b (confirm against callers).
 *
 * The stack pointer is moved by this asm; the copy loop at the end brings
 * it back to its entry value.
 *
 * r  A single precision integer; 128 words are written.
 * a  A single precision integer; 64 words are read.
 * b  A single precision integer; 64 words are read.
 */
static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Reserve 128 words of scratch space on the stack. */
        "sub	sp, sp, #512\n\t"
        /* r5 = byte offset k of the current result column;
         * r8:r7:r6 = three-word column accumulator. */
        "mov	r5, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "\n1:\n\t"
        /* r3 = offset into a = max(k - 252, 0); r4 = offset into b = k - r3. */
        "subs	r3, r5, #252\n\t"
        "it	cc\n\t"
        "movcc	r3, #0\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* accumulator += a[r3] * b[r4] */
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r12, [%[b], r4]\n\t"
        "umull	r9, r10, r14, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, #0\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Column finished once a is exhausted or the a offset passes k. */
        "cmp	r3, #256\n\t"
        "beq	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word as scratch[k] and shift the
         * accumulator down one word for the next column. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #504\n\t"
        "ble	1b\n\t"
        /* r5 is 508 here: store the final (top) word. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy scratch to r 16 bytes per pass, releasing the stack as it
         * goes; starting from r5 = 508 this runs 32 times (512 bytes). */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * Column-by-column squaring of a 64-word number. Within a column only the
 * pairs with the first offset not above the second are visited:
 * off-diagonal products are added twice and the diagonal square once.
 * The 128-word result is built in a 512-byte scratch area carved out of
 * the stack and copied to r at the end - presumably so that r may alias a
 * (confirm against callers).
 *
 * The stack pointer is moved by this asm; the copy loop at the end brings
 * it back to its entry value.
 *
 * NOTE(review): local label 4 is defined twice. "4f" in the column loop
 * resolves to the next definition (the diagonal case) and "4b" in the copy
 * loop to the previous one (the copy loop itself).
 * NOTE(review): "r9" is listed twice in the clobber list; looks redundant.
 *
 * r  A single precision integer; 128 words are written.
 * a  A single precision integer; 64 words are read.
 */
static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve 128 words of scratch space on the stack. */
        "sub	sp, sp, #512\n\t"
        /* r12 = 0 constant; r8:r7:r6 = three-word column accumulator;
         * r5 = byte offset k of the current result column. */
        "mov	r12, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "mov	r5, #0\n\t"
        "\n1:\n\t"
        /* r3 = first offset = max(k - 252, 0); r4 = second offset = k - r3. */
        "subs	r3, r5, #252\n\t"
        "it	cc\n\t"
        "movcc	r3, r12\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* Equal offsets: diagonal term, handled at the first label 4. */
        "cmp	r4, r3\n\t"
        "beq	4f\n\t"
        /* Off-diagonal: accumulator += 2 * a[r3] * a[r4] (added twice). */
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r9, [%[a], r4]\n\t"
        "umull	r9, r10, r14, r9\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "bal	5f\n\t"
        "\n4:\n\t"
        /* Diagonal: accumulator += a[r3]^2 (added once). */
        "ldr	r14, [%[a], r3]\n\t"
        "umull	r9, r10, r14, r14\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "\n5:\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Column finished when a is exhausted, the offsets have crossed,
         * or the first offset passes k. */
        "cmp	r3, #256\n\t"
        "beq	3f\n\t"
        "cmp	r3, r4\n\t"
        "bgt	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word as scratch[k] and shift the
         * accumulator down one word for the next column. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #504\n\t"
        "ble	1b\n\t"
        /* r5 is 508 here: store the final (top) word. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy scratch to r 16 bytes per pass, releasing the stack as it
         * goes; starting from r5 = 508 this runs 32 times (512 bytes). */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
#ifdef WOLFSSL_SP_SMALL
/* Mask a 32-digit number: every digit of a is ANDed with m and stored in r.
 *
 * Digits are processed from index 0 upwards, one store per digit, so calling
 * this with r == a masks the number in place.
 *
 * r  A single precision integer (32 digits written).
 * a  A single precision integer (32 digits read).
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
{
    const sp_digit* stop = a + 32;

    while (a != stop) {
        *r++ = *a++ & m;
    }
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Looped variant: each pass adds four 32-bit words and the loop runs until
 * the a pointer has advanced 128 bytes (32 words). The carry is parked in c
 * between passes because the loop-end compare overwrites the flags.
 *
 * r  A single precision integer; 32 words are written.
 * a  A single precision integer; 32 words are read.
 * b  A single precision integer; 32 words are read.
 *
 * returns the carry out of the top word (0 or 1).
 */
static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = end of a (a + 32 words). */
        "add	r12, %[a], #128\n\t"
        "\n1:\n\t"
        /* Rebuild the carry flag from c: c + 0xffffffff carries only when
         * c is 1. */
        "adds	%[c], %[c], #-1\n\t"
        /* Post-indexed loads/stores advance a, b and r by one word each. */
        "ldr	r4, [%[a]], #4\n\t"
        "ldr	r5, [%[a]], #4\n\t"
        "ldr	r6, [%[a]], #4\n\t"
        "ldr	r7, [%[a]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "ldr	r14, [%[b]], #4\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r]], #4\n\t"
        "str	r5, [%[r]], #4\n\t"
        "str	r6, [%[r]], #4\n\t"
        "str	r7, [%[r]], #4\n\t"
        /* Save the carry (0 or 1) in c before cmp changes the flags. */
        "mov	r4, #0\n\t"
        "adc	%[c], r4, #0\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Looped variant: each pass subtracts four 32-bit words and the loop runs
 * until the a pointer has advanced 128 bytes (32 words). The borrow is
 * parked in c between passes because the loop-end compare overwrites the
 * flags.
 *
 * a  A single precision integer; 32 words are read and overwritten.
 * b  A single precision integer; 32 words are read.
 *
 * returns 0 when there is no borrow out of the top word, otherwise a word
 * with all bits set (-1).
 */
static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r14 = 0 constant; r12 = end of a (a + 32 words). */
        "mov	r14, #0\n\t"
        "add	r12, %[a], #128\n\t"
        "\n1:\n\t"
        /* Rebuild the flags from c: 0 - c leaves carry set (no borrow) when
         * c is 0 and carry clear (borrow) when c is -1. */
        "subs	%[c], r14, %[c]\n\t"
        /* a is read with fixed offsets and only advanced by the stores
         * below; b is advanced by its post-indexed loads. */
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r7, [%[b]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "sbcs	r6, r6, r10\n\t"
        "str	r3, [%[a]], #4\n\t"
        "str	r4, [%[a]], #4\n\t"
        "str	r5, [%[a]], #4\n\t"
        "str	r6, [%[a]], #4\n\t"
        /* Save the borrow as 0 / -1 before cmp changes the flags. */
        "sbc	%[c], r14, r14\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Column-by-column (product scanning) multiply of two 32-word numbers.
 * The 64-word result is built in a 256-byte scratch area carved out of
 * the stack and only copied to r once every column is done - presumably so
 * that r may alias a or b (confirm against callers).
 *
 * The stack pointer is moved by this asm; the copy loop at the end brings
 * it back to its entry value.
 *
 * r  A single precision integer; 64 words are written.
 * a  A single precision integer; 32 words are read.
 * b  A single precision integer; 32 words are read.
 */
static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Reserve 64 words of scratch space on the stack. */
        "sub	sp, sp, #256\n\t"
        /* r5 = byte offset k of the current result column;
         * r8:r7:r6 = three-word column accumulator. */
        "mov	r5, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "\n1:\n\t"
        /* r3 = offset into a = max(k - 124, 0); r4 = offset into b = k - r3. */
        "subs	r3, r5, #124\n\t"
        "it	cc\n\t"
        "movcc	r3, #0\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* accumulator += a[r3] * b[r4] */
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r12, [%[b], r4]\n\t"
        "umull	r9, r10, r14, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, #0\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Column finished once a is exhausted or the a offset passes k. */
        "cmp	r3, #128\n\t"
        "beq	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word as scratch[k] and shift the
         * accumulator down one word for the next column. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #248\n\t"
        "ble	1b\n\t"
        /* r5 is 252 here: store the final (top) word. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy scratch to r 16 bytes per pass, releasing the stack as it
         * goes; starting from r5 = 252 this runs 16 times (256 bytes). */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * Column-by-column squaring of a 32-word number. Within a column only the
 * pairs with the first offset not above the second are visited:
 * off-diagonal products are added twice and the diagonal square once.
 * The 64-word result is built in a 256-byte scratch area carved out of
 * the stack and copied to r at the end - presumably so that r may alias a
 * (confirm against callers).
 *
 * The stack pointer is moved by this asm; the copy loop at the end brings
 * it back to its entry value.
 *
 * NOTE(review): local label 4 is defined twice. "4f" in the column loop
 * resolves to the next definition (the diagonal case) and "4b" in the copy
 * loop to the previous one (the copy loop itself).
 * NOTE(review): "r9" is listed twice in the clobber list; looks redundant.
 *
 * r  A single precision integer; 64 words are written.
 * a  A single precision integer; 32 words are read.
 */
static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve 64 words of scratch space on the stack. */
        "sub	sp, sp, #256\n\t"
        /* r12 = 0 constant; r8:r7:r6 = three-word column accumulator;
         * r5 = byte offset k of the current result column. */
        "mov	r12, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "mov	r5, #0\n\t"
        "\n1:\n\t"
        /* r3 = first offset = max(k - 124, 0); r4 = second offset = k - r3. */
        "subs	r3, r5, #124\n\t"
        "it	cc\n\t"
        "movcc	r3, r12\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* Equal offsets: diagonal term, handled at the first label 4. */
        "cmp	r4, r3\n\t"
        "beq	4f\n\t"
        /* Off-diagonal: accumulator += 2 * a[r3] * a[r4] (added twice). */
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r9, [%[a], r4]\n\t"
        "umull	r9, r10, r14, r9\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "bal	5f\n\t"
        "\n4:\n\t"
        /* Diagonal: accumulator += a[r3]^2 (added once). */
        "ldr	r14, [%[a], r3]\n\t"
        "umull	r9, r10, r14, r14\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "\n5:\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Column finished when a is exhausted, the offsets have crossed,
         * or the first offset passes k. */
        "cmp	r3, #128\n\t"
        "beq	3f\n\t"
        "cmp	r3, r4\n\t"
        "bgt	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word as scratch[k] and shift the
         * accumulator down one word for the next column. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #248\n\t"
        "ble	1b\n\t"
        /* r5 is 252 here: store the final (top) word. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy scratch to r 16 bytes per pass, releasing the stack as it
         * goes; starting from r5 = 252 this runs 16 times (256 bytes). */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */

/* Calculate the bottom digit of -1/a mod 2^n.
 *
 * Only the least significant digit of a is used. A 4-bit inverse is formed
 * directly and then refined by three Newton steps, each doubling the number
 * of correct low bits (4 -> 8 -> 16 -> 32). The inverse only exists when
 * a[0] is odd.
 *
 * a    A single precision number.
 * rho  Bottom word of inverse (receives the negated inverse).
 */
static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
{
    const sp_digit low = a[0];
    sp_digit inv;
    int step;

    /* Seed: inv * low == 1 mod 2**4. */
    inv = (((low + 2) & 4) << 1) + low;

    /* After the passes: inv * low == 1 mod 2**8, 2**16, 2**32. */
    for (step = 0; step < 3; step++) {
        inv *= 2 - low * inv;
    }

    /* rho = -1/a[0] mod 2**32 */
    *rho = -inv;
}

/* Mul a by digit b into r. (r = a * b)
 *
 * Reads 64 words of a (byte offsets 0..252) and writes 65 words of r
 * (byte offsets 0..256); the top word written is the final carry out.
 *
 * Register use in both variants: r6:r7 receive each 32x32->64 product,
 * r3/r4/r5 hold the running column sum and r10 is a constant zero used to
 * propagate the carry with adc.
 *
 * The asm changes the condition flags (adds/adcs/cmp), so "cc" is listed in
 * the clobbers to stop the compiler assuming the flags survive the statement.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision digit.
 */
static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    /* Looping variant: r9 is the byte offset into a and r, stepping by 4
     * from 4 up to (but not including) 256. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r5, r3, %[b], r8\n\t"
        "mov	r4, #0\n\t"
        "str	r5, [%[r]]\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, #4\n\t"
        "1:\n\t"
        "ldr	r8, [%[a], r9]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], r9]\n\t"
        /* Shift the accumulator down one word for the next column. */
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r9, r9, #4\n\t"
        "cmp	r9, #256\n\t"
        "blt	1b\n\t"
        /* Final carry word. */
        "str	r3, [%[r], #256]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#else
    /* Fully unrolled variant: the roles of r3/r4/r5 rotate every word so no
     * register moves are needed between columns. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r3, r4, %[b], r8\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [%[r]]\n\t"
        "# A[1] * B\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #4]\n\t"
        "# A[2] * B\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #8]\n\t"
        "# A[3] * B\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #12]\n\t"
        "# A[4] * B\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #16]\n\t"
        "# A[5] * B\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #20]\n\t"
        "# A[6] * B\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #24]\n\t"
        "# A[7] * B\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #28]\n\t"
        "# A[8] * B\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #32]\n\t"
        "# A[9] * B\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #36]\n\t"
        "# A[10] * B\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #40]\n\t"
        "# A[11] * B\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #44]\n\t"
        "# A[12] * B\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "# A[13] * B\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "# A[14] * B\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #56]\n\t"
        "# A[15] * B\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #60]\n\t"
        "# A[16] * B\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #64]\n\t"
        "# A[17] * B\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #68]\n\t"
        "# A[18] * B\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #72]\n\t"
        "# A[19] * B\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #76]\n\t"
        "# A[20] * B\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #80]\n\t"
        "# A[21] * B\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #84]\n\t"
        "# A[22] * B\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #88]\n\t"
        "# A[23] * B\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #92]\n\t"
        "# A[24] * B\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #96]\n\t"
        "# A[25] * B\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #100]\n\t"
        "# A[26] * B\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #104]\n\t"
        "# A[27] * B\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #108]\n\t"
        "# A[28] * B\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #112]\n\t"
        "# A[29] * B\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #116]\n\t"
        "# A[30] * B\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #120]\n\t"
        "# A[31] * B\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #124]\n\t"
        "# A[32] * B\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #128]\n\t"
        "# A[33] * B\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #132]\n\t"
        "# A[34] * B\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #136]\n\t"
        "# A[35] * B\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #140]\n\t"
        "# A[36] * B\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #144]\n\t"
        "# A[37] * B\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #148]\n\t"
        "# A[38] * B\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #152]\n\t"
        "# A[39] * B\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #156]\n\t"
        "# A[40] * B\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #160]\n\t"
        "# A[41] * B\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #164]\n\t"
        "# A[42] * B\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #168]\n\t"
        "# A[43] * B\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #172]\n\t"
        "# A[44] * B\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #176]\n\t"
        "# A[45] * B\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #180]\n\t"
        "# A[46] * B\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #184]\n\t"
        "# A[47] * B\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #188]\n\t"
        "# A[48] * B\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #192]\n\t"
        "# A[49] * B\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #196]\n\t"
        "# A[50] * B\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #200]\n\t"
        "# A[51] * B\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #204]\n\t"
        "# A[52] * B\n\t"
        "ldr	r8, [%[a], #208]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #208]\n\t"
        "# A[53] * B\n\t"
        "ldr	r8, [%[a], #212]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #212]\n\t"
        "# A[54] * B\n\t"
        "ldr	r8, [%[a], #216]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #216]\n\t"
        "# A[55] * B\n\t"
        "ldr	r8, [%[a], #220]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #220]\n\t"
        "# A[56] * B\n\t"
        "ldr	r8, [%[a], #224]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #224]\n\t"
        "# A[57] * B\n\t"
        "ldr	r8, [%[a], #228]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #228]\n\t"
        "# A[58] * B\n\t"
        "ldr	r8, [%[a], #232]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #232]\n\t"
        "# A[59] * B\n\t"
        "ldr	r8, [%[a], #236]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #236]\n\t"
        "# A[60] * B\n\t"
        "ldr	r8, [%[a], #240]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #240]\n\t"
        "# A[61] * B\n\t"
        "ldr	r8, [%[a], #244]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #244]\n\t"
        "# A[62] * B\n\t"
        "ldr	r8, [%[a], #248]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #248]\n\t"
        "# A[63] * B\n\t"
        "ldr	r8, [%[a], #252]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r3, [%[r], #252]\n\t"
        /* Final carry word. */
        "str	r4, [%[r], #256]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#endif
}

#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
/* r = 2^n mod m where n is the number of bits to reduce by.
 *
 * r is cleared over 32 digits and m is then subtracted from it in place, so
 * the result is (0 - m) over 32 digits.
 * Assumes m uses the full 32-digit width (top bit set) so that this single
 * subtraction is enough - TODO confirm against callers.
 *
 * r  A single precision number (32 digits are written).
 * m  A single precision number.
 */
static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
{
    /* Zero all 32 digits of the result. */
    XMEMSET(r, 0, 32 * sizeof(*r));

    /* r = 0 - m, i.e. 2^n mod m under the assumption above. */
    sp_2048_sub_in_place_32(r, m);
}

/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not subtracting.
 *
 * Every word of b is ANDed with m before being subtracted, so the same
 * instruction sequence runs whichever mask value is passed. 32 words
 * (byte offsets 0..124) of a and b are read and 32 words of r are written.
 *
 * The asm changes the condition flags (subs/sbcs/cmp), so "cc" is listed in
 * the clobbers to stop the compiler assuming the flags survive the statement.
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 *
 * returns 0 when the subtraction did not borrow and all ones (-1) when it did.
 */
static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
    sp_digit c = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Looping variant: r8 is the byte offset, r9 is a constant zero. */
    __asm__ __volatile__ (
        "mov	r9, #0\n\t"
        "mov	r8, #0\n\t"
        "1:\n\t"
        /* Reload the carry flag from c (0 or -1) saved by the last pass. */
        "subs	%[c], r9, %[c]\n\t"
        "ldr	r4, [%[a], r8]\n\t"
        "ldr	r5, [%[b], r8]\n\t"
        "and	r5, r5, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        /* Save the borrow as 0 or -1; the cmp below overwrites the flags. */
        "sbc	%[c], r9, r9\n\t"
        "str	r4, [%[r], r8]\n\t"
        "add	r8, r8, #4\n\t"
        "cmp	r8, #128\n\t"
        "blt	1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#else
    /* Unrolled variant: two words per step, borrow chained through the
     * carry flag from the first subs to the final sbc. */
    __asm__ __volatile__ (

        "mov	r9, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "ldr	r5, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "subs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r6, [%[r], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r5, [%[b], #8]\n\t"
        "ldr	r7, [%[b], #12]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r6, [%[a], #20]\n\t"
        "ldr	r5, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r6, [%[r], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r6, [%[a], #28]\n\t"
        "ldr	r5, [%[b], #24]\n\t"
        "ldr	r7, [%[b], #28]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r6, [%[a], #36]\n\t"
        "ldr	r5, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r6, [%[r], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r6, [%[a], #44]\n\t"
        "ldr	r5, [%[b], #40]\n\t"
        "ldr	r7, [%[b], #44]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #40]\n\t"
        "str	r6, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r6, [%[a], #52]\n\t"
        "ldr	r5, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r6, [%[r], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r6, [%[a], #60]\n\t"
        "ldr	r5, [%[b], #56]\n\t"
        "ldr	r7, [%[b], #60]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r6, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r6, [%[a], #68]\n\t"
        "ldr	r5, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r6, [%[r], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r6, [%[a], #76]\n\t"
        "ldr	r5, [%[b], #72]\n\t"
        "ldr	r7, [%[b], #76]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #72]\n\t"
        "str	r6, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r6, [%[a], #84]\n\t"
        "ldr	r5, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r6, [%[r], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r6, [%[a], #92]\n\t"
        "ldr	r5, [%[b], #88]\n\t"
        "ldr	r7, [%[b], #92]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #88]\n\t"
        "str	r6, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r6, [%[a], #100]\n\t"
        "ldr	r5, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r6, [%[r], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r6, [%[a], #108]\n\t"
        "ldr	r5, [%[b], #104]\n\t"
        "ldr	r7, [%[b], #108]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #104]\n\t"
        "str	r6, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r6, [%[a], #116]\n\t"
        "ldr	r5, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r6, [%[r], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r6, [%[a], #124]\n\t"
        "ldr	r5, [%[b], #120]\n\t"
        "ldr	r7, [%[b], #124]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #120]\n\t"
        "str	r6, [%[r], #124]\n\t"
        /* c = 0 - 0 - borrow: 0 when no borrow, -1 when the chain borrowed. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#endif /* WOLFSSL_SP_SMALL */

    return c;
}

/* Reduce the number back to 32 digits using Montgomery reduction.
 *
 * The loop runs 32 times (r12 counts bytes 0..124). Each pass computes
 * mu = a[i] * mp, adds m[0..31] * mu to a[i..i+31] and folds the carry into
 * a[i+32], so 64 digits of a are accessed in total. The pointer a itself is
 * advanced by one digit per pass and therefore ends up pointing at the upper
 * half; the final conditional subtract of m writes the result to the lower
 * half (a - 32).
 *
 * Register use inside the loop:
 *   r10, r14  cached a[i] and a[i+1] for the next pass (kept in registers
 *             instead of being stored; written back once after the loop)
 *   r8        mu
 *   r6:r7     32x32->64 product m[j] * mu
 *   r4, r5    alternating carry words between columns
 *   r9        current a[i+j] word
 *   ca        carry out of the top word, carried across passes
 *
 * The asm changes the condition flags (adds/adcs/cmp), so "cc" is listed in
 * the clobbers to stop the compiler assuming the flags survive the statement.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_digit ca = 0;

    __asm__ __volatile__ (
        "# i = 0\n\t"
        "mov	r12, #0\n\t"
        "ldr	r10, [%[a], #0]\n\t"
        "ldr	r14, [%[a], #4]\n\t"
        "\n1:\n\t"
        "# mu = a[i] * mp\n\t"
        "mul	r8, %[mp], r10\n\t"
        "# a[i+0] += m[0] * mu\n\t"
        "ldr	r7, [%[m], #0]\n\t"
        /* NOTE: r9 is reloaded before it is read; a[i] is taken from r10. */
        "ldr	r9, [%[a], #0]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r10, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "# a[i+1] += m[1] * mu\n\t"
        "ldr	r7, [%[m], #4]\n\t"
        /* NOTE: r9 is reloaded before it is read; a[i+1] is taken from r14. */
        "ldr	r9, [%[a], #4]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r14, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r10, r10, r5\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+2] += m[2] * mu\n\t"
        "ldr	r7, [%[m], #8]\n\t"
        "ldr	r14, [%[a], #8]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r14, r14, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r14, r14, r4\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+3] += m[3] * mu\n\t"
        "ldr	r7, [%[m], #12]\n\t"
        "ldr	r9, [%[a], #12]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #12]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+4] += m[4] * mu\n\t"
        "ldr	r7, [%[m], #16]\n\t"
        "ldr	r9, [%[a], #16]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #16]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+5] += m[5] * mu\n\t"
        "ldr	r7, [%[m], #20]\n\t"
        "ldr	r9, [%[a], #20]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #20]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+6] += m[6] * mu\n\t"
        "ldr	r7, [%[m], #24]\n\t"
        "ldr	r9, [%[a], #24]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #24]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+7] += m[7] * mu\n\t"
        "ldr	r7, [%[m], #28]\n\t"
        "ldr	r9, [%[a], #28]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #28]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+8] += m[8] * mu\n\t"
        "ldr	r7, [%[m], #32]\n\t"
        "ldr	r9, [%[a], #32]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #32]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+9] += m[9] * mu\n\t"
        "ldr	r7, [%[m], #36]\n\t"
        "ldr	r9, [%[a], #36]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #36]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+10] += m[10] * mu\n\t"
        "ldr	r7, [%[m], #40]\n\t"
        "ldr	r9, [%[a], #40]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #40]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+11] += m[11] * mu\n\t"
        "ldr	r7, [%[m], #44]\n\t"
        "ldr	r9, [%[a], #44]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #44]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+12] += m[12] * mu\n\t"
        "ldr	r7, [%[m], #48]\n\t"
        "ldr	r9, [%[a], #48]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #48]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+13] += m[13] * mu\n\t"
        "ldr	r7, [%[m], #52]\n\t"
        "ldr	r9, [%[a], #52]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #52]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+14] += m[14] * mu\n\t"
        "ldr	r7, [%[m], #56]\n\t"
        "ldr	r9, [%[a], #56]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #56]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+15] += m[15] * mu\n\t"
        "ldr	r7, [%[m], #60]\n\t"
        "ldr	r9, [%[a], #60]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #60]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+16] += m[16] * mu\n\t"
        "ldr	r7, [%[m], #64]\n\t"
        "ldr	r9, [%[a], #64]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #64]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+17] += m[17] * mu\n\t"
        "ldr	r7, [%[m], #68]\n\t"
        "ldr	r9, [%[a], #68]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #68]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+18] += m[18] * mu\n\t"
        "ldr	r7, [%[m], #72]\n\t"
        "ldr	r9, [%[a], #72]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #72]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+19] += m[19] * mu\n\t"
        "ldr	r7, [%[m], #76]\n\t"
        "ldr	r9, [%[a], #76]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #76]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+20] += m[20] * mu\n\t"
        "ldr	r7, [%[m], #80]\n\t"
        "ldr	r9, [%[a], #80]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #80]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+21] += m[21] * mu\n\t"
        "ldr	r7, [%[m], #84]\n\t"
        "ldr	r9, [%[a], #84]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #84]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+22] += m[22] * mu\n\t"
        "ldr	r7, [%[m], #88]\n\t"
        "ldr	r9, [%[a], #88]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #88]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+23] += m[23] * mu\n\t"
        "ldr	r7, [%[m], #92]\n\t"
        "ldr	r9, [%[a], #92]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #92]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+24] += m[24] * mu\n\t"
        "ldr	r7, [%[m], #96]\n\t"
        "ldr	r9, [%[a], #96]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #96]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+25] += m[25] * mu\n\t"
        "ldr	r7, [%[m], #100]\n\t"
        "ldr	r9, [%[a], #100]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #100]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+26] += m[26] * mu\n\t"
        "ldr	r7, [%[m], #104]\n\t"
        "ldr	r9, [%[a], #104]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #104]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+27] += m[27] * mu\n\t"
        "ldr	r7, [%[m], #108]\n\t"
        "ldr	r9, [%[a], #108]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #108]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+28] += m[28] * mu\n\t"
        "ldr	r7, [%[m], #112]\n\t"
        "ldr	r9, [%[a], #112]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #112]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+29] += m[29] * mu\n\t"
        "ldr	r7, [%[m], #116]\n\t"
        "ldr	r9, [%[a], #116]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #116]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+30] += m[30] * mu\n\t"
        "ldr	r7, [%[m], #120]\n\t"
        "ldr	r9, [%[a], #120]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #120]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+31] += m[31] * mu\n\t"
        "ldr	r7, [%[m], #124]\n\t"
        "ldr   r9, [%[a], #124]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        /* Top column: fold in the carry saved from the previous pass and
         * collect the new carry out in ca for the next one. */
        "adds	r5, r5, r6\n\t"
        "adcs	r7, r7, %[ca]\n\t"
        "mov	%[ca], #0\n\t"
        "adc	%[ca], %[ca], %[ca]\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #124]\n\t"
        "ldr	r9, [%[a], #128]\n\t"
        "adcs	r9, r9, r7\n\t"
        "str	r9, [%[a], #128]\n\t"
        "adc	%[ca], %[ca], #0\n\t"
        "# i += 1\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	r12, r12, #4\n\t"
        "cmp	r12, #128\n\t"
        "blt	1b\n\t"
        /* Write back the two words that were only held in registers. */
        "str	r10, [%[a], #0]\n\t"
        "str	r14, [%[a], #4]\n\t"
        : [ca] "+r" (ca), [a] "+r" (a)
        : [m] "r" (m), [mp] "r" (mp)
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "cc"
    );

    /* a now points 32 digits past the start. Subtract m from the upper half
     * when the reduction carried out (mask = 0 - ca) and store the result in
     * the lower half. */
    sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
}

/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The product is written to r by sp_2048_mul_32 and then reduced in place by
 * sp_2048_mont_reduce_32, which accesses 64 digits of r, so r must be large
 * enough to hold the double-width product.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m, sp_digit mp)
{
    sp_2048_mul_32(r, a, b);
    sp_2048_mont_reduce_32(r, m, mp);
}

/* Montgomery squaring of a number modulo m. (r = a * a mod m)
 *
 * The square is formed by sp_2048_sqr_32() and then reduced in place by
 * sp_2048_mont_reduce_32().
 *
 * r   Destination for the result.
 * a   Number to square, in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a,
        const sp_digit* m, sp_digit mp)
{
    /* Plain squaring into r ... */
    sp_2048_sqr_32(r, a);
    /* ... followed by Montgomery reduction of r. */
    sp_2048_mont_reduce_32(r, m, mp);
}

/* Mul a by digit b into r. (r = a * b)
 *
 * r  A single precision integer. 33 digits are written: 32 product digits
 *    plus the final carry digit at index 32.
 * a  A single precision integer (32 digits are read).
 * b  A single precision digit.
 */
static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    /* Looped form: r3/r4/r5 hold the running column sum, r9 is the byte
     * offset of the digit being processed and r10 is a constant zero. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r5, r3, %[b], r8\n\t"
        "mov	r4, #0\n\t"
        "str	r5, [%[r]]\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, #4\n\t"
        "1:\n\t"
        "ldr	r8, [%[a], r9]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], r9]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r9, r9, #4\n\t"
        "cmp	r9, #128\n\t"
        "blt	1b\n\t"
        "str	r3, [%[r], #128]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs/cmp modify the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#else
    /* Unrolled form: r3/r4/r5 rotate through the roles of low word,
     * high word and carry of the running column sum; r10 is a constant
     * zero used to propagate the carry. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r3, r4, %[b], r8\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [%[r]]\n\t"
        "# A[1] * B\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #4]\n\t"
        "# A[2] * B\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #8]\n\t"
        "# A[3] * B\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #12]\n\t"
        "# A[4] * B\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #16]\n\t"
        "# A[5] * B\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #20]\n\t"
        "# A[6] * B\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #24]\n\t"
        "# A[7] * B\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #28]\n\t"
        "# A[8] * B\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #32]\n\t"
        "# A[9] * B\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #36]\n\t"
        "# A[10] * B\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #40]\n\t"
        "# A[11] * B\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #44]\n\t"
        "# A[12] * B\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "# A[13] * B\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "# A[14] * B\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #56]\n\t"
        "# A[15] * B\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #60]\n\t"
        "# A[16] * B\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #64]\n\t"
        "# A[17] * B\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #68]\n\t"
        "# A[18] * B\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #72]\n\t"
        "# A[19] * B\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #76]\n\t"
        "# A[20] * B\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #80]\n\t"
        "# A[21] * B\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #84]\n\t"
        "# A[22] * B\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #88]\n\t"
        "# A[23] * B\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #92]\n\t"
        "# A[24] * B\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #96]\n\t"
        "# A[25] * B\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #100]\n\t"
        "# A[26] * B\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #104]\n\t"
        "# A[27] * B\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #108]\n\t"
        "# A[28] * B\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #112]\n\t"
        "# A[29] * B\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #116]\n\t"
        "# A[30] * B\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #120]\n\t"
        "# A[31] * B\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adc	r5, r5, r7\n\t"
        "str	r4, [%[r], #124]\n\t"
        "str	r5, [%[r], #128]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs modify the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#endif
}

/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 */
static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
{
    sp_digit r = 0;

    /* r5 = (div >> 1) + 1 is the trial value compared against the running
     * remainder (r7) in the bit-by-bit section; each step shifts one more
     * bit of d0 (r6) into r7 and one result bit into r. The estimate is then
     * adjusted twice using r * div subtracted from (d1|d0), followed by one
     * last conditional increment. No branches depend on the operand values;
     * the only branch is the fixed-count loop. */
    __asm__ __volatile__ (
        "lsr	r5, %[div], #1\n\t"
        "add	r5, r5, #1\n\t"
        "mov	r6, %[d0]\n\t"
        "mov	r7, %[d1]\n\t"
        "# Do top 32\n\t"
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "# Next 30 bits\n\t"
        "mov	r4, #29\n\t"
        "1:\n\t"
        "movs	r6, r6, lsl #1\n\t"
        "adc	r7, r7, r7\n\t"
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "subs	r4, r4, #1\n\t"
        "bpl	1b\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "add	%[r], %[r], #1\n\t"
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        "subs	r8, %[div], r4\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        : [r] "+r" (r)
        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
        /* "cc": subs/movs/sbc modify and consume the condition flags. */
        : "r4", "r5", "r6", "r7", "r8", "cc"
    );
    return r;
}

/* Compare a with b in constant time.
 *
 * All 32 digits are always processed, from the most significant down. A
 * mask (r3) starts as all ones and is cleared after the first pair of digits
 * that differ, so later digits no longer influence the result.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = -1;
    sp_digit one = 1;


#ifdef WOLFSSL_SP_SMALL
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "mov	r6, #124\n\t"
        "1:\n\t"
        "ldr	r4, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
        "and	r4, r4, r3\n\t"
        "and	r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "subs	r6, r6, #4\n\t"
        "bcs	1b\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        /* "memory": *a and *b are read through the pointer operands.
         * "cc": subs modifies the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
#else
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "ldr		r4, [%[a], #124]\n\t"
        "ldr		r5, [%[b], #124]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #120]\n\t"
        "ldr		r5, [%[b], #120]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #116]\n\t"
        "ldr		r5, [%[b], #116]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #112]\n\t"
        "ldr		r5, [%[b], #112]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #108]\n\t"
        "ldr		r5, [%[b], #108]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #104]\n\t"
        "ldr		r5, [%[b], #104]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #100]\n\t"
        "ldr		r5, [%[b], #100]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #96]\n\t"
        "ldr		r5, [%[b], #96]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #92]\n\t"
        "ldr		r5, [%[b], #92]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #88]\n\t"
        "ldr		r5, [%[b], #88]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #84]\n\t"
        "ldr		r5, [%[b], #84]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #80]\n\t"
        "ldr		r5, [%[b], #80]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #76]\n\t"
        "ldr		r5, [%[b], #76]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #72]\n\t"
        "ldr		r5, [%[b], #72]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #68]\n\t"
        "ldr		r5, [%[b], #68]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #64]\n\t"
        "ldr		r5, [%[b], #64]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #60]\n\t"
        "ldr		r5, [%[b], #60]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #56]\n\t"
        "ldr		r5, [%[b], #56]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #52]\n\t"
        "ldr		r5, [%[b], #52]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #48]\n\t"
        "ldr		r5, [%[b], #48]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #44]\n\t"
        "ldr		r5, [%[b], #44]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #40]\n\t"
        "ldr		r5, [%[b], #40]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #36]\n\t"
        "ldr		r5, [%[b], #36]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #32]\n\t"
        "ldr		r5, [%[b], #32]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #28]\n\t"
        "ldr		r5, [%[b], #28]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #24]\n\t"
        "ldr		r5, [%[b], #24]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #20]\n\t"
        "ldr		r5, [%[b], #20]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #16]\n\t"
        "ldr		r5, [%[b], #16]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #12]\n\t"
        "ldr		r5, [%[b], #12]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #8]\n\t"
        "ldr		r5, [%[b], #8]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #4]\n\t"
        "ldr		r5, [%[b], #4]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #0]\n\t"
        "ldr		r5, [%[b], #0]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        /* "memory": *a and *b are read through the pointer operands.
         * "cc": subs modifies the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
#endif

    /* r is 1 (a > b), all ones (a < b) or 0 (equal); the final eor with the
     * still-set mask turns the initial -1 into 0 when no digit differed. */
    return r;
}

/* Divide a by d and store the remainder in r. (m*d + r = a)
 * The multiplier m is not calculated as it is not needed at this time.
 *
 * a  Number to be divided (64 digits are copied from it).
 * d  Number to divide with (32 digits).
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit rem[2 * 32];
    sp_digit prod[32 + 1];
    sp_digit top;
    sp_digit q;
    sp_digit ge;
    sp_digit* lo;
    sp_digit* hi;
    int i;

    (void)m;

    /* Work on a private copy of the double-width value. */
    top = d[31];
    XMEMCPY(rem, a, sizeof(rem));

    i = 32;
    while (i-- > 0) {
        lo = &rem[i];
        hi = &rem[32 + i];

        /* Estimate the quotient digit from the two leading digits. */
        q = div_2048_word_32(*hi, *(hi - 1), top);

        /* Remove q * d from the current window. */
        sp_2048_mul_d_32(prod, d, q);
        *hi += sp_2048_sub_in_place_32(lo, prod);
        *hi -= prod[32];

        /* Add d back, masked by the leading digit, up to two times.
         * NOTE(review): presumably corrects an over-estimated q; the mask
         * helper is defined elsewhere - confirm. */
        sp_2048_mask_32(prod, d, *hi);
        *hi += sp_2048_add_32(lo, lo, prod);
        sp_2048_mask_32(prod, d, *hi);
        *hi += sp_2048_add_32(lo, lo, prod);
    }

    /* Final conditional subtraction when the low half is still >= d. */
    ge = sp_2048_cmp_32(rem, d) >= 0;
    sp_2048_cond_sub_32(r, rem, d, (sp_digit)0 - ge);

    return MP_OKAY;
}

/* Reduce a modulo m and store the result in r. (r = a mod m)
 *
 * Thin wrapper over sp_2048_div_32() that requests only the remainder.
 *
 * r  A single precision number that receives the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    int ret;

    /* No quotient buffer is supplied; the divide does not compute one. */
    ret = sp_2048_div_32(a, m, NULL, r);

    return ret;
}

#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Uses a fixed 4-bit window over the exponent and a table of 16 powers of a
 * held in Montgomery form.
 *
 * r        A single precision number that is the result of the operation.
 *          The upper 32 digits are used as working space, so 64 digits must
 *          be available.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before use; when zero, the
 *          low 32 digits of a are used as given.
 * returns MP_OKAY on success and MEMORY_E on dynamic memory allocation
 * failure.
 */
static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[16][64];
#else
    sp_digit* t[16];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<16; i++) {
            t[i] = td + i * 64;
        }
#endif
        /* t[0] holds the Montgomery normalizer (1 in Montgomery form). */
        norm = t[0];

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_32(norm, m);

        /* t[1] = a * 2^(32 * 32) mod m: a is placed in the upper half of a
         * zeroed double-width value which is then reduced. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
        if (reduceA != 0) {
            err = sp_2048_mod_32(t[1] + 32, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_32(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
            err = sp_2048_mod_32(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* t[k] = t[1]^k: even k by squaring t[k/2], odd k by multiplying
         * t[k/2 + 1] with t[k/2]. The branch depends only on the index. */
        for (i = 2; i < 16; i++) {
            if ((i & 1) == 0) {
                sp_2048_mont_sqr_32(t[i], t[i / 2], m, mp);
            }
            else {
                sp_2048_mont_mul_32(t[i], t[i / 2 + 1], t[i / 2], m, mp);
            }
        }

        /* Extract the leading (possibly partial) window. c is the number of
         * exponent bits left unread in the current word n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        /* 32 is a multiple of 4, so c is never negative here; it can be 0
         * when the top word holds only the leading partial window. */
        if (c == 0) {
            /* All bits of the top word are used. Shifting n by 32 would be
             * undefined; n is reloaded before its next use. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted: start on the next one. */
                n = e[i--];
                y = (int)(n >> 28);
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles two exponent words. */
                y = (int)(n >> 28);
                n = e[i--];
                c = 4 - c;
                y |= (int)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (int)((n >> 28) & 0xf);
                n <<= 4;
                c -= 4;
            }

            /* r = r^16 * t[y] */
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);

            sp_2048_mont_mul_32(r, r, t[y], m, mp);
        }

        /* Convert out of Montgomery form: reduce with a zero upper half. */
        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
        sp_2048_mont_reduce_32(r, m, mp);

        /* Final conditional subtraction when r >= m. */
        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
        sp_2048_cond_sub_32(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Uses a fixed 5-bit window over the exponent and a table of 32 powers of a
 * held in Montgomery form.
 *
 * r        A single precision number that is the result of the operation.
 *          The upper 32 digits are used as working space, so 64 digits must
 *          be available.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before use; when zero, the
 *          low 32 digits of a are used as given.
 * returns MP_OKAY on success and MEMORY_E on dynamic memory allocation
 * failure.
 */
static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][64];
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<32; i++) {
            t[i] = td + i * 64;
        }
#endif
        /* t[0] holds the Montgomery normalizer (1 in Montgomery form). */
        norm = t[0];

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_32(norm, m);

        /* t[1] = a * 2^(32 * 32) mod m: a is placed in the upper half of a
         * zeroed double-width value which is then reduced. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
        if (reduceA != 0) {
            err = sp_2048_mod_32(t[1] + 32, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_32(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
            err = sp_2048_mod_32(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* t[k] = t[1]^k: even k by squaring t[k/2], odd k by multiplying
         * t[k/2 + 1] with t[k/2]. The branch depends only on the index. */
        for (i = 2; i < 32; i++) {
            if ((i & 1) == 0) {
                sp_2048_mont_sqr_32(t[i], t[i / 2], m, mp);
            }
            else {
                sp_2048_mont_mul_32(t[i], t[i / 2 + 1], t[i / 2], m, mp);
            }
        }

        /* Extract the leading (possibly partial) window. c is the number of
         * exponent bits left unread in the current word n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* The top word holds fewer bits than the leading window needs:
             * take the remaining -c bits from the next word. This only
             * happens when bits > 32, so another word is available. */
            c = -c;
            y = (int)(n << c);
            n = e[i--];
            y |= (int)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits of the top word are used. Shifting n by 32 would be
             * undefined; n is reloaded before its next use. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: start on the next one. */
                n = e[i--];
                y = (int)(n >> 27);
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = (int)(n >> 27);
                n = e[i--];
                c = 5 - c;
                y |= (int)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (int)((n >> 27) & 0x1f);
                n <<= 5;
                c -= 5;
            }

            /* r = r^32 * t[y] */
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);
            sp_2048_mont_sqr_32(r, r, m, mp);

            sp_2048_mont_mul_32(r, r, t[y], m, mp);
        }

        /* Convert out of Montgomery form: reduce with a zero upper half. */
        XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
        sp_2048_mont_reduce_32(r, m, mp);

        /* Final conditional subtraction when r >= m. */
        mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
        sp_2048_cond_sub_32(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* WOLFSSL_SP_SMALL */

#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */

#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
/* Calculate the Montgomery normalizer: r = 2^n mod m, where n is the number
 * of bits being reduced by.
 * The modulus m is required to be a full 2048 bits, which is why a single
 * subtraction from a zeroed value is sufficient.
 *
 * r  A single precision number (64 digits) that receives the result.
 * m  A single precision number: the modulus.
 */
static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
{
    /* Clear all 64 digits of the result first. */
    XMEMSET(r, 0, 64 * sizeof(*r));

    /* r = 2^n mod m: subtract the modulus from the zeroed value in place. */
    sp_2048_sub_in_place_64(r, m);
}

#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
/* Conditionally subtract b from a using the mask m.
 * m is -1 (all bits set) to subtract and 0 when not subtracting.
 *
 * Every digit of b is ANDed with the mask before being subtracted, so the
 * same instruction sequence executes whether or not the subtraction takes
 * effect.
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 * returns 0 when there is no borrow out of the top digit and -1 (all bits
 * set) when there is a borrow.
 */
static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
    sp_digit c = 0;

    /* The assembly below modifies the condition flags, so "cc" is listed as
     * a clobber to stop the compiler assuming flags survive the statement.
     */
#ifdef WOLFSSL_SP_SMALL
    /* Looped version: r8 is the byte offset, r9 is a constant zero and c
     * carries the borrow (0 or -1) from one digit to the next.
     */
    __asm__ __volatile__ (
        "mov	r9, #0\n\t"
        "mov	r8, #0\n\t"
        "1:\n\t"
        "subs	%[c], r9, %[c]\n\t"
        "ldr	r4, [%[a], r8]\n\t"
        "ldr	r5, [%[b], r8]\n\t"
        "and	r5, r5, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbc	%[c], r9, r9\n\t"
        "str	r4, [%[r], r8]\n\t"
        "add	r8, r8, #4\n\t"
        "cmp	r8, #256\n\t"
        "blt	1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#else
    /* Unrolled version: two digits per step, the borrow stays in the carry
     * flag throughout and is converted to 0 or -1 by the final sbc.
     */
    __asm__ __volatile__ (

        "mov	r9, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "ldr	r5, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "subs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r6, [%[r], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r5, [%[b], #8]\n\t"
        "ldr	r7, [%[b], #12]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r6, [%[a], #20]\n\t"
        "ldr	r5, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r6, [%[r], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r6, [%[a], #28]\n\t"
        "ldr	r5, [%[b], #24]\n\t"
        "ldr	r7, [%[b], #28]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r6, [%[a], #36]\n\t"
        "ldr	r5, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r6, [%[r], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r6, [%[a], #44]\n\t"
        "ldr	r5, [%[b], #40]\n\t"
        "ldr	r7, [%[b], #44]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #40]\n\t"
        "str	r6, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r6, [%[a], #52]\n\t"
        "ldr	r5, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r6, [%[r], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r6, [%[a], #60]\n\t"
        "ldr	r5, [%[b], #56]\n\t"
        "ldr	r7, [%[b], #60]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r6, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r6, [%[a], #68]\n\t"
        "ldr	r5, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r6, [%[r], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r6, [%[a], #76]\n\t"
        "ldr	r5, [%[b], #72]\n\t"
        "ldr	r7, [%[b], #76]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #72]\n\t"
        "str	r6, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r6, [%[a], #84]\n\t"
        "ldr	r5, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r6, [%[r], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r6, [%[a], #92]\n\t"
        "ldr	r5, [%[b], #88]\n\t"
        "ldr	r7, [%[b], #92]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #88]\n\t"
        "str	r6, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r6, [%[a], #100]\n\t"
        "ldr	r5, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r6, [%[r], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r6, [%[a], #108]\n\t"
        "ldr	r5, [%[b], #104]\n\t"
        "ldr	r7, [%[b], #108]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #104]\n\t"
        "str	r6, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r6, [%[a], #116]\n\t"
        "ldr	r5, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r6, [%[r], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r6, [%[a], #124]\n\t"
        "ldr	r5, [%[b], #120]\n\t"
        "ldr	r7, [%[b], #124]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #120]\n\t"
        "str	r6, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r6, [%[a], #132]\n\t"
        "ldr	r5, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r6, [%[r], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r6, [%[a], #140]\n\t"
        "ldr	r5, [%[b], #136]\n\t"
        "ldr	r7, [%[b], #140]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #136]\n\t"
        "str	r6, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r6, [%[a], #148]\n\t"
        "ldr	r5, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r6, [%[r], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r6, [%[a], #156]\n\t"
        "ldr	r5, [%[b], #152]\n\t"
        "ldr	r7, [%[b], #156]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #152]\n\t"
        "str	r6, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r6, [%[a], #164]\n\t"
        "ldr	r5, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r6, [%[r], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r6, [%[a], #172]\n\t"
        "ldr	r5, [%[b], #168]\n\t"
        "ldr	r7, [%[b], #172]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #168]\n\t"
        "str	r6, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r6, [%[a], #180]\n\t"
        "ldr	r5, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r6, [%[r], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r6, [%[a], #188]\n\t"
        "ldr	r5, [%[b], #184]\n\t"
        "ldr	r7, [%[b], #188]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #184]\n\t"
        "str	r6, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r6, [%[a], #196]\n\t"
        "ldr	r5, [%[b], #192]\n\t"
        "ldr	r7, [%[b], #196]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r6, [%[r], #196]\n\t"
        "ldr	r4, [%[a], #200]\n\t"
        "ldr	r6, [%[a], #204]\n\t"
        "ldr	r5, [%[b], #200]\n\t"
        "ldr	r7, [%[b], #204]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #200]\n\t"
        "str	r6, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r6, [%[a], #212]\n\t"
        "ldr	r5, [%[b], #208]\n\t"
        "ldr	r7, [%[b], #212]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r6, [%[r], #212]\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "ldr	r6, [%[a], #220]\n\t"
        "ldr	r5, [%[b], #216]\n\t"
        "ldr	r7, [%[b], #220]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #216]\n\t"
        "str	r6, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r6, [%[a], #228]\n\t"
        "ldr	r5, [%[b], #224]\n\t"
        "ldr	r7, [%[b], #228]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r6, [%[r], #228]\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "ldr	r6, [%[a], #236]\n\t"
        "ldr	r5, [%[b], #232]\n\t"
        "ldr	r7, [%[b], #236]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #232]\n\t"
        "str	r6, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r6, [%[a], #244]\n\t"
        "ldr	r5, [%[b], #240]\n\t"
        "ldr	r7, [%[b], #244]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r6, [%[r], #244]\n\t"
        "ldr	r4, [%[a], #248]\n\t"
        "ldr	r6, [%[a], #252]\n\t"
        "ldr	r5, [%[b], #248]\n\t"
        "ldr	r7, [%[b], #252]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #248]\n\t"
        "str	r6, [%[r], #252]\n\t"
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#endif /* WOLFSSL_SP_SMALL */

    return c;
}

/* Reduce the number back to 2048 bits using Montgomery reduction.
 *
 * The input is a double width value: digits a[0] to a[127] are read and
 * updated. The reduced 64 digit result is written back to a[0] to a[63].
 *
 * For each of the 64 low digits, mu = a[i] * mp is computed and m * mu is
 * added in at digit position i. The two lowest live digits are kept in
 * r10/r14 between steps rather than being stored, r4/r5 alternate as the
 * carry digit, and ca holds the carry out of the top digit. The pointer a is
 * advanced one digit per iteration, so it refers to the upper half when the
 * loop finishes. A final masked subtraction of m is then applied when ca is
 * set, writing the result to the lower half.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_digit ca = 0;

    /* The assembly modifies the condition flags (adds/adcs/cmp), so "cc" is
     * listed as a clobber to stop the compiler assuming flags survive it.
     */
    __asm__ __volatile__ (
        "# i = 0\n\t"
        "mov	r12, #0\n\t"
        "ldr	r10, [%[a], #0]\n\t"
        "ldr	r14, [%[a], #4]\n\t"
        "\n1:\n\t"
        "# mu = a[i] * mp\n\t"
        "mul	r8, %[mp], r10\n\t"
        "# a[i+0] += m[0] * mu\n\t"
        "ldr	r7, [%[m], #0]\n\t"
        "ldr	r9, [%[a], #0]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r10, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "# a[i+1] += m[1] * mu\n\t"
        "ldr	r7, [%[m], #4]\n\t"
        "ldr	r9, [%[a], #4]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r14, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r10, r10, r5\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+2] += m[2] * mu\n\t"
        "ldr	r7, [%[m], #8]\n\t"
        "ldr	r14, [%[a], #8]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r14, r14, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r14, r14, r4\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+3] += m[3] * mu\n\t"
        "ldr	r7, [%[m], #12]\n\t"
        "ldr	r9, [%[a], #12]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #12]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+4] += m[4] * mu\n\t"
        "ldr	r7, [%[m], #16]\n\t"
        "ldr	r9, [%[a], #16]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #16]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+5] += m[5] * mu\n\t"
        "ldr	r7, [%[m], #20]\n\t"
        "ldr	r9, [%[a], #20]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #20]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+6] += m[6] * mu\n\t"
        "ldr	r7, [%[m], #24]\n\t"
        "ldr	r9, [%[a], #24]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #24]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+7] += m[7] * mu\n\t"
        "ldr	r7, [%[m], #28]\n\t"
        "ldr	r9, [%[a], #28]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #28]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+8] += m[8] * mu\n\t"
        "ldr	r7, [%[m], #32]\n\t"
        "ldr	r9, [%[a], #32]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #32]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+9] += m[9] * mu\n\t"
        "ldr	r7, [%[m], #36]\n\t"
        "ldr	r9, [%[a], #36]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #36]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+10] += m[10] * mu\n\t"
        "ldr	r7, [%[m], #40]\n\t"
        "ldr	r9, [%[a], #40]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #40]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+11] += m[11] * mu\n\t"
        "ldr	r7, [%[m], #44]\n\t"
        "ldr	r9, [%[a], #44]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #44]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+12] += m[12] * mu\n\t"
        "ldr	r7, [%[m], #48]\n\t"
        "ldr	r9, [%[a], #48]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #48]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+13] += m[13] * mu\n\t"
        "ldr	r7, [%[m], #52]\n\t"
        "ldr	r9, [%[a], #52]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #52]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+14] += m[14] * mu\n\t"
        "ldr	r7, [%[m], #56]\n\t"
        "ldr	r9, [%[a], #56]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #56]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+15] += m[15] * mu\n\t"
        "ldr	r7, [%[m], #60]\n\t"
        "ldr	r9, [%[a], #60]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #60]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+16] += m[16] * mu\n\t"
        "ldr	r7, [%[m], #64]\n\t"
        "ldr	r9, [%[a], #64]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #64]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+17] += m[17] * mu\n\t"
        "ldr	r7, [%[m], #68]\n\t"
        "ldr	r9, [%[a], #68]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #68]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+18] += m[18] * mu\n\t"
        "ldr	r7, [%[m], #72]\n\t"
        "ldr	r9, [%[a], #72]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #72]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+19] += m[19] * mu\n\t"
        "ldr	r7, [%[m], #76]\n\t"
        "ldr	r9, [%[a], #76]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #76]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+20] += m[20] * mu\n\t"
        "ldr	r7, [%[m], #80]\n\t"
        "ldr	r9, [%[a], #80]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #80]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+21] += m[21] * mu\n\t"
        "ldr	r7, [%[m], #84]\n\t"
        "ldr	r9, [%[a], #84]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #84]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+22] += m[22] * mu\n\t"
        "ldr	r7, [%[m], #88]\n\t"
        "ldr	r9, [%[a], #88]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #88]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+23] += m[23] * mu\n\t"
        "ldr	r7, [%[m], #92]\n\t"
        "ldr	r9, [%[a], #92]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #92]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+24] += m[24] * mu\n\t"
        "ldr	r7, [%[m], #96]\n\t"
        "ldr	r9, [%[a], #96]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #96]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+25] += m[25] * mu\n\t"
        "ldr	r7, [%[m], #100]\n\t"
        "ldr	r9, [%[a], #100]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #100]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+26] += m[26] * mu\n\t"
        "ldr	r7, [%[m], #104]\n\t"
        "ldr	r9, [%[a], #104]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #104]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+27] += m[27] * mu\n\t"
        "ldr	r7, [%[m], #108]\n\t"
        "ldr	r9, [%[a], #108]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #108]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+28] += m[28] * mu\n\t"
        "ldr	r7, [%[m], #112]\n\t"
        "ldr	r9, [%[a], #112]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #112]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+29] += m[29] * mu\n\t"
        "ldr	r7, [%[m], #116]\n\t"
        "ldr	r9, [%[a], #116]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #116]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+30] += m[30] * mu\n\t"
        "ldr	r7, [%[m], #120]\n\t"
        "ldr	r9, [%[a], #120]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #120]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+31] += m[31] * mu\n\t"
        "ldr	r7, [%[m], #124]\n\t"
        "ldr	r9, [%[a], #124]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #124]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+32] += m[32] * mu\n\t"
        "ldr	r7, [%[m], #128]\n\t"
        "ldr	r9, [%[a], #128]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #128]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+33] += m[33] * mu\n\t"
        "ldr	r7, [%[m], #132]\n\t"
        "ldr	r9, [%[a], #132]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #132]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+34] += m[34] * mu\n\t"
        "ldr	r7, [%[m], #136]\n\t"
        "ldr	r9, [%[a], #136]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #136]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+35] += m[35] * mu\n\t"
        "ldr	r7, [%[m], #140]\n\t"
        "ldr	r9, [%[a], #140]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #140]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+36] += m[36] * mu\n\t"
        "ldr	r7, [%[m], #144]\n\t"
        "ldr	r9, [%[a], #144]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #144]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+37] += m[37] * mu\n\t"
        "ldr	r7, [%[m], #148]\n\t"
        "ldr	r9, [%[a], #148]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #148]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+38] += m[38] * mu\n\t"
        "ldr	r7, [%[m], #152]\n\t"
        "ldr	r9, [%[a], #152]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #152]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+39] += m[39] * mu\n\t"
        "ldr	r7, [%[m], #156]\n\t"
        "ldr	r9, [%[a], #156]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #156]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+40] += m[40] * mu\n\t"
        "ldr	r7, [%[m], #160]\n\t"
        "ldr	r9, [%[a], #160]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #160]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+41] += m[41] * mu\n\t"
        "ldr	r7, [%[m], #164]\n\t"
        "ldr	r9, [%[a], #164]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #164]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+42] += m[42] * mu\n\t"
        "ldr	r7, [%[m], #168]\n\t"
        "ldr	r9, [%[a], #168]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #168]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+43] += m[43] * mu\n\t"
        "ldr	r7, [%[m], #172]\n\t"
        "ldr	r9, [%[a], #172]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #172]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+44] += m[44] * mu\n\t"
        "ldr	r7, [%[m], #176]\n\t"
        "ldr	r9, [%[a], #176]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #176]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+45] += m[45] * mu\n\t"
        "ldr	r7, [%[m], #180]\n\t"
        "ldr	r9, [%[a], #180]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #180]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+46] += m[46] * mu\n\t"
        "ldr	r7, [%[m], #184]\n\t"
        "ldr	r9, [%[a], #184]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #184]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+47] += m[47] * mu\n\t"
        "ldr	r7, [%[m], #188]\n\t"
        "ldr	r9, [%[a], #188]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #188]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+48] += m[48] * mu\n\t"
        "ldr	r7, [%[m], #192]\n\t"
        "ldr	r9, [%[a], #192]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #192]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+49] += m[49] * mu\n\t"
        "ldr	r7, [%[m], #196]\n\t"
        "ldr	r9, [%[a], #196]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #196]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+50] += m[50] * mu\n\t"
        "ldr	r7, [%[m], #200]\n\t"
        "ldr	r9, [%[a], #200]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #200]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+51] += m[51] * mu\n\t"
        "ldr	r7, [%[m], #204]\n\t"
        "ldr	r9, [%[a], #204]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #204]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+52] += m[52] * mu\n\t"
        "ldr	r7, [%[m], #208]\n\t"
        "ldr	r9, [%[a], #208]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #208]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+53] += m[53] * mu\n\t"
        "ldr	r7, [%[m], #212]\n\t"
        "ldr	r9, [%[a], #212]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #212]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+54] += m[54] * mu\n\t"
        "ldr	r7, [%[m], #216]\n\t"
        "ldr	r9, [%[a], #216]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #216]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+55] += m[55] * mu\n\t"
        "ldr	r7, [%[m], #220]\n\t"
        "ldr	r9, [%[a], #220]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #220]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+56] += m[56] * mu\n\t"
        "ldr	r7, [%[m], #224]\n\t"
        "ldr	r9, [%[a], #224]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #224]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+57] += m[57] * mu\n\t"
        "ldr	r7, [%[m], #228]\n\t"
        "ldr	r9, [%[a], #228]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #228]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+58] += m[58] * mu\n\t"
        "ldr	r7, [%[m], #232]\n\t"
        "ldr	r9, [%[a], #232]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #232]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+59] += m[59] * mu\n\t"
        "ldr	r7, [%[m], #236]\n\t"
        "ldr	r9, [%[a], #236]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #236]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+60] += m[60] * mu\n\t"
        "ldr	r7, [%[m], #240]\n\t"
        "ldr	r9, [%[a], #240]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #240]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+61] += m[61] * mu\n\t"
        "ldr	r7, [%[m], #244]\n\t"
        "ldr	r9, [%[a], #244]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #244]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+62] += m[62] * mu\n\t"
        "ldr	r7, [%[m], #248]\n\t"
        "ldr	r9, [%[a], #248]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #248]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+63] += m[63] * mu\n\t"
        "ldr	r7, [%[m], #252]\n\t"
        "ldr   r9, [%[a], #252]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r7, r7, %[ca]\n\t"
        "mov	%[ca], #0\n\t"
        "adc	%[ca], %[ca], %[ca]\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #252]\n\t"
        "ldr	r9, [%[a], #256]\n\t"
        "adcs	r9, r9, r7\n\t"
        "str	r9, [%[a], #256]\n\t"
        "adc	%[ca], %[ca], #0\n\t"
        "# i += 1\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	r12, r12, #4\n\t"
        "cmp	r12, #256\n\t"
        "blt	1b\n\t"
        /* Write back the two digits that were held in registers. */
        "str	r10, [%[a], #0]\n\t"
        "str	r14, [%[a], #4]\n\t"
        : [ca] "+r" (ca), [a] "+r" (a)
        : [m] "r" (m), [mp] "r" (mp)
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    /* a now points at the upper 64 digits. Subtract m when the carry is set
     * (mask is all ones) and store the result in the lower 64 digits.
     */
    sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
}

/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The full product is written to r first and then Montgomery reduced in
 * place, so r is used as working space for the unreduced product.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    /* r = a * b */
    sp_2048_mul_64(r, a, b);
    /* Bring the product back down to 2048 bits. */
    sp_2048_mont_reduce_64(r, m, mp);
}

/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is written into r by sp_2048_sqr_64() and then Montgomery
 * reduced in place by sp_2048_mont_reduce_64().
 *
 * r   Result of squaring. It is also the working buffer for the unreduced
 *     square, so it presumably needs room for a double width (128 digit)
 *     value - confirm against sp_2048_sqr_64().
 * a   Number to square in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier.
 */
static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_2048_sqr_64(r, a);
    sp_2048_mont_reduce_64(r, m, mp);
}

#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * The quotient is built one bit at a time by comparing the running
 * remainder against (div >> 1) + 1, and is then corrected by multiplying
 * the estimate back with div and folding the difference into the result.
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 */
static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div)
{
    sp_digit r = 0;

    __asm__ __volatile__ (
        /* r5 = (div >> 1) + 1 is the trial divisor; r7:r6 is a working copy
         * of d1:d0. */
        "lsr	r5, %[div], #1\n\t"
        "add	r5, r5, #1\n\t"
        "mov	r6, %[d0]\n\t"
        "mov	r7, %[d1]\n\t"
        "# Do top 32\n\t"
        /* r8 = all ones when r7 > r5, else zero. Shift that bit into r and
         * subtract r5 from r7 only when the bit was set. */
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "# Next 30 bits\n\t"
        /* r4 counts 29 down to 0: 30 iterations. */
        "mov	r4, #29\n\t"
        "1:\n\t"
        /* Shift the next bit of r6 into r7, then repeat the step above. */
        "movs	r6, r6, lsl #1\n\t"
        "adc	r7, r7, r7\n\t"
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "subs	r4, r4, #1\n\t"
        "bpl	1b\n\t"
        /* r = 2 * r + 1 */
        "add	%[r], %[r], %[r]\n\t"
        "add	%[r], %[r], #1\n\t"
        /* Two correction rounds: r5:r4 = d1:d0 - r * div, then add the high
         * word of that difference to r. */
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Add one more when the remaining low word r4 is above div. */
        "subs	r8, %[div], r4\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        /* r is written long before d0, d1 and div are last read, so it is
         * marked earlyclobber to keep it out of the input registers. */
        : [r] "+&r" (r)
        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
        /* "cc": the subs/movs/adc/sbc instructions change the flags. */
        : "r4", "r5", "r6", "r7", "r8", "cc"
    );
    return r;
}

/* Mask every digit of a with m and write the result to r.
 * (r[i] = a[i] & m for each of the 64 digits)
 *
 * r  A single precision integer that receives the masked digits.
 * a  A single precision integer to mask.
 * m  Mask to AND against each digit.
 */
static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int k = 0;

    /* Compact form: one digit per iteration. */
    while (k < 64) {
        r[k] = a[k] & m;
        k++;
    }
#else
    int base;

    /* Unrolled form: eight digits per iteration, in ascending order. */
    for (base = 0; base < 64; base += 8) {
        sp_digit* dst = r + base;
        const sp_digit* src = a + base;

        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
    }
#endif
}

#ifndef WOLFSSL_SP_SMALL
/* One step of the unrolled compare in sp_2048_cmp_64() for the digit at
 * byte offset off.
 *
 * r3 is the "still equal" mask (all ones until a difference is seen) and r7
 * holds zero. Both digits are ANDed with r3, so once a difference has been
 * recorded every later digit compares equal and the result is left alone.
 */
#define SP_2048_CMP_64_WORD(off)                          \
        "ldr	r4, [%[a], #" #off "]\n\t"                \
        "ldr	r5, [%[b], #" #off "]\n\t"                \
        "and	r4, r4, r3\n\t"                           \
        "and	r5, r5, r3\n\t"                           \
        "subs	r4, r4, r5\n\t"                           \
        "it	hi\n\t"                                   \
        "movhi	%[r], %[one]\n\t"                         \
        "it	lo\n\t"                                   \
        "movlo	%[r], r3\n\t"                             \
        "it	ne\n\t"                                   \
        "movne	r3, r7\n\t"

/* Four consecutive compare steps, highest byte offset first. */
#define SP_2048_CMP_64_WORDS4(o3, o2, o1, o0)             \
        SP_2048_CMP_64_WORD(o3)                           \
        SP_2048_CMP_64_WORD(o2)                           \
        SP_2048_CMP_64_WORD(o1)                           \
        SP_2048_CMP_64_WORD(o0)
#endif /* !WOLFSSL_SP_SMALL */

/* Compare a with b in constant time.
 *
 * All 64 digits are always visited, from the most significant (byte offset
 * 252) down to the least significant (offset 0), using conditional moves
 * rather than data-dependent branches. The first differing digit decides the
 * result; later digits are masked to zero so they cannot change it.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
{
    /* r starts as all ones; the final eor with the mask turns it into 0 when
     * no difference was found. */
    sp_digit r = -1;
    sp_digit one = 1;

#ifdef WOLFSSL_SP_SMALL
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        /* r6 is the byte offset of the digit being compared. */
        "mov	r6, #252\n\t"
        "1:\n\t"
        "ldr	r4, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
        "and	r4, r4, r3\n\t"
        "and	r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        /* Loop until the offset goes below zero (borrow clears carry). */
        "subs	r6, r6, #4\n\t"
        "bcs	1b\n\t"
        "eor	%[r], %[r], r3\n\t"
        /* r may be written before a, b and one are last read, so it is
         * marked earlyclobber to keep it out of the input registers. */
        : [r] "+&r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        /* "memory": the asm reads a[] and b[] through the pointers.
         * "cc": subs changes the flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
#else
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        SP_2048_CMP_64_WORDS4(252, 248, 244, 240)
        SP_2048_CMP_64_WORDS4(236, 232, 228, 224)
        SP_2048_CMP_64_WORDS4(220, 216, 212, 208)
        SP_2048_CMP_64_WORDS4(204, 200, 196, 192)
        SP_2048_CMP_64_WORDS4(188, 184, 180, 176)
        SP_2048_CMP_64_WORDS4(172, 168, 164, 160)
        SP_2048_CMP_64_WORDS4(156, 152, 148, 144)
        SP_2048_CMP_64_WORDS4(140, 136, 132, 128)
        SP_2048_CMP_64_WORDS4(124, 120, 116, 112)
        SP_2048_CMP_64_WORDS4(108, 104, 100, 96)
        SP_2048_CMP_64_WORDS4(92, 88, 84, 80)
        SP_2048_CMP_64_WORDS4(76, 72, 68, 64)
        SP_2048_CMP_64_WORDS4(60, 56, 52, 48)
        SP_2048_CMP_64_WORDS4(44, 40, 36, 32)
        SP_2048_CMP_64_WORDS4(28, 24, 20, 16)
        SP_2048_CMP_64_WORDS4(12, 8, 4, 0)
        "eor	%[r], %[r], r3\n\t"
        /* r may be written before a, b and one are last read, so it is
         * marked earlyclobber to keep it out of the input registers. */
        : [r] "+&r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        /* "memory": the asm reads a[] and b[] through the pointers.
         * "cc": subs changes the flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "cc"
    );
#endif

    return r;
}

#ifndef WOLFSSL_SP_SMALL
#undef SP_2048_CMP_64_WORDS4
#undef SP_2048_CMP_64_WORD
#endif /* !WOLFSSL_SP_SMALL */

/* Divide a by d and put the remainder into r (m*d + r = a).
 * The quotient m is not calculated as it is not needed at this time.
 *
 * Works on a local copy of a, one quotient digit per iteration from the top
 * down. After subtracting the estimated multiple of d, d is ANDed with the
 * top word of the window and added back, twice; a zero top word therefore
 * adds nothing. The same calls are made on every iteration regardless of
 * the values involved.
 *
 * a  Number to be divided (128 digits are copied from it).
 * d  Number to divide with (64 digits).
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit rem[128];
    sp_digit prod[65];
    sp_digit top;
    sp_digit q;
    sp_digit* hi;
    sp_digit* win;
    int idx;

    (void)m;

    /* Only the most significant digit of d is used to estimate each
     * quotient digit. */
    top = d[63];
    XMEMCPY(rem, a, sizeof(*rem) * 2 * 64);
    for (idx = 63; idx >= 0; idx--) {
        win = &rem[idx];        /* 64 digit window being reduced */
        hi = &rem[64 + idx];    /* word just above the window */

        q = div_2048_word_64(*hi, rem[64 + idx - 1], top);

        /* Subtract q * d from the window; prod[64] is the product's top
         * word and the returned borrow is folded into *hi. */
        sp_2048_mul_d_64(prod, d, q);
        *hi += sp_2048_sub_in_place_64(win, prod);
        *hi -= prod[64];
        /* Masked add-back of d, done twice. */
        sp_2048_mask_64(prod, d, *hi);
        *hi += sp_2048_add_64(win, win, prod);
        sp_2048_mask_64(prod, d, *hi);
        *hi += sp_2048_add_64(win, win, prod);
    }

    /* Final subtraction of d when the low 64 digits are still >= d. */
    q = sp_2048_cmp_64(rem, d) >= 0;
    sp_2048_cond_sub_64(r, rem, d, (sp_digit)0 - q);

    return MP_OKAY;
}

/* Reduce a modulo m into r. (r = a mod m)
 *
 * Thin wrapper around sp_2048_div_64() that passes NULL for the quotient
 * and keeps only the remainder.
 *
 * r  A single precision number that is the reduced result.
 * a  A single precision number that is to be reduced. sp_2048_div_64()
 *    copies 128 digits from it.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    return sp_2048_div_64(a, m, NULL, r);
}

#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
/* Divide a by d and put the remainder into r (m*d + r = a).
 * The quotient m is not calculated as it is not needed at this time.
 *
 * Same digit-by-digit scheme as sp_2048_div_64(), but the add-back of d is
 * only performed when the top word of the window is non-zero. The work done
 * therefore depends on the values being divided.
 *
 * a  Number to be divided (128 digits are copied from it).
 * d  Number to divide with (64 digits).
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit rem[128];
    sp_digit prod[65];
    sp_digit top;
    sp_digit q;
    sp_digit* hi;
    sp_digit* win;
    int idx;

    (void)m;

    /* Only the most significant digit of d is used to estimate each
     * quotient digit. */
    top = d[63];
    XMEMCPY(rem, a, sizeof(*rem) * 2 * 64);
    for (idx = 63; idx >= 0; idx--) {
        win = &rem[idx];        /* 64 digit window being reduced */
        hi = &rem[64 + idx];    /* word just above the window */

        q = div_2048_word_64(*hi, rem[64 + idx - 1], top);

        /* Subtract q * d from the window; prod[64] is the product's top
         * word and the returned borrow is folded into *hi. */
        sp_2048_mul_d_64(prod, d, q);
        *hi += sp_2048_sub_in_place_64(win, prod);
        *hi -= prod[64];

        /* Nothing to correct when the top word is already zero. */
        if (*hi == 0) {
            continue;
        }
        /* Add d back once, and a second time if that was not enough. */
        *hi += sp_2048_add_64(win, win, d);
        if (*hi != 0) {
            *hi += sp_2048_add_64(win, win, d);
        }
    }

    /* Final subtraction of d when the low 64 digits are still >= d. */
    q = sp_2048_cmp_64(rem, d) >= 0;
    sp_2048_cond_sub_64(r, rem, d, (sp_digit)0 - q);

    return MP_OKAY;
}

/* Reduce a modulo m into r. (r = a mod m)
 *
 * Thin wrapper around sp_2048_div_64_cond() that passes NULL for the
 * quotient and keeps only the remainder. sp_2048_div_64_cond() branches on
 * intermediate values, so the work done depends on the inputs.
 *
 * r  A single precision number that is the reduced result.
 * a  A single precision number that is to be reduced. sp_2048_div_64_cond()
 *    copies 128 digits from it.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    return sp_2048_div_64_cond(a, m, NULL, r);
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
                                                     defined(WOLFSSL_HAVE_SP_DH)
#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 4-bit window exponentiation in Montgomery form. A table of the
 * 16 values a^0 .. a^15 is built, then the exponent is consumed four bits
 * at a time from the top: four squarings followed by one multiplication by
 * the table entry selected by the window.
 *
 * r        A single precision number that is the result of the operation.
 *          Used as a working buffer: r[64..127] is written, so it must have
 *          room for 128 digits.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is first reduced modulo m. When zero, a is used
 *          as given.
 * returns 0 on success, MEMORY_E on dynamic memory allocation failure, or
 * the error returned by sp_2048_mod_64().
 */
static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[16][128];
#else
    sp_digit* t[16];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        /* Carve the single allocation into 16 entries of 128 digits. */
        for (i=0; i<16; i++) {
            t[i] = td + i * 128;
        }
#endif
        /* t[0] is the value written by sp_2048_mont_norm_64(); it is the
         * table entry used for a zero window. */
        norm = t[0];

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* t[1] = (a shifted up by 64 digits) mod m: the low 64 digits are
         * zeroed and a (reduced first when requested) is placed in the high
         * 64 digits before the double width value is reduced. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
        if (reduceA != 0) {
            err = sp_2048_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_64(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* t[k] = t[1]^k: even entries by squaring t[k/2], odd entries by
         * multiplying the two neighbouring halves. */
        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);

        /* Take the leading (possibly short) window from the top exponent
         * word. c is the number of unconsumed bits left in n. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        if (c == 0) {
            /* All bits in the top word are used by the first window.
             * Shifting n by 32 - c == 32 would be undefined (assuming
             * sp_digit is 32 bits wide, as the /32 and &31 arithmetic here
             * implies), and n is reloaded before its next use anyway. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted: start on the next one. */
                n = e[i--];
                y = n >> 28;
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles two exponent words. */
                y = n >> 28;
                n = e[i--];
                c = 4 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 28) & 0xf;
                n <<= 4;
                c -= 4;
            }

            /* r = r^16 * t[y] */
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            sp_2048_mont_mul_64(r, r, t[y], m, mp);
        }

        /* Clear the upper half and run one more Montgomery reduction, then
         * subtract m once if the result is still >= m. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Fixed 5-bit window method: t[k] is filled with a^k in Montgomery form for
 * k = 0..31, then the exponent is consumed from the most significant end,
 * five bits at a time (five squarings followed by one table multiply).
 *
 * r        A single precision number that is the result of the operation.
 *          Also used as working space; must have room for 128 digits.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before being converted to
 *          Montgomery form. When zero, the 64 digits of a are used as given.
 * returns 0 on success, MEMORY_E on dynamic memory allocation failure, or
 * the error returned by sp_2048_mod_64.
 */
static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][128];
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        /* Carve the single allocation into 32 table entries. */
        for (i=0; i<32; i++) {
            t[i] = td + i * 128;
        }
#endif
        /* t[0] doubles as the a^0 table entry. */
        norm = t[0];

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* Place a in the upper 64 digits with zeros below, then reduce the
         * 128-digit value modulo m to get a in Montgomery form in t[1]. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
        if (reduceA != 0) {
            err = sp_2048_mod_64(t[1] + 64, a, m);
            if (err == MP_OKAY) {
                err = sp_2048_mod_64(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Even entries are squares of t[k/2]; odd entries are products of
         * the two neighbouring halves. */
        sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
        sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
        sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
        sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
        sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
        sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
        sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
        sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
        sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
        sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
        sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
        sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
        sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
        sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
        sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
        sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
        sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
        sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
        sp_2048_mont_sqr_64(t[20], t[10], m, mp);
        sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
        sp_2048_mont_sqr_64(t[22], t[11], m, mp);
        sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
        sp_2048_mont_sqr_64(t[24], t[12], m, mp);
        sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
        sp_2048_mont_sqr_64(t[26], t[13], m, mp);
        sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
        sp_2048_mont_sqr_64(t[28], t[14], m, mp);
        sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
        sp_2048_mont_sqr_64(t[30], t[15], m, mp);
        sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);

        /* First window: take (bits % 5) bits from the top of the exponent so
         * that the remainder is a whole number of 5-bit windows.
         * c is the count of unconsumed bits left in n afterwards. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* The top word holds fewer bits than the first window needs
             * (only possible when bits > 32, so a lower word exists):
             * complete the window from the next word down. */
            c = -c;
            y = (int)(n << c);
            n = e[i--];
            y |= (int)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* The first window is exactly the bits of the top word; avoid a
             * shift by the full word width. The loop reloads n when c is 0. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: start a fresh one. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two words. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            sp_2048_mont_mul_64(r, r, t[y], m, mp);
        }

        /* Convert out of Montgomery form and do a final conditional
         * subtraction so that r < m. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */

#ifdef WOLFSSL_HAVE_SP_RSA
/* RSA public key operation: out = in^em mod mm for a 2048-bit modulus.
 *
 * The public exponent must fit in 32 bits. An exponent of 3 is handled with
 * one square and one multiply; any other exponent uses a left-to-right
 * square-and-multiply in Montgomery form.
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base.
 * em      Public exponent.
 * mm      Modulus.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 256 bytes long.
 * outLen  On entry the size of out, on success set to 256.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long or the modulus is not 2048 bits, MP_EXPTMOD_E when the
 * exponent is zero and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
    byte* out, word32* outLen)
{
#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
    sp_digit ad[128], md[64], rd[128];
#else
    sp_digit* d = NULL;
#endif
    sp_digit* a;
    sp_digit *ah;
    sp_digit* m;
    sp_digit* r;
    sp_digit e[1];
    int err = MP_OKAY;

    /* Size checks: output buffer first, then the operands. */
    if (*outLen < 256) {
        err = MP_TO_E;
    }
    else if (mp_count_bits(em) > 32 || inLen > 256 ||
                                       mp_count_bits(mm) != 2048) {
        err = MP_READ_E;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (err == MP_OKAY) {
        /* One allocation: a (128 digits), r (128 digits), m (64 digits). */
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (d == NULL) {
            err = MEMORY_E;
        }
        else {
            a = d;
            ah = a + 64;
            r = a + 64 * 2;
            m = r + 64 * 2;
        }
    }
#else
    a = ad;
    ah = a + 64;
    m = md;
    r = rd;
#endif

    if (err == MP_OKAY) {
        /* The base goes in the upper half of a so that zeroing the lower
         * half later yields base * 2^2048. */
        sp_2048_from_bin(ah, 64, in, inLen);

        e[0] = em->dp[0];
#if DIGIT_BIT < 32
        /* Exponent may span two mp digits when they are narrower. */
        if (em->used > 1) {
            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
        }
#endif
        if (e[0] == 0) {
            err = MP_EXPTMOD_E;
        }
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(m, 64, mm);

        if (e[0] != 0x3) {
            sp_digit mp;
            int bit;
            int k;

            sp_2048_mont_setup(m, &mp);

            /* Convert to Montgomery form. */
            XMEMSET(a, 0, sizeof(sp_digit) * 64);
            err = sp_2048_mod_64_cond(a, a, m);

            if (err == MP_OKAY) {
                /* Locate the highest set bit of the exponent. */
                bit = 31;
                while (bit >= 0 && (e[0] >> bit) == 0) {
                    bit--;
                }

                /* Top bit is handled by starting with r = a; process the
                 * remaining bits from high to low. */
                XMEMCPY(r, a, sizeof(sp_digit) * 64);
                while (bit-- > 0) {
                    sp_2048_mont_sqr_64(r, r, m, mp);
                    if (((e[0] >> bit) & 1) != 0) {
                        sp_2048_mont_mul_64(r, r, a, m, mp);
                    }
                }

                /* Leave Montgomery form. */
                XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
                sp_2048_mont_reduce_64(r, m, mp);

                /* Compare r with m from the top digit down and subtract m
                 * once when r >= m. */
                k = 63;
                while (k > 0 && r[k] == m[k]) {
                    k--;
                }
                if (r[k] >= m[k]) {
                    sp_2048_sub_in_place_64(r, m);
                }
            }
        }
        else {
            /* e = 3: r = (base^2 mod m) * base mod m. */
            sp_2048_sqr_64(r, ah);
            err = sp_2048_mod_64_cond(r, r, m);
            if (err == MP_OKAY) {
                sp_2048_mul_64(r, ah, r);
                err = sp_2048_mod_64_cond(r, r, m);
            }
        }
    }

    if (err == MP_OKAY) {
        sp_2048_to_bin(r, out);
        *outLen = 256;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (d != NULL) {
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
    }
#endif

    return err;
}

#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* RSA private key operation using the Chinese Remainder Theorem.
 *
 * Computes m1 = in^dpm mod pm and m2 = in^dqm mod qm, then
 * h = qim * (m1 - m2) mod pm and the result m2 + qm * h.
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base.
 * dm      Private exponent. Not used.
 * pm      First prime.
 * qm      Second prime.
 * dpm     First prime's CRT exponent.
 * dqm     Second prime's CRT exponent.
 * qim     Inverse of second prime mod p.
 * mm      Modulus. Only its bit length is checked.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 256 bytes long.
 * outLen  On entry the size of out, on success set to 256.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long or the modulus is not 2048 bits and MEMORY_E when
 * dynamic memory allocation fails.
 */
int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
    byte* out, word32* outLen)
{
#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
    sp_digit ad[64 * 2];
    sp_digit pd[32], qd[32], dpd[32];
    sp_digit tmpad[64], tmpbd[64];
#else
    sp_digit* t = NULL;
#endif
    sp_digit* a;
    sp_digit* p;
    sp_digit* q;
    sp_digit* dp;
    sp_digit* dq;
    sp_digit* qi;
    sp_digit* tmp;
    sp_digit* tmpa;
    sp_digit* tmpb;
    sp_digit* r;
    sp_digit c;
    int err = MP_OKAY;

    (void)dm;

    if (*outLen < 256)
        err = MP_TO_E;
    if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
        err = MP_READ_E;

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (err == MP_OKAY) {
        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (t == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        /* Layout of t: a (128) | p (32) | q (32) | dp/dq/qi (32) |
         * tmpa (64) | tmpb (64). dp, dq and qi share one slot because each
         * is loaded only when the previous one is no longer needed. */
        a = t;
        p = a + 64 * 2;
        q = p + 32;
        qi = dq = dp = q + 32;
        tmpa = qi + 32;
        tmpb = tmpa + 64;

        /* tmp and r reuse the space of a once the exponentiations are done. */
        tmp = t;
        r = tmp + 64;
    }
#else
    r = a = ad;
    p = pd;
    q = qd;
    qi = dq = dp = dpd;
    tmpa = tmpad;
    tmpb = tmpbd;
    tmp = a + 64;
#endif

    if (err == MP_OKAY) {
        sp_2048_from_bin(a, 64, in, inLen);
        sp_2048_from_mp(p, 32, pm);
        sp_2048_from_mp(q, 32, qm);
        sp_2048_from_mp(dp, 32, dpm);

        /* tmpa = a^dp mod p (a is reduced mod p inside, reduceA = 1). */
        err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
    }
    if (err == MP_OKAY) {
        sp_2048_from_mp(dq, 32, dqm);
        /* tmpb = a^dq mod q. */
        err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
    }

    if (err == MP_OKAY) {
        /* tmpa = tmpa - tmpb, adding p back (masked by the borrow) when the
         * subtraction went negative.
         * NOTE(review): p is added at most once; if q can exceed p the
         * difference may still be negative afterwards - confirm callers
         * guarantee p > q or that a second conditional add is not needed. */
        c = sp_2048_sub_in_place_32(tmpa, tmpb);
        sp_2048_mask_32(tmp, p, c);
        sp_2048_add_32(tmpa, tmpa, tmp);

        /* tmpa = qi * tmpa mod p. */
        sp_2048_from_mp(qi, 32, qim);
        sp_2048_mul_32(tmpa, tmpa, qi);
        err = sp_2048_mod_32(tmpa, tmpa, p);
    }

    if (err == MP_OKAY) {
        /* r = tmpb + q * tmpa, with tmpb zero-extended to 64 digits. */
        sp_2048_mul_32(tmpa, q, tmpa);
        XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
        sp_2048_add_64(r, tmpb, tmpa);

        sp_2048_to_bin(r, out);
        *outLen = 256;
    }

    /* Clear every buffer that held key-dependent values. */
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (t != NULL) {
        XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
    }
#else
    /* ad holds tmp (p masked by the borrow) and the result, so it must be
     * cleared along with the other working arrays. */
    XMEMSET(ad, 0, sizeof(ad));
    XMEMSET(tmpad, 0, sizeof(tmpad));
    XMEMSET(tmpbd, 0, sizeof(tmpbd));
    XMEMSET(pd, 0, sizeof(pd));
    XMEMSET(qd, 0, sizeof(qd));
    XMEMSET(dpd, 0, sizeof(dpd));
#endif

    return err;
}
#endif
#endif /* WOLFSSL_HAVE_SP_RSA */
#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Convert an array of sp_digit to an mp_int.
 *
 * Grows r to hold 2048 bits, repacks the 64 digits of a (each treated as 32
 * bits wide) into DIGIT_BIT-wide mp digits, sets r->used and clamps away
 * leading zero digits.
 *
 * a  A single precision integer.
 * r  A multi-precision integer.
 * returns MP_OKAY on success, otherwise the error returned by mp_grow.
 */
static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        /* Digit widths match: copy the digits straight across. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
        r->used = 64;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        /* mp digits are narrower: each a[i] is spread over two or more mp
         * digits. j indexes the mp digit being filled, s is the bit offset
         * within it at the start of each iteration. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            /* Fill the rest of the current mp digit from the low bits. */
            r->dp[j] |= a[i] << s;
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            /* Remaining bits of a[i] start the next mp digit. */
            r->dp[++j] = a[i] >> s;
            /* While a whole further mp digit fits inside a[i], emit it. */
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = a[i] >> s;
                }
            }
            /* Bits of a[i] already placed in the partially filled digit. */
            s = 32 - s;
        }
        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* mp digits are wider: several a[i] are packed into one mp digit. */
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 64; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
                /* a[i] reaches or crosses the end of the current mp digit. */
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= (1L << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                /* NOTE(review): with DIGIT_BIT == 64 s is 32 here; if
                 * sp_digit is 32 bits wide the shift count equals the
                 * operand width - confirm that configuration is not used. */
                r->dp[++j] = a[i] >> s;
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}

/* Modular exponentiation with a 2048-bit modulus on MP integers.
 * (res = base^exp mod mod)
 *
 * base  Base. MP integer. At most 2048 bits.
 * exp   Exponent. MP integer. At most 2048 bits.
 * mod   Modulus. MP integer. Must be exactly 2048 bits.
 * res   Result. MP integer.
 * returns 0 on success, MP_READ_E if an operand has an unsupported size
 * and MEMORY_E if memory allocation fails.
 */
int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
{
    sp_digit b[128], e[64], m[64];
    sp_digit* r = b;
    int expBits = mp_count_bits(exp);
    int err;

    if (mp_count_bits(base) > 2048 || expBits > 2048 ||
                                      mp_count_bits(mod) != 2048) {
        err = MP_READ_E;
    }
    else {
        sp_2048_from_mp(b, 64, base);
        sp_2048_from_mp(e, 64, exp);
        sp_2048_from_mp(m, 64, mod);

        /* The result is written over the base buffer. */
        err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
        if (err == MP_OKAY) {
            err = sp_2048_to_mp(r, res);
        }
    }

    /* Do not leave the exponent on the stack. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}

#ifdef WOLFSSL_HAVE_SP_DH

#ifdef HAVE_FFDHE_2048
/* Shift a 64-digit number left by n bits. (r = a << n)
 *
 * The bits shifted out of the top digit are stored in r[64], so r must have
 * room for 65 digits. Digits are processed from most significant to least
 * significant and every source digit is loaded before the slot it occupies
 * is stored to, so r may be the same buffer as a.
 *
 * r  A single precision integer receiving the 65-digit result.
 * a  A single precision integer of 64 digits to shift.
 * n  Number of bits to shift by. The bits carried into the next digit are
 *    formed as (digit >> 1) >> (31 - n), which gives zero for n = 0; n is
 *    expected to be in the range 0..31.
 */
static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
{
    /* Register use: r6 = 31 - n; r2, r3 and r4 rotate through the digits
     * being shifted; r5 (r4 for the top digit) holds the bits carried up
     * from the next lower digit. */
    __asm__ __volatile__ (
        "mov	r6, #31\n\t"
        "sub	r6, r6, %[n]\n\t"
        "ldr	r3, [%[a], #252]\n\t"
        "lsr	r4, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r4, r4, r6\n\t"
        "ldr	r2, [%[a], #248]\n\t"
        "str	r4, [%[r], #256]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #244]\n\t"
        "str	r3, [%[r], #252]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #240]\n\t"
        "str	r2, [%[r], #248]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #236]\n\t"
        "str	r4, [%[r], #244]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "str	r3, [%[r], #240]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #228]\n\t"
        "str	r2, [%[r], #236]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #224]\n\t"
        "str	r4, [%[r], #232]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #220]\n\t"
        "str	r3, [%[r], #228]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #216]\n\t"
        "str	r2, [%[r], #224]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #212]\n\t"
        "str	r4, [%[r], #220]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "str	r3, [%[r], #216]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #204]\n\t"
        "str	r2, [%[r], #212]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #200]\n\t"
        "str	r4, [%[r], #208]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #196]\n\t"
        "str	r3, [%[r], #204]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #192]\n\t"
        "str	r2, [%[r], #200]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #188]\n\t"
        "str	r4, [%[r], #196]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "str	r3, [%[r], #192]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #180]\n\t"
        "str	r2, [%[r], #188]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #176]\n\t"
        "str	r4, [%[r], #184]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #172]\n\t"
        "str	r3, [%[r], #180]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #168]\n\t"
        "str	r2, [%[r], #176]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #164]\n\t"
        "str	r4, [%[r], #172]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "str	r3, [%[r], #168]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #156]\n\t"
        "str	r2, [%[r], #164]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #152]\n\t"
        "str	r4, [%[r], #160]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #148]\n\t"
        "str	r3, [%[r], #156]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #144]\n\t"
        "str	r2, [%[r], #152]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #140]\n\t"
        "str	r4, [%[r], #148]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "str	r3, [%[r], #144]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #132]\n\t"
        "str	r2, [%[r], #140]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #128]\n\t"
        "str	r4, [%[r], #136]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #124]\n\t"
        "str	r3, [%[r], #132]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #120]\n\t"
        "str	r2, [%[r], #128]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #116]\n\t"
        "str	r4, [%[r], #124]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "str	r3, [%[r], #120]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #108]\n\t"
        "str	r2, [%[r], #116]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #104]\n\t"
        "str	r4, [%[r], #112]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #100]\n\t"
        "str	r3, [%[r], #108]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #96]\n\t"
        "str	r2, [%[r], #104]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #92]\n\t"
        "str	r4, [%[r], #100]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "str	r3, [%[r], #96]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "str	r2, [%[r], #92]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #80]\n\t"
        "str	r4, [%[r], #88]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #76]\n\t"
        "str	r3, [%[r], #84]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #72]\n\t"
        "str	r2, [%[r], #80]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #68]\n\t"
        "str	r4, [%[r], #76]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "str	r3, [%[r], #72]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #60]\n\t"
        "str	r2, [%[r], #68]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #56]\n\t"
        "str	r4, [%[r], #64]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #52]\n\t"
        "str	r3, [%[r], #60]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #48]\n\t"
        "str	r2, [%[r], #56]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #44]\n\t"
        "str	r4, [%[r], #52]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "str	r3, [%[r], #48]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "str	r2, [%[r], #44]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #32]\n\t"
        "str	r4, [%[r], #40]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #28]\n\t"
        "str	r3, [%[r], #36]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #24]\n\t"
        "str	r2, [%[r], #32]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #20]\n\t"
        "str	r4, [%[r], #28]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "str	r3, [%[r], #24]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #12]\n\t"
        "str	r2, [%[r], #20]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #8]\n\t"
        "str	r4, [%[r], #16]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #0]\n\t"
        "str	r2, [%[r], #8]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "str	r3, [%[r]]\n\t"
        "str	r4, [%[r], #4]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
        : "memory", "r2", "r3", "r4", "r5", "r6"
    );
}

/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * Works like the 5-bit window exponentiation but, because the base is 2,
 * the table multiply is replaced by a left shift of y bits followed by
 * folding the overflow digit back in using norm.
 *
 * r     A single precision number that is the result of the operation.
 *       Used as working space: r[64] is written by the shift and the upper
 *       64 digits are cleared before the final reduction.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit nd[128];
    sp_digit td[65];
#else
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit* tmp;
    sp_digit mp = 1;
    sp_digit n, o;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* 128 digits for norm followed by 65 digits for tmp. */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        norm = td;
        tmp  = td + 128;
#else
        norm = nd;
        tmp  = td;
#endif

        sp_2048_mont_setup(m, &mp);
        sp_2048_mont_norm_64(norm, m);

        /* First window: take (bits % 5) bits from the top of the exponent;
         * c is the count of unconsumed bits left in n afterwards.
         * NOTE(review): if c ends up negative or zero here the shifts below
         * are out of range. The caller visible in this file passes bits as a
         * multiple of 8, for which c stays positive - confirm for any other
         * caller. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        y = (int)(n >> c);
        n <<= 32 - c;
        /* Start with norm shifted left by the first window value. */
        sp_2048_lshift_64(r, norm, y);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: start a fresh one. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two words. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            /* Multiply by 2^y with a shift; the bits pushed into r[64] are
             * folded back by adding norm * r[64], then m is subtracted if
             * that addition carried out. */
            sp_2048_lshift_64(r, r, y);
            sp_2048_mul_d_64(tmp, norm, r[64]);
            r[64] = 0;
            o = sp_2048_add_64(r, r, tmp);
            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
        }

        /* Convert out of Montgomery form and do a final conditional
         * subtraction when r >= m. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* HAVE_FFDHE_2048 */

/* Diffie-Hellman modular exponentiation with a 2048-bit modulus.
 * (out = base^exp mod mod, written big-endian without leading zero bytes)
 *
 * base     Base. At most 2048 bits.
 * exp      Array of bytes that is the exponent.
 * expLen   Length of data, in bytes, in exponent. At most 256.
 * mod      Modulus. Must be exactly 2048 bits.
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 256 bytes long.
 * outLen   Length, in bytes, of exponentiation result after leading zero
 *          bytes have been removed.
 * returns 0 on success, MP_READ_E if there are too many bytes in an array
 * and MEMORY_E if memory allocation fails.
 */
int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
    mp_int* mod, byte* out, word32* outLen)
{
    sp_digit b[128], e[64], m[64];
    sp_digit* r = b;
    word32 i;
    int err = MP_OKAY;

    if (mp_count_bits(base) > 2048 || expLen > 256 ||
                                      mp_count_bits(mod) != 2048) {
        err = MP_READ_E;
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(b, 64, base);
        sp_2048_from_bin(e, 64, exp, expLen);
        sp_2048_from_mp(m, 64, mod);

    #ifdef HAVE_FFDHE_2048
        /* Base of 2 with an all-ones top modulus word: use the routine
         * specialised for powers of 2. */
        if ((base->used == 1) && (base->dp[0] == 2) &&
                                              (m[63] == (sp_digit)-1)) {
            err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
        }
        else
    #endif
        {
            err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
        }
    }

    if (err == MP_OKAY) {
        sp_2048_to_bin(r, out);

        /* Count leading zero bytes and slide the rest to the front. */
        i = 0;
        while (i < 256 && out[i] == 0) {
            i++;
        }
        *outLen = 256U - i;
        XMEMMOVE(out, out + i, *outLen);
    }

    /* Do not leave the exponent on the stack. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}
#endif /* WOLFSSL_HAVE_SP_DH */

/* Modular exponentiation with a 1024-bit modulus on MP integers.
 * (res = base^exp mod mod)
 *
 * Uses the 32-digit half-size routines of the 2048-bit implementation.
 *
 * base  Base. MP integer. At most 1024 bits.
 * exp   Exponent. MP integer. At most 1024 bits.
 * mod   Modulus. MP integer. Must be exactly 1024 bits.
 * res   Result. MP integer.
 * returns 0 on success, MP_READ_E if an operand has an unsupported size,
 * MEMORY_E if memory allocation fails, or the error from converting the
 * result to an MP integer.
 */
int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
{
    int err = MP_OKAY;
    sp_digit b[64], e[32], m[32];
    sp_digit* r = b;
    int expBits = mp_count_bits(exp);

    if (mp_count_bits(base) > 1024) {
        err = MP_READ_E;
    }

    if (err == MP_OKAY) {
        if (expBits > 1024) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        if (mp_count_bits(mod) != 1024) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        sp_2048_from_mp(b, 32, base);
        sp_2048_from_mp(e, 32, exp);
        sp_2048_from_mp(m, 32, mod);

        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
    }

    if (err == MP_OKAY) {
        /* sp_2048_to_mp reads 64 digits, so clear the upper half first. */
        XMEMSET(r + 32, 0, sizeof(*r) * 32U);
        err = sp_2048_to_mp(r, res);
    }

    if (err == MP_OKAY) {
        /* Only touch res once the conversion has succeeded; on failure res
         * may not have been grown and must be left alone. */
        res->used = mod->used;
        mp_clamp(res);
    }

    /* Do not leave the exponent on the stack. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}

#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */

#endif /* !WOLFSSL_SP_NO_2048 */

#ifndef WOLFSSL_SP_NO_3072
/* Read big endian unsigned byte array into r.
 *
 * Bytes are consumed from the end of the array (least significant first)
 * and packed into 32-bit digits. Digits of r not reached by the input are
 * set to zero; input that does not fit in size digits is ignored.
 *
 * r     A single precision integer.
 * size  Number of digits available in r.
 * a     Byte array.
 * n     Number of bytes in array to read.
 */
static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int src = n - 1;
    int dst = 0;
    word32 shift = 0;

    r[0] = 0;
    while (src >= 0) {
        sp_digit octet = (sp_digit)a[src];

        r[dst] |= (octet << shift);
        if (shift < 24U) {
            /* Still room in the current digit. */
            shift += 8U;
        }
        else {
            /* Current digit is complete: move on to the next one. */
            r[dst] &= 0xffffffff;
            shift = 32U - shift;
            if (dst + 1 >= size) {
                break;
            }
            dst++;
            r[dst] = octet >> shift;
            shift = 8U - shift;
        }
        src--;
    }

    /* Zero every digit above the last one written. */
    dst++;
    while (dst < size) {
        r[dst] = 0;
        dst++;
    }
}

/* Convert an mp_int to an array of sp_digit.
 *
 * No more than size words of r are written by the copy; words of r above the
 * converted value are set to zero.
 *
 * r     A single precision integer.
 * size  Number of words available in r.
 * a     A multi-precision integer.
 */
static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* MP digits are the same width as the words of r: copy them directly. */
    int j;
    int used = a->used;

    /* Never copy more words than r can hold. */
    if (used > size) {
        used = (size > 0) ? size : 0;
    }

    XMEMCPY(r, a->dp, sizeof(sp_digit) * used);

    for (j = used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    /* MP digits are wider than 32 bits: split each one across words of r. */
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        /* s is the bit offset in r[j] at which this MP digit starts. */
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        /* s is now the number of bits of the MP digit already stored. */
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        /* Bits of this MP digit held in r[j]: offset for the next digit. */
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* MP digits are narrower than 32 bits: pack them into words of r. */
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            /* This MP digit reaches the end of the current word. */
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                /* Digit ended exactly on the word boundary. */
                r[++j] = 0;
                s = 0;
            }
            else {
                /* Put the remaining high bits into the next word. */
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 384
 *
 * The 96 words of r are written starting with the least significant word at
 * the end of the array and working back towards the first byte.
 *
 * r  A single precision integer.
 * a  Byte array.
 */
static void sp_3072_to_bin(sp_digit* r, byte* a)
{
    int word;
    int pos = 3072 / 8 - 1;
    int shift = 0;
    int bits;

    a[pos] = 0;
    for (word = 0; (word < 96) && (pos >= 0); word++) {
        /* Merge the low bits of this word into the current byte. */
        /* lint allow cast of mismatch sp_digit and int */
        a[pos] |= (byte)(r[word] << shift); /*lint !e9033*/
        pos--;
        bits = 8 - shift;
        if (pos < 0) {
            break;
        }
        /* Write out the rest of the word a byte at a time. */
        while (bits < 32) {
            a[pos] = r[word] >> bits;
            pos--;
            bits += 8;
            if (pos < 0) {
                break;
            }
        }
        /* Bit position in the last byte written where the next word goes. */
        shift = 8 - (bits - 32);
        if (pos >= 0) {
            a[pos] = 0;
        }
        /* Step back so the next word is merged into the last byte written. */
        if (shift != 0) {
            pos++;
        }
    }
}

#ifndef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #48\n\t"
        "mov	r10, #0\n\t"
        "#  A[0] * B[0]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r3, r4, r8, r9\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [sp]\n\t"
        "#  A[0] * B[1]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[0]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #4]\n\t"
        "#  A[0] * B[2]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[1]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[0]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #8]\n\t"
        "#  A[0] * B[3]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[2]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[1]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[0]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #12]\n\t"
        "#  A[0] * B[4]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[3]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[2]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[1]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[0]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #16]\n\t"
        "#  A[0] * B[5]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[4]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[3]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[2]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[1]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[0]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #20]\n\t"
        "#  A[0] * B[6]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[5]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[4]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[3]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[2]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[1]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[0]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #24]\n\t"
        "#  A[0] * B[7]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[6]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[5]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[4]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[3]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[2]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[1]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[0]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #28]\n\t"
        "#  A[0] * B[8]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[7]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[6]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[5]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[4]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[3]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[2]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[1]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[0]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #32]\n\t"
        "#  A[0] * B[9]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[8]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[7]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[6]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[5]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[4]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[3]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[2]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[1]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[0]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #36]\n\t"
        "#  A[0] * B[10]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[9]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[8]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[7]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[6]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[5]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[4]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[3]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[2]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[1]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[0]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #40]\n\t"
        "#  A[0] * B[11]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[10]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[9]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[8]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[7]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[6]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[5]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[4]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[3]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[2]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[1]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[0]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #44]\n\t"
        "#  A[1] * B[11]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[2] * B[10]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[9]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[8]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[7]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[6]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[5]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[4]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[3]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[2]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[1]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "#  A[2] * B[11]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[3] * B[10]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[9]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[8]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[7]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[6]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[5]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[4]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[3]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[2]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "#  A[3] * B[11]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[4] * B[10]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[9]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[8]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[7]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[6]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[5]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[4]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[3]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #56]\n\t"
        "#  A[4] * B[11]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[5] * B[10]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[9]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[8]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[7]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[6]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[5]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[4]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #60]\n\t"
        "#  A[5] * B[11]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[6] * B[10]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[9]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[8]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[7]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[6]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[5]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #64]\n\t"
        "#  A[6] * B[11]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[7] * B[10]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[9]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[8]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[7]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[6]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #68]\n\t"
        "#  A[7] * B[11]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[8] * B[10]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[9]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[8]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[7]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #72]\n\t"
        "#  A[8] * B[11]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[9] * B[10]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[9]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[8]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #76]\n\t"
        "#  A[9] * B[11]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[10] * B[10]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[9]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #80]\n\t"
        "#  A[10] * B[11]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[11] * B[10]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #84]\n\t"
        "#  A[11] * B[11]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adc	r5, r5, r7\n\t"
        "str	r4, [%[r], #88]\n\t"
        "str	r5, [%[r], #92]\n\t"
        "ldr	r3, [sp, #0]\n\t"
        "ldr	r4, [sp, #4]\n\t"
        "ldr	r5, [sp, #8]\n\t"
        "ldr	r6, [sp, #12]\n\t"
        "str	r3, [%[r], #0]\n\t"
        "str	r4, [%[r], #4]\n\t"
        "str	r5, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r3, [sp, #16]\n\t"
        "ldr	r4, [sp, #20]\n\t"
        "ldr	r5, [sp, #24]\n\t"
        "ldr	r6, [sp, #28]\n\t"
        "str	r3, [%[r], #16]\n\t"
        "str	r4, [%[r], #20]\n\t"
        "str	r5, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        "ldr	r3, [sp, #32]\n\t"
        "ldr	r4, [sp, #36]\n\t"
        "ldr	r5, [sp, #40]\n\t"
        "ldr	r6, [sp, #44]\n\t"
        "str	r3, [%[r], #32]\n\t"
        "str	r4, [%[r], #36]\n\t"
        "str	r5, [%[r], #40]\n\t"
        "str	r6, [%[r], #44]\n\t"
        "add	sp, sp, #48\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * The input is read as 12 32-bit words and the result is written as 24
 * 32-bit words.
 *
 * The result is produced one word (column) at a time, from least to most
 * significant. For column k every product A[i] * A[j] with i + j == k is
 * added into a rotating three register accumulator (r2, r3, r4):
 *   - products with i != j are counted twice, either by adding the product
 *     twice or by summing them in r5:r6:r7 and doubling that sum;
 *   - the product A[i] * A[i] (even columns) is counted once.
 * r14 is kept at zero and is used to propagate carries.
 *
 * Result words 0..11 are first written to a 48 byte scratch area on the
 * stack and only copied to r after the last read of a; words 12..23 are
 * written straight to r. Presumably this is what allows r to alias a -
 * confirm against callers before relying on it.
 *
 * The asm lowers sp by 48 bytes on entry and restores it before it ends.
 * It changes the condition flags, so "cc" is listed as clobbered.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve scratch space for result words 0..11; r14 = 0. */
        "sub	sp, sp, #48\n\t"
        "mov	r14, #0\n\t"
        "#  A[0] * A[0]\n\t"
        "ldr	r10, [%[a], #0]\n\t"
        "umull	r8, r3, r10, r10\n\t"
        "mov	r4, #0\n\t"
        "str	r8, [sp]\n\t"
        "#  A[0] * A[1]\n\t"
        "ldr	r10, [%[a], #4]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [sp, #4]\n\t"
        "#  A[0] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r14, r14\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "#  A[1] * A[1]\n\t"
        "ldr	r10, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "str	r4, [sp, #8]\n\t"
        "#  A[0] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r14, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "#  A[1] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "str	r2, [sp, #12]\n\t"
        "#  A[0] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[1] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[2] * A[2]\n\t"
        "ldr	r10, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [sp, #16]\n\t"
        /* From here to column 17 the off-diagonal products of a column are
         * summed in r5:r6:r7 and that sum is doubled once. */
        "#  A[0] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [sp, #20]\n\t"
        "#  A[0] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[3]\n\t"
        "ldr	r10, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [sp, #24]\n\t"
        "#  A[0] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r2, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r3, r3, r5\n\t"
        "adcs	r4, r4, r6\n\t"
        "adc	r2, r2, r7\n\t"
        "str	r3, [sp, #28]\n\t"
        "#  A[0] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[4]\n\t"
        "ldr	r10, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [sp, #32]\n\t"
        "#  A[0] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [sp, #36]\n\t"
        "#  A[0] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r2, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[5] * A[5]\n\t"
        "ldr	r10, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r3, r3, r5\n\t"
        "adcs	r4, r4, r6\n\t"
        "adc	r2, r2, r7\n\t"
        "str	r3, [sp, #40]\n\t"
        "#  A[0] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[1] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[2] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[5] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [sp, #44]\n\t"
        /* Result words 12..23 are written directly to r. */
        "#  A[1] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[2] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[3] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[5] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[6] * A[6]\n\t"
        "ldr	r10, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [%[r], #48]\n\t"
        "#  A[2] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r2, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[3] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[4] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[5] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[6] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r3, r3, r5\n\t"
        "adcs	r4, r4, r6\n\t"
        "adc	r2, r2, r7\n\t"
        "str	r3, [%[r], #52]\n\t"
        "#  A[3] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[4] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[5] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[6] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[7] * A[7]\n\t"
        "ldr	r10, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [%[r], #56]\n\t"
        "#  A[4] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r4, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[5] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[6] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[7] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r2, r2, r5\n\t"
        "adcs	r3, r3, r6\n\t"
        "adc	r4, r4, r7\n\t"
        "str	r2, [%[r], #60]\n\t"
        "#  A[5] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r2, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[6] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[7] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[8] * A[8]\n\t"
        "ldr	r10, [%[a], #32]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r3, r3, r5\n\t"
        "adcs	r4, r4, r6\n\t"
        "adc	r2, r2, r7\n\t"
        "str	r3, [%[r], #64]\n\t"
        "#  A[6] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "umull	r5, r6, r10, r8\n\t"
        "mov	r3, #0\n\t"
        "mov	r7, #0\n\t"
        "#  A[7] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "#  A[8] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r5, r5, r8\n\t"
        "adcs	r6, r6, r9\n\t"
        "adc	r7, r7, r14\n\t"
        "adds	r5, r5, r5\n\t"
        "adcs	r6, r6, r6\n\t"
        "adc	r7, r7, r7\n\t"
        "adds	r4, r4, r5\n\t"
        "adcs	r2, r2, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r4, [%[r], #68]\n\t"
        /* Remaining columns add each off-diagonal product twice again. */
        "#  A[7] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r14, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "#  A[8] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "#  A[9] * A[9]\n\t"
        "ldr	r10, [%[a], #36]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "str	r2, [%[r], #72]\n\t"
        "#  A[8] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r14, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "#  A[9] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "adds	r3, r3, r8\n\t"
        "adcs	r4, r4, r9\n\t"
        "adc	r2, r2, r14\n\t"
        "str	r3, [%[r], #76]\n\t"
        "#  A[9] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r14, r14\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "#  A[10] * A[10]\n\t"
        "ldr	r10, [%[a], #40]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r2, r2, r9\n\t"
        "adc	r3, r3, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "#  A[10] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "umull	r8, r9, r10, r8\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r14, r14\n\t"
        "adds	r2, r2, r8\n\t"
        "adcs	r3, r3, r9\n\t"
        "adc	r4, r4, r14\n\t"
        "str	r2, [%[r], #84]\n\t"
        "#  A[11] * A[11]\n\t"
        "ldr	r10, [%[a], #44]\n\t"
        "umull	r8, r9, r10, r10\n\t"
        "adds	r3, r3, r8\n\t"
        "adc	r4, r4, r9\n\t"
        "str	r3, [%[r], #88]\n\t"
        "str	r4, [%[r], #92]\n\t"
        /* All reads of a are done: copy staged words 0..11 to r. */
        "ldr	r2, [sp, #0]\n\t"
        "ldr	r3, [sp, #4]\n\t"
        "ldr	r4, [sp, #8]\n\t"
        "ldr	r8, [sp, #12]\n\t"
        "str	r2, [%[r], #0]\n\t"
        "str	r3, [%[r], #4]\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r8, [%[r], #12]\n\t"
        "ldr	r2, [sp, #16]\n\t"
        "ldr	r3, [sp, #20]\n\t"
        "ldr	r4, [sp, #24]\n\t"
        "ldr	r8, [sp, #28]\n\t"
        "str	r2, [%[r], #16]\n\t"
        "str	r3, [%[r], #20]\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r8, [%[r], #28]\n\t"
        "ldr	r2, [sp, #32]\n\t"
        "ldr	r3, [sp, #36]\n\t"
        "ldr	r4, [sp, #40]\n\t"
        "ldr	r8, [sp, #44]\n\t"
        "str	r2, [%[r], #32]\n\t"
        "str	r3, [%[r], #36]\n\t"
        "str	r4, [%[r], #40]\n\t"
        "str	r8, [%[r], #44]\n\t"
        /* Release the scratch space. */
        "add	sp, sp, #48\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14"
    );
}

/* Add b to a into r. (r = a + b)
 *
 * Adds 12 32-bit words, four at a time, with the carry chained through the
 * condition flags from the first group to the last. Each group of a and b
 * is loaded before the matching group of r is stored.
 *
 * The asm changes the condition flags, so "cc" is listed as clobbered.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * Returns the carry out of the most significant word: 0 or 1.
 */
static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = 0, used to turn the final carry flag into a value. */
        "mov	r12, #0\n\t"
        /* Words 0..3: the only group that starts a fresh carry chain. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Words 4..7. */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        /* Words 8..11. */
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        /* c = 0 + 0 + carry flag. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
    );

    return c;
}

/* Sub b from a into a. (a -= b)
 *
 * Subtracts 24 32-bit words, four at a time, with the borrow chained
 * through the condition flags from the first group to the last.
 *
 * The asm changes the condition flags, so "cc" is listed as clobbered.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * Returns 0 when there is no borrow out of the most significant word and
 * a value with all bits set (-1) when there is.
 */
static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Words 0..3: the only group that starts a fresh borrow chain. */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* Words 4..7. */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        /* Words 8..11. */
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        /* Words 12..15. */
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        /* Words 16..19. */
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        /* Words 20..23. */
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        /* c = r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Adds 24 words (byte offsets 0..92) in groups of four, carrying from one
 * group into the next through the processor carry flag. Every group is
 * loaded from a and b before its results are stored to r, so r may be the
 * same pointer as a or b.
 *
 * r  A single precision integer. Receives the low 24 words of the sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the most significant word: 0 or 1.
 */
static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 is kept at zero so the final adc yields just the carry. */
        "mov	r12, #0\n\t"
        /* Words 0..3: plain add starts the carry chain. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Remaining groups: add with carry in (ldr/str leave flags alone). */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        /* c = 0 + 0 + carry flag. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs overwrite the condition flags. */
        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12"
    );

    return c;
}

/* Mask every digit of a with m and write the result to r.
 *
 * Exactly 12 digits are processed, lowest index first.
 *
 * r  A single precision integer. Receives a[i] & m for i = 0..11.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    while (idx < 12) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    int idx;

    /* Three passes of four digits each. */
    for (idx = 0; idx < 12; idx += 4) {
        r[idx + 0] = a[idx + 0] & m;
        r[idx + 1] = a[idx + 1] & m;
        r[idx + 2] = a[idx + 2] & m;
        r[idx + 3] = a[idx + 3] & m;
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * Splits each 24 digit operand into two 12 digit halves and builds the
 * 48 digit product from three 12x12 multiplications:
 *   mid = (a_lo + a_hi) * (b_lo + b_hi), top = a_hi * b_hi, low = a_lo * b_lo
 * with the middle term formed as mid - top - low and added in at digit 12.
 * The carries out of the two half sums are folded in separately through
 * masked additions at digit 24. The low product is written to r only after
 * the other two products have been computed from a and b.
 *
 * r  A single precision integer. Digits 0..47 are written.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    const sp_digit* a_hi = &a[12];
    const sp_digit* b_hi = &b[12];
    sp_digit* r_hi = r + 24;
    sp_digit sum_a[12];
    sp_digit sum_b[12];
    sp_digit mid[24];
    sp_digit top[24];
    sp_digit carry_a;
    sp_digit carry_b;
    sp_digit u;

    /* Half sums and their carries out. */
    carry_a = sp_3072_add_12(sum_a, a, a_hi);
    carry_b = sp_3072_add_12(sum_b, b, b_hi);
    u = carry_a & carry_b;

    /* The three half-size products; the low one lands directly in r. */
    sp_3072_mul_12(mid, sum_a, sum_b);
    sp_3072_mul_12(top, a_hi, b_hi);
    sp_3072_mul_12(r, a, b);

    /* Cross terms for the half-sum carries: each sum is kept only when the
     * other sum carried, then both are added together at digit 24. */
    sp_3072_mask_12(r_hi, sum_a, 0 - carry_b);
    sp_3072_mask_12(sum_b, sum_b, 0 - carry_a);
    u += sp_3072_add_12(r_hi, r_hi, sum_b);

    /* mid -= top; mid -= low; the returned borrows are accumulated in u. */
    u += sp_3072_sub_in_place_24(mid, top);
    u += sp_3072_sub_in_place_24(mid, r);

    /* Add the middle term at digit 12, then set digit 36 and clear above. */
    u += sp_3072_add_24(r + 12, r + 12, mid);
    r[36] = u;
    XMEMSET(&r[37], 0, sizeof(sp_digit) * 11);

    /* Finally add the high product at digit 24. */
    (void)sp_3072_add_24(r_hi, r_hi, top);
}

/* Square a and put result in r. (r = a * a)
 *
 * Splits the 24 digit operand into two 12 digit halves and builds the
 * 48 digit square from three 12 digit squarings:
 *   mid = (a_lo + a_hi)^2, top = a_hi^2, low = a_lo^2
 * with the middle term formed as mid - top - low and added in at digit 12.
 * The carry out of the half sum is handled by adding the masked sum to
 * itself at digit 24. The low square is written to r only after the other
 * two squares have been computed from a.
 *
 * r  A single precision integer. Digits 0..47 are written.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
{
    const sp_digit* a_hi = &a[12];
    sp_digit* r_hi = r + 24;
    sp_digit sum_a[12];
    sp_digit mid[24];
    sp_digit top[24];
    sp_digit u;

    /* Half sum and its carry out. */
    u = sp_3072_add_12(sum_a, a, a_hi);

    /* The three half-size squares; the low one lands directly in r. */
    sp_3072_sqr_12(mid, sum_a);
    sp_3072_sqr_12(top, a_hi);
    sp_3072_sqr_12(r, a);

    /* Keep the sum only when it carried, then double it at digit 24. */
    sp_3072_mask_12(r_hi, sum_a, 0 - u);
    u += sp_3072_add_12(r_hi, r_hi, r_hi);

    /* mid -= top; mid -= low; the returned borrows are accumulated in u. */
    u += sp_3072_sub_in_place_24(mid, top);
    u += sp_3072_sub_in_place_24(mid, r);

    /* Add the middle term at digit 12, then set digit 36 and clear above. */
    u += sp_3072_add_24(r + 12, r + 12, mid);
    r[36] = u;
    XMEMSET(&r[37], 0, sizeof(sp_digit) * 11);

    /* Finally add the high square at digit 24. */
    (void)sp_3072_add_24(r_hi, r_hi, top);
}

/* Sub b from a into a. (a -= b)
 *
 * Subtracts 48 words (byte offsets 0..188) in groups of four, passing the
 * borrow from one group to the next through the processor carry flag.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * returns 0 when there was no borrow out of the most significant word,
 * otherwise a word with all bits set (-1).
 */
static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Words 0..3: plain subtract starts the borrow chain. */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* Remaining groups: subtract with borrow in. */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        "ldr	r2, [%[a], #96]\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r5, [%[a], #108]\n\t"
        "ldr	r6, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "ldr	r8, [%[b], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #96]\n\t"
        "str	r3, [%[a], #100]\n\t"
        "str	r4, [%[a], #104]\n\t"
        "str	r5, [%[a], #108]\n\t"
        "ldr	r2, [%[a], #112]\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r5, [%[a], #124]\n\t"
        "ldr	r6, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "ldr	r8, [%[b], #120]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #112]\n\t"
        "str	r3, [%[a], #116]\n\t"
        "str	r4, [%[a], #120]\n\t"
        "str	r5, [%[a], #124]\n\t"
        "ldr	r2, [%[a], #128]\n\t"
        "ldr	r3, [%[a], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r5, [%[a], #140]\n\t"
        "ldr	r6, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "ldr	r8, [%[b], #136]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #128]\n\t"
        "str	r3, [%[a], #132]\n\t"
        "str	r4, [%[a], #136]\n\t"
        "str	r5, [%[a], #140]\n\t"
        "ldr	r2, [%[a], #144]\n\t"
        "ldr	r3, [%[a], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r5, [%[a], #156]\n\t"
        "ldr	r6, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "ldr	r8, [%[b], #152]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #144]\n\t"
        "str	r3, [%[a], #148]\n\t"
        "str	r4, [%[a], #152]\n\t"
        "str	r5, [%[a], #156]\n\t"
        "ldr	r2, [%[a], #160]\n\t"
        "ldr	r3, [%[a], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r5, [%[a], #172]\n\t"
        "ldr	r6, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "ldr	r8, [%[b], #168]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #160]\n\t"
        "str	r3, [%[a], #164]\n\t"
        "str	r4, [%[a], #168]\n\t"
        "str	r5, [%[a], #172]\n\t"
        "ldr	r2, [%[a], #176]\n\t"
        "ldr	r3, [%[a], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r5, [%[a], #188]\n\t"
        "ldr	r6, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "ldr	r8, [%[b], #184]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #176]\n\t"
        "str	r3, [%[a], #180]\n\t"
        "str	r4, [%[a], #184]\n\t"
        "str	r5, [%[a], #188]\n\t"
        /* c = r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        /* "cc": subs/sbcs overwrite the condition flags. */
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Adds 48 words (byte offsets 0..188) in groups of four, carrying from one
 * group into the next through the processor carry flag. Every group is
 * loaded from a and b before its results are stored to r, so r may be the
 * same pointer as a or b.
 *
 * r  A single precision integer. Receives the low 48 words of the sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the most significant word: 0 or 1.
 */
static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 is kept at zero so the final adc yields just the carry. */
        "mov	r12, #0\n\t"
        /* Words 0..3: plain add starts the carry chain. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Remaining groups: add with carry in (ldr/str leave flags alone). */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r5, [%[a], #132]\n\t"
        "ldr	r6, [%[a], #136]\n\t"
        "ldr	r7, [%[a], #140]\n\t"
        "ldr	r8, [%[b], #128]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "ldr	r10, [%[b], #136]\n\t"
        "ldr	r14, [%[b], #140]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r5, [%[r], #132]\n\t"
        "str	r6, [%[r], #136]\n\t"
        "str	r7, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r5, [%[a], #148]\n\t"
        "ldr	r6, [%[a], #152]\n\t"
        "ldr	r7, [%[a], #156]\n\t"
        "ldr	r8, [%[b], #144]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "ldr	r10, [%[b], #152]\n\t"
        "ldr	r14, [%[b], #156]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r5, [%[r], #148]\n\t"
        "str	r6, [%[r], #152]\n\t"
        "str	r7, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r5, [%[a], #164]\n\t"
        "ldr	r6, [%[a], #168]\n\t"
        "ldr	r7, [%[a], #172]\n\t"
        "ldr	r8, [%[b], #160]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "ldr	r10, [%[b], #168]\n\t"
        "ldr	r14, [%[b], #172]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r5, [%[r], #164]\n\t"
        "str	r6, [%[r], #168]\n\t"
        "str	r7, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r5, [%[a], #180]\n\t"
        "ldr	r6, [%[a], #184]\n\t"
        "ldr	r7, [%[a], #188]\n\t"
        "ldr	r8, [%[b], #176]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "ldr	r10, [%[b], #184]\n\t"
        "ldr	r14, [%[b], #188]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r5, [%[r], #180]\n\t"
        "str	r6, [%[r], #184]\n\t"
        "str	r7, [%[r], #188]\n\t"
        /* c = 0 + 0 + carry flag. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs overwrite the condition flags. */
        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12"
    );

    return c;
}

/* Mask every digit of a with m and write the result to r.
 *
 * Exactly 24 digits are processed, lowest index first.
 *
 * r  A single precision integer. Receives a[i] & m for i = 0..23.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    while (idx < 24) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    const sp_digit* src = a;
    sp_digit* dst = r;
    int blk;

    /* Three blocks of eight digits each. */
    for (blk = 0; blk < 3; blk++) {
        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        src += 8;
        dst += 8;
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * Splits each 48 digit operand into two 24 digit halves and builds the
 * 96 digit product from three 24x24 multiplications:
 *   mid = (a_lo + a_hi) * (b_lo + b_hi), top = a_hi * b_hi, low = a_lo * b_lo
 * with the middle term formed as mid - top - low and added in at digit 24.
 * The carries out of the two half sums are folded in separately through
 * masked additions at digit 48. The low product is written to r only after
 * the other two products have been computed from a and b.
 *
 * r  A single precision integer. Digits 0..95 are written.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    const sp_digit* a_hi = &a[24];
    const sp_digit* b_hi = &b[24];
    sp_digit* r_hi = r + 48;
    sp_digit sum_a[24];
    sp_digit sum_b[24];
    sp_digit mid[48];
    sp_digit top[48];
    sp_digit carry_a;
    sp_digit carry_b;
    sp_digit u;

    /* Half sums and their carries out. */
    carry_a = sp_3072_add_24(sum_a, a, a_hi);
    carry_b = sp_3072_add_24(sum_b, b, b_hi);
    u = carry_a & carry_b;

    /* The three half-size products; the low one lands directly in r. */
    sp_3072_mul_24(mid, sum_a, sum_b);
    sp_3072_mul_24(top, a_hi, b_hi);
    sp_3072_mul_24(r, a, b);

    /* Cross terms for the half-sum carries: each sum is kept only when the
     * other sum carried, then both are added together at digit 48. */
    sp_3072_mask_24(r_hi, sum_a, 0 - carry_b);
    sp_3072_mask_24(sum_b, sum_b, 0 - carry_a);
    u += sp_3072_add_24(r_hi, r_hi, sum_b);

    /* mid -= top; mid -= low; the returned borrows are accumulated in u. */
    u += sp_3072_sub_in_place_48(mid, top);
    u += sp_3072_sub_in_place_48(mid, r);

    /* Add the middle term at digit 24, then set digit 72 and clear above. */
    u += sp_3072_add_48(r + 24, r + 24, mid);
    r[72] = u;
    XMEMSET(&r[73], 0, sizeof(sp_digit) * 23);

    /* Finally add the high product at digit 48. */
    (void)sp_3072_add_48(r_hi, r_hi, top);
}

/* Square a and put result in r. (r = a * a)
 *
 * Splits the 48 digit operand into two 24 digit halves and builds the
 * 96 digit square from three 24 digit squarings:
 *   mid = (a_lo + a_hi)^2, top = a_hi^2, low = a_lo^2
 * with the middle term formed as mid - top - low and added in at digit 24.
 * The carry out of the half sum is handled by adding the masked sum to
 * itself at digit 48. The low square is written to r only after the other
 * two squares have been computed from a.
 *
 * r  A single precision integer. Digits 0..95 are written.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
{
    const sp_digit* a_hi = &a[24];
    sp_digit* r_hi = r + 48;
    sp_digit sum_a[24];
    sp_digit mid[48];
    sp_digit top[48];
    sp_digit u;

    /* Half sum and its carry out. */
    u = sp_3072_add_24(sum_a, a, a_hi);

    /* The three half-size squares; the low one lands directly in r. */
    sp_3072_sqr_24(mid, sum_a);
    sp_3072_sqr_24(top, a_hi);
    sp_3072_sqr_24(r, a);

    /* Keep the sum only when it carried, then double it at digit 48. */
    sp_3072_mask_24(r_hi, sum_a, 0 - u);
    u += sp_3072_add_24(r_hi, r_hi, r_hi);

    /* mid -= top; mid -= low; the returned borrows are accumulated in u. */
    u += sp_3072_sub_in_place_48(mid, top);
    u += sp_3072_sub_in_place_48(mid, r);

    /* Add the middle term at digit 24, then set digit 72 and clear above. */
    u += sp_3072_add_48(r + 24, r + 24, mid);
    r[72] = u;
    XMEMSET(&r[73], 0, sizeof(sp_digit) * 23);

    /* Finally add the high square at digit 48. */
    (void)sp_3072_add_48(r_hi, r_hi, top);
}

/* Sub b from a into a. (a -= b)
 *
 * Subtracts 96 words (byte offsets 0..380) in groups of four, passing the
 * borrow from one group to the next through the processor carry flag.
 *
 * a  A single precision integer and result.
 * b  A single precision integer.
 *
 * returns 0 when there was no borrow out of the most significant word,
 * otherwise a word with all bits set (-1).
 */
static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Words 0..3: plain subtract starts the borrow chain. */
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* Remaining groups: subtract with borrow in. */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        "ldr	r2, [%[a], #96]\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r5, [%[a], #108]\n\t"
        "ldr	r6, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "ldr	r8, [%[b], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #96]\n\t"
        "str	r3, [%[a], #100]\n\t"
        "str	r4, [%[a], #104]\n\t"
        "str	r5, [%[a], #108]\n\t"
        "ldr	r2, [%[a], #112]\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r5, [%[a], #124]\n\t"
        "ldr	r6, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "ldr	r8, [%[b], #120]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #112]\n\t"
        "str	r3, [%[a], #116]\n\t"
        "str	r4, [%[a], #120]\n\t"
        "str	r5, [%[a], #124]\n\t"
        "ldr	r2, [%[a], #128]\n\t"
        "ldr	r3, [%[a], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r5, [%[a], #140]\n\t"
        "ldr	r6, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "ldr	r8, [%[b], #136]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #128]\n\t"
        "str	r3, [%[a], #132]\n\t"
        "str	r4, [%[a], #136]\n\t"
        "str	r5, [%[a], #140]\n\t"
        "ldr	r2, [%[a], #144]\n\t"
        "ldr	r3, [%[a], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r5, [%[a], #156]\n\t"
        "ldr	r6, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "ldr	r8, [%[b], #152]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #144]\n\t"
        "str	r3, [%[a], #148]\n\t"
        "str	r4, [%[a], #152]\n\t"
        "str	r5, [%[a], #156]\n\t"
        "ldr	r2, [%[a], #160]\n\t"
        "ldr	r3, [%[a], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r5, [%[a], #172]\n\t"
        "ldr	r6, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "ldr	r8, [%[b], #168]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #160]\n\t"
        "str	r3, [%[a], #164]\n\t"
        "str	r4, [%[a], #168]\n\t"
        "str	r5, [%[a], #172]\n\t"
        "ldr	r2, [%[a], #176]\n\t"
        "ldr	r3, [%[a], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r5, [%[a], #188]\n\t"
        "ldr	r6, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "ldr	r8, [%[b], #184]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #176]\n\t"
        "str	r3, [%[a], #180]\n\t"
        "str	r4, [%[a], #184]\n\t"
        "str	r5, [%[a], #188]\n\t"
        "ldr	r2, [%[a], #192]\n\t"
        "ldr	r3, [%[a], #196]\n\t"
        "ldr	r4, [%[a], #200]\n\t"
        "ldr	r5, [%[a], #204]\n\t"
        "ldr	r6, [%[b], #192]\n\t"
        "ldr	r7, [%[b], #196]\n\t"
        "ldr	r8, [%[b], #200]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #192]\n\t"
        "str	r3, [%[a], #196]\n\t"
        "str	r4, [%[a], #200]\n\t"
        "str	r5, [%[a], #204]\n\t"
        "ldr	r2, [%[a], #208]\n\t"
        "ldr	r3, [%[a], #212]\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "ldr	r5, [%[a], #220]\n\t"
        "ldr	r6, [%[b], #208]\n\t"
        "ldr	r7, [%[b], #212]\n\t"
        "ldr	r8, [%[b], #216]\n\t"
        "ldr	r9, [%[b], #220]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #208]\n\t"
        "str	r3, [%[a], #212]\n\t"
        "str	r4, [%[a], #216]\n\t"
        "str	r5, [%[a], #220]\n\t"
        "ldr	r2, [%[a], #224]\n\t"
        "ldr	r3, [%[a], #228]\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "ldr	r5, [%[a], #236]\n\t"
        "ldr	r6, [%[b], #224]\n\t"
        "ldr	r7, [%[b], #228]\n\t"
        "ldr	r8, [%[b], #232]\n\t"
        "ldr	r9, [%[b], #236]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #224]\n\t"
        "str	r3, [%[a], #228]\n\t"
        "str	r4, [%[a], #232]\n\t"
        "str	r5, [%[a], #236]\n\t"
        "ldr	r2, [%[a], #240]\n\t"
        "ldr	r3, [%[a], #244]\n\t"
        "ldr	r4, [%[a], #248]\n\t"
        "ldr	r5, [%[a], #252]\n\t"
        "ldr	r6, [%[b], #240]\n\t"
        "ldr	r7, [%[b], #244]\n\t"
        "ldr	r8, [%[b], #248]\n\t"
        "ldr	r9, [%[b], #252]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #240]\n\t"
        "str	r3, [%[a], #244]\n\t"
        "str	r4, [%[a], #248]\n\t"
        "str	r5, [%[a], #252]\n\t"
        "ldr	r2, [%[a], #256]\n\t"
        "ldr	r3, [%[a], #260]\n\t"
        "ldr	r4, [%[a], #264]\n\t"
        "ldr	r5, [%[a], #268]\n\t"
        "ldr	r6, [%[b], #256]\n\t"
        "ldr	r7, [%[b], #260]\n\t"
        "ldr	r8, [%[b], #264]\n\t"
        "ldr	r9, [%[b], #268]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #256]\n\t"
        "str	r3, [%[a], #260]\n\t"
        "str	r4, [%[a], #264]\n\t"
        "str	r5, [%[a], #268]\n\t"
        "ldr	r2, [%[a], #272]\n\t"
        "ldr	r3, [%[a], #276]\n\t"
        "ldr	r4, [%[a], #280]\n\t"
        "ldr	r5, [%[a], #284]\n\t"
        "ldr	r6, [%[b], #272]\n\t"
        "ldr	r7, [%[b], #276]\n\t"
        "ldr	r8, [%[b], #280]\n\t"
        "ldr	r9, [%[b], #284]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #272]\n\t"
        "str	r3, [%[a], #276]\n\t"
        "str	r4, [%[a], #280]\n\t"
        "str	r5, [%[a], #284]\n\t"
        "ldr	r2, [%[a], #288]\n\t"
        "ldr	r3, [%[a], #292]\n\t"
        "ldr	r4, [%[a], #296]\n\t"
        "ldr	r5, [%[a], #300]\n\t"
        "ldr	r6, [%[b], #288]\n\t"
        "ldr	r7, [%[b], #292]\n\t"
        "ldr	r8, [%[b], #296]\n\t"
        "ldr	r9, [%[b], #300]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #288]\n\t"
        "str	r3, [%[a], #292]\n\t"
        "str	r4, [%[a], #296]\n\t"
        "str	r5, [%[a], #300]\n\t"
        "ldr	r2, [%[a], #304]\n\t"
        "ldr	r3, [%[a], #308]\n\t"
        "ldr	r4, [%[a], #312]\n\t"
        "ldr	r5, [%[a], #316]\n\t"
        "ldr	r6, [%[b], #304]\n\t"
        "ldr	r7, [%[b], #308]\n\t"
        "ldr	r8, [%[b], #312]\n\t"
        "ldr	r9, [%[b], #316]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #304]\n\t"
        "str	r3, [%[a], #308]\n\t"
        "str	r4, [%[a], #312]\n\t"
        "str	r5, [%[a], #316]\n\t"
        "ldr	r2, [%[a], #320]\n\t"
        "ldr	r3, [%[a], #324]\n\t"
        "ldr	r4, [%[a], #328]\n\t"
        "ldr	r5, [%[a], #332]\n\t"
        "ldr	r6, [%[b], #320]\n\t"
        "ldr	r7, [%[b], #324]\n\t"
        "ldr	r8, [%[b], #328]\n\t"
        "ldr	r9, [%[b], #332]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #320]\n\t"
        "str	r3, [%[a], #324]\n\t"
        "str	r4, [%[a], #328]\n\t"
        "str	r5, [%[a], #332]\n\t"
        "ldr	r2, [%[a], #336]\n\t"
        "ldr	r3, [%[a], #340]\n\t"
        "ldr	r4, [%[a], #344]\n\t"
        "ldr	r5, [%[a], #348]\n\t"
        "ldr	r6, [%[b], #336]\n\t"
        "ldr	r7, [%[b], #340]\n\t"
        "ldr	r8, [%[b], #344]\n\t"
        "ldr	r9, [%[b], #348]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #336]\n\t"
        "str	r3, [%[a], #340]\n\t"
        "str	r4, [%[a], #344]\n\t"
        "str	r5, [%[a], #348]\n\t"
        "ldr	r2, [%[a], #352]\n\t"
        "ldr	r3, [%[a], #356]\n\t"
        "ldr	r4, [%[a], #360]\n\t"
        "ldr	r5, [%[a], #364]\n\t"
        "ldr	r6, [%[b], #352]\n\t"
        "ldr	r7, [%[b], #356]\n\t"
        "ldr	r8, [%[b], #360]\n\t"
        "ldr	r9, [%[b], #364]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #352]\n\t"
        "str	r3, [%[a], #356]\n\t"
        "str	r4, [%[a], #360]\n\t"
        "str	r5, [%[a], #364]\n\t"
        "ldr	r2, [%[a], #368]\n\t"
        "ldr	r3, [%[a], #372]\n\t"
        "ldr	r4, [%[a], #376]\n\t"
        "ldr	r5, [%[a], #380]\n\t"
        "ldr	r6, [%[b], #368]\n\t"
        "ldr	r7, [%[b], #372]\n\t"
        "ldr	r8, [%[b], #376]\n\t"
        "ldr	r9, [%[b], #380]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #368]\n\t"
        "str	r3, [%[a], #372]\n\t"
        "str	r4, [%[a], #376]\n\t"
        "str	r5, [%[a], #380]\n\t"
        /* c = r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        /* "cc": subs/sbcs overwrite the condition flags. */
        : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: the 96 digits are processed in 24 groups of four, with the
 * carry flag chained from one group to the next. Each group loads its four
 * digits of a and b before storing the four result digits at the same
 * offsets.
 *
 * r  A single precision integer. Receives the 96-digit sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the top digit (0 or 1).
 */
static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 stays zero; it is only used to materialise the final carry. */
        "mov	r12, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        /* First digit uses adds (no carry in); all later digits use adcs. */
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r5, [%[a], #132]\n\t"
        "ldr	r6, [%[a], #136]\n\t"
        "ldr	r7, [%[a], #140]\n\t"
        "ldr	r8, [%[b], #128]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "ldr	r10, [%[b], #136]\n\t"
        "ldr	r14, [%[b], #140]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r5, [%[r], #132]\n\t"
        "str	r6, [%[r], #136]\n\t"
        "str	r7, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r5, [%[a], #148]\n\t"
        "ldr	r6, [%[a], #152]\n\t"
        "ldr	r7, [%[a], #156]\n\t"
        "ldr	r8, [%[b], #144]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "ldr	r10, [%[b], #152]\n\t"
        "ldr	r14, [%[b], #156]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r5, [%[r], #148]\n\t"
        "str	r6, [%[r], #152]\n\t"
        "str	r7, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r5, [%[a], #164]\n\t"
        "ldr	r6, [%[a], #168]\n\t"
        "ldr	r7, [%[a], #172]\n\t"
        "ldr	r8, [%[b], #160]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "ldr	r10, [%[b], #168]\n\t"
        "ldr	r14, [%[b], #172]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r5, [%[r], #164]\n\t"
        "str	r6, [%[r], #168]\n\t"
        "str	r7, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r5, [%[a], #180]\n\t"
        "ldr	r6, [%[a], #184]\n\t"
        "ldr	r7, [%[a], #188]\n\t"
        "ldr	r8, [%[b], #176]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "ldr	r10, [%[b], #184]\n\t"
        "ldr	r14, [%[b], #188]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r5, [%[r], #180]\n\t"
        "str	r6, [%[r], #184]\n\t"
        "str	r7, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r5, [%[a], #196]\n\t"
        "ldr	r6, [%[a], #200]\n\t"
        "ldr	r7, [%[a], #204]\n\t"
        "ldr	r8, [%[b], #192]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "ldr	r10, [%[b], #200]\n\t"
        "ldr	r14, [%[b], #204]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r5, [%[r], #196]\n\t"
        "str	r6, [%[r], #200]\n\t"
        "str	r7, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r5, [%[a], #212]\n\t"
        "ldr	r6, [%[a], #216]\n\t"
        "ldr	r7, [%[a], #220]\n\t"
        "ldr	r8, [%[b], #208]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "ldr	r10, [%[b], #216]\n\t"
        "ldr	r14, [%[b], #220]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r5, [%[r], #212]\n\t"
        "str	r6, [%[r], #216]\n\t"
        "str	r7, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r5, [%[a], #228]\n\t"
        "ldr	r6, [%[a], #232]\n\t"
        "ldr	r7, [%[a], #236]\n\t"
        "ldr	r8, [%[b], #224]\n\t"
        "ldr	r9, [%[b], #228]\n\t"
        "ldr	r10, [%[b], #232]\n\t"
        "ldr	r14, [%[b], #236]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r5, [%[r], #228]\n\t"
        "str	r6, [%[r], #232]\n\t"
        "str	r7, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r5, [%[a], #244]\n\t"
        "ldr	r6, [%[a], #248]\n\t"
        "ldr	r7, [%[a], #252]\n\t"
        "ldr	r8, [%[b], #240]\n\t"
        "ldr	r9, [%[b], #244]\n\t"
        "ldr	r10, [%[b], #248]\n\t"
        "ldr	r14, [%[b], #252]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r5, [%[r], #244]\n\t"
        "str	r6, [%[r], #248]\n\t"
        "str	r7, [%[r], #252]\n\t"
        "ldr	r4, [%[a], #256]\n\t"
        "ldr	r5, [%[a], #260]\n\t"
        "ldr	r6, [%[a], #264]\n\t"
        "ldr	r7, [%[a], #268]\n\t"
        "ldr	r8, [%[b], #256]\n\t"
        "ldr	r9, [%[b], #260]\n\t"
        "ldr	r10, [%[b], #264]\n\t"
        "ldr	r14, [%[b], #268]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #256]\n\t"
        "str	r5, [%[r], #260]\n\t"
        "str	r6, [%[r], #264]\n\t"
        "str	r7, [%[r], #268]\n\t"
        "ldr	r4, [%[a], #272]\n\t"
        "ldr	r5, [%[a], #276]\n\t"
        "ldr	r6, [%[a], #280]\n\t"
        "ldr	r7, [%[a], #284]\n\t"
        "ldr	r8, [%[b], #272]\n\t"
        "ldr	r9, [%[b], #276]\n\t"
        "ldr	r10, [%[b], #280]\n\t"
        "ldr	r14, [%[b], #284]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #272]\n\t"
        "str	r5, [%[r], #276]\n\t"
        "str	r6, [%[r], #280]\n\t"
        "str	r7, [%[r], #284]\n\t"
        "ldr	r4, [%[a], #288]\n\t"
        "ldr	r5, [%[a], #292]\n\t"
        "ldr	r6, [%[a], #296]\n\t"
        "ldr	r7, [%[a], #300]\n\t"
        "ldr	r8, [%[b], #288]\n\t"
        "ldr	r9, [%[b], #292]\n\t"
        "ldr	r10, [%[b], #296]\n\t"
        "ldr	r14, [%[b], #300]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #288]\n\t"
        "str	r5, [%[r], #292]\n\t"
        "str	r6, [%[r], #296]\n\t"
        "str	r7, [%[r], #300]\n\t"
        "ldr	r4, [%[a], #304]\n\t"
        "ldr	r5, [%[a], #308]\n\t"
        "ldr	r6, [%[a], #312]\n\t"
        "ldr	r7, [%[a], #316]\n\t"
        "ldr	r8, [%[b], #304]\n\t"
        "ldr	r9, [%[b], #308]\n\t"
        "ldr	r10, [%[b], #312]\n\t"
        "ldr	r14, [%[b], #316]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #304]\n\t"
        "str	r5, [%[r], #308]\n\t"
        "str	r6, [%[r], #312]\n\t"
        "str	r7, [%[r], #316]\n\t"
        "ldr	r4, [%[a], #320]\n\t"
        "ldr	r5, [%[a], #324]\n\t"
        "ldr	r6, [%[a], #328]\n\t"
        "ldr	r7, [%[a], #332]\n\t"
        "ldr	r8, [%[b], #320]\n\t"
        "ldr	r9, [%[b], #324]\n\t"
        "ldr	r10, [%[b], #328]\n\t"
        "ldr	r14, [%[b], #332]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #320]\n\t"
        "str	r5, [%[r], #324]\n\t"
        "str	r6, [%[r], #328]\n\t"
        "str	r7, [%[r], #332]\n\t"
        "ldr	r4, [%[a], #336]\n\t"
        "ldr	r5, [%[a], #340]\n\t"
        "ldr	r6, [%[a], #344]\n\t"
        "ldr	r7, [%[a], #348]\n\t"
        "ldr	r8, [%[b], #336]\n\t"
        "ldr	r9, [%[b], #340]\n\t"
        "ldr	r10, [%[b], #344]\n\t"
        "ldr	r14, [%[b], #348]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #336]\n\t"
        "str	r5, [%[r], #340]\n\t"
        "str	r6, [%[r], #344]\n\t"
        "str	r7, [%[r], #348]\n\t"
        "ldr	r4, [%[a], #352]\n\t"
        "ldr	r5, [%[a], #356]\n\t"
        "ldr	r6, [%[a], #360]\n\t"
        "ldr	r7, [%[a], #364]\n\t"
        "ldr	r8, [%[b], #352]\n\t"
        "ldr	r9, [%[b], #356]\n\t"
        "ldr	r10, [%[b], #360]\n\t"
        "ldr	r14, [%[b], #364]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #352]\n\t"
        "str	r5, [%[r], #356]\n\t"
        "str	r6, [%[r], #360]\n\t"
        "str	r7, [%[r], #364]\n\t"
        "ldr	r4, [%[a], #368]\n\t"
        "ldr	r5, [%[a], #372]\n\t"
        "ldr	r6, [%[a], #376]\n\t"
        "ldr	r7, [%[a], #380]\n\t"
        "ldr	r8, [%[b], #368]\n\t"
        "ldr	r9, [%[b], #372]\n\t"
        "ldr	r10, [%[b], #376]\n\t"
        "ldr	r14, [%[b], #380]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #368]\n\t"
        "str	r5, [%[r], #372]\n\t"
        "str	r6, [%[r], #376]\n\t"
        "str	r7, [%[r], #380]\n\t"
        /* c = 0 + 0 + carry flag. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": the add/adc sequence rewrites the condition flags. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

/* Mask a 48-digit number: r[i] = a[i] & m for every digit.
 *
 * Digits are processed from index 0 upwards.
 *
 * r  A single precision integer. Receives the masked digits.
 * a  A single precision integer. Source digits.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
{
    const sp_digit* const stop = a + 48;

#ifdef WOLFSSL_SP_SMALL
    /* Compact form: one digit per iteration. */
    while (a != stop) {
        *r++ = *a++ & m;
    }
#else
    /* Unrolled form: eight digits per iteration. */
    while (a != stop) {
        r[0] = a[0] & m;
        r[1] = a[1] & m;
        r[2] = a[2] & m;
        r[3] = a[3] & m;
        r[4] = a[4] & m;
        r[5] = a[5] & m;
        r[6] = a[6] & m;
        r[7] = a[7] & m;
        a += 8;
        r += 8;
    }
#endif
}

/* Multiply a and b into r. (r = a * b)
 *
 * Uses one level of Karatsuba: each operand is split into a low and a high
 * 48-digit half and three 48x48-digit products are combined:
 *   z0 = a_lo * b_lo, z2 = a_hi * b_hi, z1 = (a_lo + a_hi) * (b_lo + b_hi).
 * The carries out of the two half sums are handled separately (ca, cb, u).
 *
 * r  A single precision integer. Result; digits r[0..191] are written.
 * a  A single precision integer. Digits a[0..95] are read.
 * b  A single precision integer. Digits b[0..95] are read.
 */
SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* z0 is built directly in the low 96 digits of the result. */
    sp_digit* z0 = r;
    sp_digit z1[96];
    sp_digit a1[48];
    sp_digit b1[48];
    sp_digit z2[96];
    sp_digit u, ca, cb;

    /* a1 = a_lo + a_hi, b1 = b_lo + b_hi; ca/cb hold the returned carries. */
    ca = sp_3072_add_48(a1, a, &a[48]);
    cb = sp_3072_add_48(b1, b, &b[48]);
    /* Product of the two carries; accumulated into digit 144 below. */
    u  = ca & cb;
    sp_3072_mul_48(z1, a1, b1);
    sp_3072_mul_48(z2, &a[48], &b[48]);
    sp_3072_mul_48(z0, a, b);
    /* Cross terms of the carries, selected by mask rather than by branch:
     * r[96..143] = a1 masked with (0 - cb), b1 masked with (0 - ca), then
     * summed. */
    sp_3072_mask_48(r + 96, a1, 0 - cb);
    sp_3072_mask_48(b1, b1, 0 - ca);
    u += sp_3072_add_48(r + 96, r + 96, b1);
    /* z1 = z1 - z2 - z0 (the middle Karatsuba term); the values returned by
     * the subtractions are folded into u. */
    u += sp_3072_sub_in_place_96(z1, z2);
    u += sp_3072_sub_in_place_96(z1, z0);
    /* Add the middle term at a 48-digit offset, over r[48..143]. */
    u += sp_3072_add_96(r + 48, r + 48, z1);
    /* Store the accumulated top word and clear r[145..191] before z2 is
     * added over r[96..191]. */
    r[144] = u;
    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
    (void)sp_3072_add_96(r + 96, r + 96, z2);
}

/* Square a and put result in r. (r = a * a)
 *
 * Uses one level of Karatsuba on the two 48-digit halves of a:
 *   z0 = a_lo^2, z2 = a_hi^2, z1 = (a_lo + a_hi)^2.
 * The carry out of the half sum is handled separately through u.
 *
 * r  A single precision integer. Result; digits r[0..191] are written.
 * a  A single precision integer. Digits a[0..95] are read.
 */
SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
{
    /* z0 is built directly in the low 96 digits of the result. */
    sp_digit* z0 = r;
    sp_digit z2[96];
    sp_digit z1[96];
    sp_digit a1[48];
    sp_digit u;

    /* a1 = a_lo + a_hi; u holds the returned carry. */
    u = sp_3072_add_48(a1, a, &a[48]);
    sp_3072_sqr_48(z1, a1);
    sp_3072_sqr_48(z2, &a[48]);
    sp_3072_sqr_48(z0, a);
    /* Carry cross term: r[96..143] = a1 masked with (0 - u), then doubled by
     * adding it to itself. */
    sp_3072_mask_48(r + 96, a1, 0 - u);
    u += sp_3072_add_48(r + 96, r + 96, r + 96);
    /* z1 = z1 - z2 - z0 (the middle Karatsuba term); the values returned by
     * the subtractions are folded into u. */
    u += sp_3072_sub_in_place_96(z1, z2);
    u += sp_3072_sub_in_place_96(z1, z0);
    /* Add the middle term at a 48-digit offset, over r[48..143]. */
    u += sp_3072_add_96(r + 48, r + 48, z1);
    /* Store the accumulated top word and clear r[145..191] before z2 is
     * added over r[96..191]. */
    r[144] = u;
    XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
    (void)sp_3072_add_96(r + 96, r + 96, z2);
}

#endif /* !WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Compact loop form: four digits are added per iteration and the carry is
 * kept in c between iterations.
 *
 * r  A single precision integer. Receives the 96-digit sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the top digit (0 or 1).
 */
static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = end of a (96 digits * 4 bytes). */
        "add	r12, %[a], #384\n\t"
        "\n1:\n\t"
        /* Reload the carry flag from c: c + 0xffffffff carries iff c == 1. */
        "adds	%[c], %[c], #-1\n\t"
        "ldr	r4, [%[a]], #4\n\t"
        "ldr	r5, [%[a]], #4\n\t"
        "ldr	r6, [%[a]], #4\n\t"
        "ldr	r7, [%[a]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "ldr	r14, [%[b]], #4\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r]], #4\n\t"
        "str	r5, [%[r]], #4\n\t"
        "str	r6, [%[r]], #4\n\t"
        "str	r7, [%[r]], #4\n\t"
        /* Save the carry flag into c before cmp overwrites the flags. */
        "mov	r4, #0\n\t"
        "adc	%[c], r4, #0\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": adds/adcs/cmp rewrite the condition flags. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Compact loop form: four digits are subtracted per iteration and the borrow
 * is kept in c between iterations.
 *
 * a  A single precision integer. 96 digits, updated in place.
 * b  A single precision integer.
 *
 * returns 0 when there is no borrow out of the top digit, otherwise a digit
 * with all bits set ((sp_digit)-1).
 */
static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r14 stays zero; r12 = end of a (96 digits * 4 bytes). */
        "mov	r14, #0\n\t"
        "add	r12, %[a], #384\n\t"
        "\n1:\n\t"
        /* Reload the carry flag from c: 0 - c borrows iff c is non-zero. */
        "subs	%[c], r14, %[c]\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r7, [%[b]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "sbcs	r6, r6, r10\n\t"
        "str	r3, [%[a]], #4\n\t"
        "str	r4, [%[a]], #4\n\t"
        "str	r5, [%[a]], #4\n\t"
        "str	r6, [%[a]], #4\n\t"
        /* c = 0 - 0 - borrow: 0 or all ones; saved before cmp. */
        "sbc	%[c], r14, r14\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": subs/sbcs/cmp rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12",
          "r14", "cc"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Compact form: the product is computed one result digit (column) at a time
 * into a 768-byte temporary carved out of the stack, then copied to r. The
 * stack pointer is lowered at entry to the asm and is back at its original
 * value when the copy loop finishes.
 *
 * r  A single precision integer. Receives 768 bytes of result.
 * a  A single precision integer. 96 digits.
 * b  A single precision integer. 96 digits.
 */
static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Reserve the temporary; r5 = byte offset of the current column,
         * r6:r7:r8 = three-word column accumulator. */
        "sub	sp, sp, #768\n\t"
        "mov	r5, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "\n1:\n\t"
        /* r3 = first offset into a for this column: max(r5 - 380, 0);
         * r4 = matching offset into b, so that r3 + r4 == r5. */
        "subs	r3, r5, #380\n\t"
        "it	cc\n\t"
        "movcc	r3, #0\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r12, [%[b], r4]\n\t"
        "umull	r9, r10, r14, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, #0\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Stop at the end of a, or once r3 has passed the column offset. */
        "cmp	r3, #384\n\t"
        "beq	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word and shift the accumulator down. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #760\n\t"
        "ble	1b\n\t"
        /* Final (top) word of the product at offset 764. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy the temporary to r 16 bytes at a time, releasing the stack
         * space as it goes. */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        /* "cc": the arithmetic and compares rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12", "cc"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * Compact form: the square is computed one result digit (column) at a time
 * into a 768-byte temporary carved out of the stack, then copied to r.
 * Within a column each product of two different digits is added twice and
 * the product of a digit with itself is added once. The stack pointer is
 * lowered at entry to the asm and is back at its original value when the
 * copy loop finishes.
 *
 * r  A single precision integer. Receives 768 bytes of result.
 * a  A single precision integer. 96 digits.
 */
static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve the temporary; r12 stays zero, r5 = byte offset of the
         * current column, r6:r7:r8 = three-word column accumulator. */
        "sub	sp, sp, #768\n\t"
        "mov	r12, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "mov	r5, #0\n\t"
        "\n1:\n\t"
        /* r3 = lower offset for this column: max(r5 - 380, 0);
         * r4 = upper offset, so that r3 + r4 == r5. */
        "subs	r3, r5, #380\n\t"
        "it	cc\n\t"
        "movcc	r3, r12\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* Same digit on both sides: take the single-add path at 4. */
        "cmp	r4, r3\n\t"
        "beq	4f\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r9, [%[a], r4]\n\t"
        "umull	r9, r10, r14, r9\n\t"
        /* Different digits: accumulate the product twice. */
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "bal	5f\n\t"
        "\n4:\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "umull	r9, r10, r14, r14\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "\n5:\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Stop at the end of a, once the offsets have crossed, or once r3
         * has passed the column offset. */
        "cmp	r3, #384\n\t"
        "beq	3f\n\t"
        "cmp	r3, r4\n\t"
        "bgt	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word and shift the accumulator down. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #760\n\t"
        "ble	1b\n\t"
        /* Final (top) word of the square at offset 764. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy the temporary to r 16 bytes at a time, releasing the stack
         * space as it goes. */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        /* "cc": the arithmetic and compares rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12", "cc"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
#ifdef WOLFSSL_SP_SMALL
/* Mask a 48-digit number: r[i] = a[i] & m for every digit.
 *
 * Digits are processed from index 0 upwards.
 *
 * r  A single precision integer. Receives the masked digits.
 * a  A single precision integer. Source digits.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
{
    const sp_digit* const stop = a + 48;

    while (a != stop) {
        *r++ = *a++ & m;
    }
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Compact loop form: four digits are added per iteration and the carry is
 * kept in c between iterations.
 *
 * r  A single precision integer. Receives the 48-digit sum.
 * a  A single precision integer.
 * b  A single precision integer.
 *
 * returns the carry out of the top digit (0 or 1).
 */
static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = end of a (48 digits * 4 bytes). */
        "add	r12, %[a], #192\n\t"
        "\n1:\n\t"
        /* Reload the carry flag from c: c + 0xffffffff carries iff c == 1. */
        "adds	%[c], %[c], #-1\n\t"
        "ldr	r4, [%[a]], #4\n\t"
        "ldr	r5, [%[a]], #4\n\t"
        "ldr	r6, [%[a]], #4\n\t"
        "ldr	r7, [%[a]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "ldr	r14, [%[b]], #4\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r]], #4\n\t"
        "str	r5, [%[r]], #4\n\t"
        "str	r6, [%[r]], #4\n\t"
        "str	r7, [%[r]], #4\n\t"
        /* Save the carry flag into c before cmp overwrites the flags. */
        "mov	r4, #0\n\t"
        "adc	%[c], r4, #0\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": adds/adcs/cmp rewrite the condition flags. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Compact loop form: four digits are subtracted per iteration and the borrow
 * is kept in c between iterations.
 *
 * a  A single precision integer. 48 digits, updated in place.
 * b  A single precision integer.
 *
 * returns 0 when there is no borrow out of the top digit, otherwise a digit
 * with all bits set ((sp_digit)-1).
 */
static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r14 stays zero; r12 = end of a (48 digits * 4 bytes). */
        "mov	r14, #0\n\t"
        "add	r12, %[a], #192\n\t"
        "\n1:\n\t"
        /* Reload the carry flag from c: 0 - c borrows iff c is non-zero. */
        "subs	%[c], r14, %[c]\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r7, [%[b]], #4\n\t"
        "ldr	r8, [%[b]], #4\n\t"
        "ldr	r9, [%[b]], #4\n\t"
        "ldr	r10, [%[b]], #4\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "sbcs	r6, r6, r10\n\t"
        "str	r3, [%[a]], #4\n\t"
        "str	r4, [%[a]], #4\n\t"
        "str	r5, [%[a]], #4\n\t"
        "str	r6, [%[a]], #4\n\t"
        /* c = 0 - 0 - borrow: 0 or all ones; saved before cmp. */
        "sbc	%[c], r14, r14\n\t"
        "cmp	%[a], r12\n\t"
        "bne	1b\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": subs/sbcs/cmp rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12",
          "r14", "cc"
    );

    return c;
}

#endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Compact form: the product is computed one result digit (column) at a time
 * into a 384-byte temporary carved out of the stack, then copied to r. The
 * stack pointer is lowered at entry to the asm and is back at its original
 * value when the copy loop finishes.
 *
 * r  A single precision integer. Receives 384 bytes of result.
 * a  A single precision integer. 48 digits.
 * b  A single precision integer. 48 digits.
 */
static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        /* Reserve the temporary; r5 = byte offset of the current column,
         * r6:r7:r8 = three-word column accumulator. */
        "sub	sp, sp, #384\n\t"
        "mov	r5, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "\n1:\n\t"
        /* r3 = first offset into a for this column: max(r5 - 188, 0);
         * r4 = matching offset into b, so that r3 + r4 == r5. */
        "subs	r3, r5, #188\n\t"
        "it	cc\n\t"
        "movcc	r3, #0\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r12, [%[b], r4]\n\t"
        "umull	r9, r10, r14, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, #0\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Stop at the end of a, or once r3 has passed the column offset. */
        "cmp	r3, #192\n\t"
        "beq	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word and shift the accumulator down. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #376\n\t"
        "ble	1b\n\t"
        /* Final (top) word of the product at offset 380. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy the temporary to r 16 bytes at a time, releasing the stack
         * space as it goes. */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b)
        /* "cc": the arithmetic and compares rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12", "cc"
    );
}

/* Square a and put result in r. (r = a * a)
 *
 * Compact form: the square is computed one result digit (column) at a time
 * into a 384-byte temporary carved out of the stack, then copied to r.
 * Within a column each product of two different digits is added twice and
 * the product of a digit with itself is added once. The stack pointer is
 * lowered at entry to the asm and is back at its original value when the
 * copy loop finishes.
 *
 * r  A single precision integer. Receives 384 bytes of result.
 * a  A single precision integer. 48 digits.
 */
static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* Reserve the temporary; r12 stays zero, r5 = byte offset of the
         * current column, r6:r7:r8 = three-word column accumulator. */
        "sub	sp, sp, #384\n\t"
        "mov	r12, #0\n\t"
        "mov	r6, #0\n\t"
        "mov	r7, #0\n\t"
        "mov	r8, #0\n\t"
        "mov	r5, #0\n\t"
        "\n1:\n\t"
        /* r3 = lower offset for this column: max(r5 - 188, 0);
         * r4 = upper offset, so that r3 + r4 == r5. */
        "subs	r3, r5, #188\n\t"
        "it	cc\n\t"
        "movcc	r3, r12\n\t"
        "sub	r4, r5, r3\n\t"
        "\n2:\n\t"
        /* Same digit on both sides: take the single-add path at 4. */
        "cmp	r4, r3\n\t"
        "beq	4f\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "ldr	r9, [%[a], r4]\n\t"
        "umull	r9, r10, r14, r9\n\t"
        /* Different digits: accumulate the product twice. */
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "bal	5f\n\t"
        "\n4:\n\t"
        "ldr	r14, [%[a], r3]\n\t"
        "umull	r9, r10, r14, r14\n\t"
        "adds	r6, r6, r9\n\t"
        "adcs	r7, r7, r10\n\t"
        "adc	r8, r8, r12\n\t"
        "\n5:\n\t"
        "add	r3, r3, #4\n\t"
        "sub	r4, r4, #4\n\t"
        /* Stop at the end of a, once the offsets have crossed, or once r3
         * has passed the column offset. */
        "cmp	r3, #192\n\t"
        "beq	3f\n\t"
        "cmp	r3, r4\n\t"
        "bgt	3f\n\t"
        "cmp	r3, r5\n\t"
        "ble	2b\n\t"
        "\n3:\n\t"
        /* Emit the low accumulator word and shift the accumulator down. */
        "str	r6, [sp, r5]\n\t"
        "mov	r6, r7\n\t"
        "mov	r7, r8\n\t"
        "mov	r8, #0\n\t"
        "add	r5, r5, #4\n\t"
        "cmp	r5, #376\n\t"
        "ble	1b\n\t"
        /* Final (top) word of the square at offset 380. */
        "str	r6, [sp, r5]\n\t"
        "\n4:\n\t"
        /* Copy the temporary to r 16 bytes at a time, releasing the stack
         * space as it goes. */
        "ldr	r6, [sp, #0]\n\t"
        "ldr	r7, [sp, #4]\n\t"
        "ldr	r8, [sp, #8]\n\t"
        "ldr	r3, [sp, #12]\n\t"
        "str	r6, [%[r], #0]\n\t"
        "str	r7, [%[r], #4]\n\t"
        "str	r8, [%[r], #8]\n\t"
        "str	r3, [%[r], #12]\n\t"
        "add	sp, sp, #16\n\t"
        "add	%[r], %[r], #16\n\t"
        "subs	r5, r5, #16\n\t"
        "bgt	4b\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        /* "cc": the arithmetic and compares rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14",
          "r12", "cc"
    );
}

#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */

/* Calculate the bottom digit of -1/a mod 2^n.
 *
 * Only the least significant digit of a is used. An initial 4-bit inverse is
 * refined with three doubling steps and the negated result is stored.
 *
 * a    A single precision number.
 * rho  Bottom word of inverse.
 */
static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
{
    const sp_digit m0 = a[0];
    sp_digit inv;
    int step;

    /* Seed value: inv*m0 == 1 mod 2**4 (holds when m0 is odd). */
    inv = m0 + (((m0 + 2) & 4) << 1);

    /* Each step doubles the number of valid low bits: 4 -> 8 -> 16 -> 32. */
    for (step = 0; step < 3; step++) {
        inv *= 2 - m0 * inv;
    }

    /* rho = -1/m0 mod 2**32 */
    *rho = -inv;
}

/* Multiply a by the single digit b and store the product in r. (r = a * b)
 *
 * Exactly 96 digits are read from a (byte offsets 0..380) and 97 digits are
 * written to r (byte offsets 0..384): the extra top digit holds the final
 * carry, so r must have room for 97 digits.
 *
 * Both assembly variants modify the condition flags (adds/adcs/adc/cmp), so
 * "cc" is listed as a clobber; GCC does not assume this for ARM inline asm.
 *
 * r  A single precision integer (97 digits written).
 * a  A single precision integer (96 digits read).
 * b  A single precision digit.
 */
static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    /* Looping variant.
     *   r10      constant zero used to propagate carries
     *   r9       byte offset of the digit being processed (4..380)
     *   r3:r4:r5 running accumulator, shifted down one word per iteration
     *   r6:r7    64-bit product of b and the current digit of a
     */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r5, r3, %[b], r8\n\t"
        "mov	r4, #0\n\t"
        "str	r5, [%[r]]\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, #4\n\t"
        "1:\n\t"
        "ldr	r8, [%[a], r9]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], r9]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r9, r9, #4\n\t"
        "cmp	r9, #384\n\t"
        "blt	1b\n\t"
        /* Top digit: the carry left over after the last product. */
        "str	r3, [%[r], #384]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#else
    /* Fully unrolled variant.
     *   r10      constant zero used to propagate carries
     *   r3/r4/r5 three accumulator words whose roles (low, middle, high)
     *            rotate every digit so no register moves are needed
     *   r6:r7    64-bit product of b and the current digit of a
     */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r3, r4, %[b], r8\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [%[r]]\n\t"
        "# A[1] * B\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #4]\n\t"
        "# A[2] * B\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #8]\n\t"
        "# A[3] * B\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #12]\n\t"
        "# A[4] * B\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #16]\n\t"
        "# A[5] * B\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #20]\n\t"
        "# A[6] * B\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #24]\n\t"
        "# A[7] * B\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #28]\n\t"
        "# A[8] * B\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #32]\n\t"
        "# A[9] * B\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #36]\n\t"
        "# A[10] * B\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #40]\n\t"
        "# A[11] * B\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #44]\n\t"
        "# A[12] * B\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "# A[13] * B\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "# A[14] * B\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #56]\n\t"
        "# A[15] * B\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #60]\n\t"
        "# A[16] * B\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #64]\n\t"
        "# A[17] * B\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #68]\n\t"
        "# A[18] * B\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #72]\n\t"
        "# A[19] * B\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #76]\n\t"
        "# A[20] * B\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #80]\n\t"
        "# A[21] * B\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #84]\n\t"
        "# A[22] * B\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #88]\n\t"
        "# A[23] * B\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #92]\n\t"
        "# A[24] * B\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #96]\n\t"
        "# A[25] * B\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #100]\n\t"
        "# A[26] * B\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #104]\n\t"
        "# A[27] * B\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #108]\n\t"
        "# A[28] * B\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #112]\n\t"
        "# A[29] * B\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #116]\n\t"
        "# A[30] * B\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #120]\n\t"
        "# A[31] * B\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #124]\n\t"
        "# A[32] * B\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #128]\n\t"
        "# A[33] * B\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #132]\n\t"
        "# A[34] * B\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #136]\n\t"
        "# A[35] * B\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #140]\n\t"
        "# A[36] * B\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #144]\n\t"
        "# A[37] * B\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #148]\n\t"
        "# A[38] * B\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #152]\n\t"
        "# A[39] * B\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #156]\n\t"
        "# A[40] * B\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #160]\n\t"
        "# A[41] * B\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #164]\n\t"
        "# A[42] * B\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #168]\n\t"
        "# A[43] * B\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #172]\n\t"
        "# A[44] * B\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #176]\n\t"
        "# A[45] * B\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #180]\n\t"
        "# A[46] * B\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #184]\n\t"
        "# A[47] * B\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #188]\n\t"
        "# A[48] * B\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #192]\n\t"
        "# A[49] * B\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #196]\n\t"
        "# A[50] * B\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #200]\n\t"
        "# A[51] * B\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #204]\n\t"
        "# A[52] * B\n\t"
        "ldr	r8, [%[a], #208]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #208]\n\t"
        "# A[53] * B\n\t"
        "ldr	r8, [%[a], #212]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #212]\n\t"
        "# A[54] * B\n\t"
        "ldr	r8, [%[a], #216]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #216]\n\t"
        "# A[55] * B\n\t"
        "ldr	r8, [%[a], #220]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #220]\n\t"
        "# A[56] * B\n\t"
        "ldr	r8, [%[a], #224]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #224]\n\t"
        "# A[57] * B\n\t"
        "ldr	r8, [%[a], #228]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #228]\n\t"
        "# A[58] * B\n\t"
        "ldr	r8, [%[a], #232]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #232]\n\t"
        "# A[59] * B\n\t"
        "ldr	r8, [%[a], #236]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #236]\n\t"
        "# A[60] * B\n\t"
        "ldr	r8, [%[a], #240]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #240]\n\t"
        "# A[61] * B\n\t"
        "ldr	r8, [%[a], #244]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #244]\n\t"
        "# A[62] * B\n\t"
        "ldr	r8, [%[a], #248]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #248]\n\t"
        "# A[63] * B\n\t"
        "ldr	r8, [%[a], #252]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #252]\n\t"
        "# A[64] * B\n\t"
        "ldr	r8, [%[a], #256]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #256]\n\t"
        "# A[65] * B\n\t"
        "ldr	r8, [%[a], #260]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #260]\n\t"
        "# A[66] * B\n\t"
        "ldr	r8, [%[a], #264]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #264]\n\t"
        "# A[67] * B\n\t"
        "ldr	r8, [%[a], #268]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #268]\n\t"
        "# A[68] * B\n\t"
        "ldr	r8, [%[a], #272]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #272]\n\t"
        "# A[69] * B\n\t"
        "ldr	r8, [%[a], #276]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #276]\n\t"
        "# A[70] * B\n\t"
        "ldr	r8, [%[a], #280]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #280]\n\t"
        "# A[71] * B\n\t"
        "ldr	r8, [%[a], #284]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #284]\n\t"
        "# A[72] * B\n\t"
        "ldr	r8, [%[a], #288]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #288]\n\t"
        "# A[73] * B\n\t"
        "ldr	r8, [%[a], #292]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #292]\n\t"
        "# A[74] * B\n\t"
        "ldr	r8, [%[a], #296]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #296]\n\t"
        "# A[75] * B\n\t"
        "ldr	r8, [%[a], #300]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #300]\n\t"
        "# A[76] * B\n\t"
        "ldr	r8, [%[a], #304]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #304]\n\t"
        "# A[77] * B\n\t"
        "ldr	r8, [%[a], #308]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #308]\n\t"
        "# A[78] * B\n\t"
        "ldr	r8, [%[a], #312]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #312]\n\t"
        "# A[79] * B\n\t"
        "ldr	r8, [%[a], #316]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #316]\n\t"
        "# A[80] * B\n\t"
        "ldr	r8, [%[a], #320]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #320]\n\t"
        "# A[81] * B\n\t"
        "ldr	r8, [%[a], #324]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #324]\n\t"
        "# A[82] * B\n\t"
        "ldr	r8, [%[a], #328]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #328]\n\t"
        "# A[83] * B\n\t"
        "ldr	r8, [%[a], #332]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #332]\n\t"
        "# A[84] * B\n\t"
        "ldr	r8, [%[a], #336]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #336]\n\t"
        "# A[85] * B\n\t"
        "ldr	r8, [%[a], #340]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #340]\n\t"
        "# A[86] * B\n\t"
        "ldr	r8, [%[a], #344]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #344]\n\t"
        "# A[87] * B\n\t"
        "ldr	r8, [%[a], #348]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #348]\n\t"
        "# A[88] * B\n\t"
        "ldr	r8, [%[a], #352]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #352]\n\t"
        "# A[89] * B\n\t"
        "ldr	r8, [%[a], #356]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #356]\n\t"
        "# A[90] * B\n\t"
        "ldr	r8, [%[a], #360]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #360]\n\t"
        "# A[91] * B\n\t"
        "ldr	r8, [%[a], #364]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #364]\n\t"
        "# A[92] * B\n\t"
        "ldr	r8, [%[a], #368]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #368]\n\t"
        "# A[93] * B\n\t"
        "ldr	r8, [%[a], #372]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #372]\n\t"
        "# A[94] * B\n\t"
        "ldr	r8, [%[a], #376]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #376]\n\t"
        /* Last digit: no third accumulator word is needed; r3 becomes the
         * top (97th) digit of the result. */
        "# A[95] * B\n\t"
        "ldr	r8, [%[a], #380]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r5, [%[r], #380]\n\t"
        "str	r3, [%[r], #384]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#endif
}

#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
/* Compute the Montgomery normalizer: r = 2^n mod m, where n is the number
 * of bits in a 48-digit number.
 *
 * r is cleared and m is then subtracted from it in place, so r ends up as
 * the 48-digit wrap-around of 0 - m, i.e. 2^n - m.
 * NOTE(review): this equals 2^n mod m only when m uses the full 48-digit
 * width (top bit set) - confirm that callers guarantee this.
 *
 * r  A single precision number.
 * m  A single precision number.
 */
static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
{
    /* Start from zero across all 48 digits. */
    XMEMSET(r, 0, 48 * sizeof(*r));

    /* r = 0 - m; the value returned by the helper is not needed here. */
    (void)sp_3072_sub_in_place_48(r, m);
}

/* Conditionally subtract b from a using the mask m. (r = a - (b & m))
 *
 * Every digit of b is ANDed with m before being subtracted:
 *   m == -1 (all bits set): r = a - b
 *   m ==  0               : r = a (nothing is subtracted)
 * The same instruction sequence runs for either mask value; the only branch
 * (in the small variant) depends on the digit counter, not on the data.
 *
 * Both assembly variants modify the condition flags (subs/sbcs/sbc/cmp), so
 * "cc" is listed as a clobber; GCC does not assume this for ARM inline asm.
 *
 * r  A single precision number representing the conditional subtract result
 *    (48 digits written).
 * a  A single precision number to subtract from (48 digits read).
 * b  A single precision number to subtract (48 digits read).
 * m  Mask value to apply.
 * returns 0 when the subtraction did not borrow and -1 (all bits set) when
 * it did.
 */
static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
    sp_digit c = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Looping variant.
     *   r9  constant zero
     *   r8  byte offset of the digit being processed (0..188)
     *   c   borrow carried between iterations as 0 or -1: "subs c, r9, c"
     *       turns it back into the carry flag and "sbc c, r9, r9" captures
     *       the new borrow before the flags are overwritten by cmp.
     */
    __asm__ __volatile__ (
        "mov	r9, #0\n\t"
        "mov	r8, #0\n\t"
        "1:\n\t"
        "subs	%[c], r9, %[c]\n\t"
        "ldr	r4, [%[a], r8]\n\t"
        "ldr	r5, [%[b], r8]\n\t"
        "and	r5, r5, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbc	%[c], r9, r9\n\t"
        "str	r4, [%[r], r8]\n\t"
        "add	r8, r8, #4\n\t"
        "cmp	r8, #192\n\t"
        "blt	1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#else
    /* Fully unrolled variant: two digits per group, with the borrow kept in
     * the carry flag across the whole chain and converted to 0 / -1 by the
     * final "sbc c, r9, r9" (r9 is zero).
     */
    __asm__ __volatile__ (

        "mov	r9, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "ldr	r5, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "subs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r6, [%[r], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r5, [%[b], #8]\n\t"
        "ldr	r7, [%[b], #12]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r6, [%[a], #20]\n\t"
        "ldr	r5, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r6, [%[r], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r6, [%[a], #28]\n\t"
        "ldr	r5, [%[b], #24]\n\t"
        "ldr	r7, [%[b], #28]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r6, [%[a], #36]\n\t"
        "ldr	r5, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r6, [%[r], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r6, [%[a], #44]\n\t"
        "ldr	r5, [%[b], #40]\n\t"
        "ldr	r7, [%[b], #44]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #40]\n\t"
        "str	r6, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r6, [%[a], #52]\n\t"
        "ldr	r5, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r6, [%[r], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r6, [%[a], #60]\n\t"
        "ldr	r5, [%[b], #56]\n\t"
        "ldr	r7, [%[b], #60]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r6, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r6, [%[a], #68]\n\t"
        "ldr	r5, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r6, [%[r], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r6, [%[a], #76]\n\t"
        "ldr	r5, [%[b], #72]\n\t"
        "ldr	r7, [%[b], #76]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #72]\n\t"
        "str	r6, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r6, [%[a], #84]\n\t"
        "ldr	r5, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r6, [%[r], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r6, [%[a], #92]\n\t"
        "ldr	r5, [%[b], #88]\n\t"
        "ldr	r7, [%[b], #92]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #88]\n\t"
        "str	r6, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r6, [%[a], #100]\n\t"
        "ldr	r5, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r6, [%[r], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r6, [%[a], #108]\n\t"
        "ldr	r5, [%[b], #104]\n\t"
        "ldr	r7, [%[b], #108]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #104]\n\t"
        "str	r6, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r6, [%[a], #116]\n\t"
        "ldr	r5, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r6, [%[r], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r6, [%[a], #124]\n\t"
        "ldr	r5, [%[b], #120]\n\t"
        "ldr	r7, [%[b], #124]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #120]\n\t"
        "str	r6, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r6, [%[a], #132]\n\t"
        "ldr	r5, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r6, [%[r], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r6, [%[a], #140]\n\t"
        "ldr	r5, [%[b], #136]\n\t"
        "ldr	r7, [%[b], #140]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #136]\n\t"
        "str	r6, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r6, [%[a], #148]\n\t"
        "ldr	r5, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r6, [%[r], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r6, [%[a], #156]\n\t"
        "ldr	r5, [%[b], #152]\n\t"
        "ldr	r7, [%[b], #156]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #152]\n\t"
        "str	r6, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r6, [%[a], #164]\n\t"
        "ldr	r5, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r6, [%[r], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r6, [%[a], #172]\n\t"
        "ldr	r5, [%[b], #168]\n\t"
        "ldr	r7, [%[b], #172]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #168]\n\t"
        "str	r6, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r6, [%[a], #180]\n\t"
        "ldr	r5, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r6, [%[r], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r6, [%[a], #188]\n\t"
        "ldr	r5, [%[b], #184]\n\t"
        "ldr	r7, [%[b], #188]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #184]\n\t"
        "str	r6, [%[r], #188]\n\t"
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r4", "r6", "r5", "r7", "r8", "r9", "cc"
    );
#endif /* WOLFSSL_SP_SMALL */

    return c;
}

/* Reduce the number back to 3072 bits using Montgomery reduction.
 *
 * The loop runs 48 times (byte counter r12 goes 0..188 in steps of 4). On each
 * pass mu = a[i] * mp (low 32 bits) is computed and mu * m[0..47] is added
 * into a[i..i+47], with the final carry folded into a[i+48]. The carry out of
 * a[i+48] is kept in ca for the next pass. Afterwards the upper 48 words are
 * handed to sp_3072_cond_sub_48() with mask (0 - ca) and the result is
 * written to the lower 48 words of the original buffer.
 *
 * a   A single precision number to reduce in place. Words a[0..95] are
 *     accessed, so the buffer must hold at least 96 digits.
 * m   The single precision number representing the modulus (48 digits read).
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    /* Carry out of the top word, carried from one loop pass to the next. */
    sp_digit ca = 0;

    __asm__ __volatile__ (
        "# i = 0\n\t"
        "mov	r12, #0\n\t"
        /* r10 and r14 cache a[i+0] and a[i+1]; they are not stored inside
         * the loop, only once after it finishes. */
        "ldr	r10, [%[a], #0]\n\t"
        "ldr	r14, [%[a], #4]\n\t"
        "\n1:\n\t"
        "# mu = a[i] * mp\n\t"
        "mul	r8, %[mp], r10\n\t"
        "# a[i+0] += m[0] * mu\n\t"
        "ldr	r7, [%[m], #0]\n\t"
        "ldr	r9, [%[a], #0]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        /* Only the carry of this sum is used; r10 is overwritten below. */
        "adds	r10, r10, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "# a[i+1] += m[1] * mu\n\t"
        "ldr	r7, [%[m], #4]\n\t"
        "ldr	r9, [%[a], #4]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        /* New a[i+1] lands in r10: it is a[i+0] of the next pass. */
        "adds	r10, r14, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r10, r10, r5\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+2] += m[2] * mu\n\t"
        "ldr	r7, [%[m], #8]\n\t"
        /* New a[i+2] lands in r14: it is a[i+1] of the next pass. */
        "ldr	r14, [%[a], #8]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r14, r14, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r14, r14, r4\n\t"
        "adc	r5, r5, #0\n\t"
        /* From here on each word is loaded, updated and stored back; the
         * running carry alternates between r4 and r5. */
        "# a[i+3] += m[3] * mu\n\t"
        "ldr	r7, [%[m], #12]\n\t"
        "ldr	r9, [%[a], #12]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #12]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+4] += m[4] * mu\n\t"
        "ldr	r7, [%[m], #16]\n\t"
        "ldr	r9, [%[a], #16]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #16]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+5] += m[5] * mu\n\t"
        "ldr	r7, [%[m], #20]\n\t"
        "ldr	r9, [%[a], #20]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #20]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+6] += m[6] * mu\n\t"
        "ldr	r7, [%[m], #24]\n\t"
        "ldr	r9, [%[a], #24]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #24]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+7] += m[7] * mu\n\t"
        "ldr	r7, [%[m], #28]\n\t"
        "ldr	r9, [%[a], #28]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #28]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+8] += m[8] * mu\n\t"
        "ldr	r7, [%[m], #32]\n\t"
        "ldr	r9, [%[a], #32]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #32]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+9] += m[9] * mu\n\t"
        "ldr	r7, [%[m], #36]\n\t"
        "ldr	r9, [%[a], #36]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #36]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+10] += m[10] * mu\n\t"
        "ldr	r7, [%[m], #40]\n\t"
        "ldr	r9, [%[a], #40]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #40]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+11] += m[11] * mu\n\t"
        "ldr	r7, [%[m], #44]\n\t"
        "ldr	r9, [%[a], #44]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #44]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+12] += m[12] * mu\n\t"
        "ldr	r7, [%[m], #48]\n\t"
        "ldr	r9, [%[a], #48]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #48]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+13] += m[13] * mu\n\t"
        "ldr	r7, [%[m], #52]\n\t"
        "ldr	r9, [%[a], #52]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #52]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+14] += m[14] * mu\n\t"
        "ldr	r7, [%[m], #56]\n\t"
        "ldr	r9, [%[a], #56]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #56]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+15] += m[15] * mu\n\t"
        "ldr	r7, [%[m], #60]\n\t"
        "ldr	r9, [%[a], #60]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #60]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+16] += m[16] * mu\n\t"
        "ldr	r7, [%[m], #64]\n\t"
        "ldr	r9, [%[a], #64]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #64]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+17] += m[17] * mu\n\t"
        "ldr	r7, [%[m], #68]\n\t"
        "ldr	r9, [%[a], #68]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #68]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+18] += m[18] * mu\n\t"
        "ldr	r7, [%[m], #72]\n\t"
        "ldr	r9, [%[a], #72]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #72]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+19] += m[19] * mu\n\t"
        "ldr	r7, [%[m], #76]\n\t"
        "ldr	r9, [%[a], #76]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #76]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+20] += m[20] * mu\n\t"
        "ldr	r7, [%[m], #80]\n\t"
        "ldr	r9, [%[a], #80]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #80]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+21] += m[21] * mu\n\t"
        "ldr	r7, [%[m], #84]\n\t"
        "ldr	r9, [%[a], #84]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #84]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+22] += m[22] * mu\n\t"
        "ldr	r7, [%[m], #88]\n\t"
        "ldr	r9, [%[a], #88]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #88]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+23] += m[23] * mu\n\t"
        "ldr	r7, [%[m], #92]\n\t"
        "ldr	r9, [%[a], #92]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #92]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+24] += m[24] * mu\n\t"
        "ldr	r7, [%[m], #96]\n\t"
        "ldr	r9, [%[a], #96]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #96]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+25] += m[25] * mu\n\t"
        "ldr	r7, [%[m], #100]\n\t"
        "ldr	r9, [%[a], #100]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #100]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+26] += m[26] * mu\n\t"
        "ldr	r7, [%[m], #104]\n\t"
        "ldr	r9, [%[a], #104]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #104]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+27] += m[27] * mu\n\t"
        "ldr	r7, [%[m], #108]\n\t"
        "ldr	r9, [%[a], #108]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #108]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+28] += m[28] * mu\n\t"
        "ldr	r7, [%[m], #112]\n\t"
        "ldr	r9, [%[a], #112]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #112]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+29] += m[29] * mu\n\t"
        "ldr	r7, [%[m], #116]\n\t"
        "ldr	r9, [%[a], #116]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #116]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+30] += m[30] * mu\n\t"
        "ldr	r7, [%[m], #120]\n\t"
        "ldr	r9, [%[a], #120]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #120]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+31] += m[31] * mu\n\t"
        "ldr	r7, [%[m], #124]\n\t"
        "ldr	r9, [%[a], #124]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #124]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+32] += m[32] * mu\n\t"
        "ldr	r7, [%[m], #128]\n\t"
        "ldr	r9, [%[a], #128]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #128]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+33] += m[33] * mu\n\t"
        "ldr	r7, [%[m], #132]\n\t"
        "ldr	r9, [%[a], #132]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #132]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+34] += m[34] * mu\n\t"
        "ldr	r7, [%[m], #136]\n\t"
        "ldr	r9, [%[a], #136]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #136]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+35] += m[35] * mu\n\t"
        "ldr	r7, [%[m], #140]\n\t"
        "ldr	r9, [%[a], #140]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #140]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+36] += m[36] * mu\n\t"
        "ldr	r7, [%[m], #144]\n\t"
        "ldr	r9, [%[a], #144]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #144]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+37] += m[37] * mu\n\t"
        "ldr	r7, [%[m], #148]\n\t"
        "ldr	r9, [%[a], #148]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #148]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+38] += m[38] * mu\n\t"
        "ldr	r7, [%[m], #152]\n\t"
        "ldr	r9, [%[a], #152]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #152]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+39] += m[39] * mu\n\t"
        "ldr	r7, [%[m], #156]\n\t"
        "ldr	r9, [%[a], #156]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #156]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+40] += m[40] * mu\n\t"
        "ldr	r7, [%[m], #160]\n\t"
        "ldr	r9, [%[a], #160]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #160]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+41] += m[41] * mu\n\t"
        "ldr	r7, [%[m], #164]\n\t"
        "ldr	r9, [%[a], #164]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #164]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+42] += m[42] * mu\n\t"
        "ldr	r7, [%[m], #168]\n\t"
        "ldr	r9, [%[a], #168]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #168]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+43] += m[43] * mu\n\t"
        "ldr	r7, [%[m], #172]\n\t"
        "ldr	r9, [%[a], #172]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #172]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+44] += m[44] * mu\n\t"
        "ldr	r7, [%[m], #176]\n\t"
        "ldr	r9, [%[a], #176]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #176]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+45] += m[45] * mu\n\t"
        "ldr	r7, [%[m], #180]\n\t"
        "ldr	r9, [%[a], #180]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #180]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+46] += m[46] * mu\n\t"
        "ldr	r7, [%[m], #184]\n\t"
        "ldr	r9, [%[a], #184]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #184]\n\t"
        "adc	r5, r5, #0\n\t"
        /* Top word: the high product word also absorbs the carry (ca) left by
         * the previous pass, and the new carry out of a[i+48] is collected
         * back into ca. */
        "# a[i+47] += m[47] * mu\n\t"
        "ldr	r7, [%[m], #188]\n\t"
        "ldr   r9, [%[a], #188]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r7, r7, %[ca]\n\t"
        "mov	%[ca], #0\n\t"
        "adc	%[ca], %[ca], %[ca]\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #188]\n\t"
        "ldr	r9, [%[a], #192]\n\t"
        "adcs	r9, r9, r7\n\t"
        "str	r9, [%[a], #192]\n\t"
        "adc	%[ca], %[ca], #0\n\t"
        "# i += 1\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	r12, r12, #4\n\t"
        "cmp	r12, #192\n\t"
        "blt	1b\n\t"
        /* Flush the two cached words; a now points 48 words past its
         * original value. */
        "str	r10, [%[a], #0]\n\t"
        "str	r14, [%[a], #4]\n\t"
        : [ca] "+r" (ca), [a] "+r" (a)
        : [m] "r" (m), [mp] "r" (mp)
        /* "cc": adds/adcs/cmp above modify the condition flags. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    /* a was advanced by 48 words inside the asm: (a - 48) is the original
     * buffer start and a is its upper half. The mask is all ones when ca is 1
     * and zero when ca is 0. */
    sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
}

/* Multiply two Montgomery form numbers mod the modulus.
 * (r = a * b mod m)
 *
 * The full product is written to r by sp_3072_mul_48() and then reduced in
 * place by sp_3072_mont_reduce_48(), which accesses 96 digits of its buffer,
 * so r must have room for 96 digits.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier.
 */
static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m, sp_digit mp)
{
    sp_3072_mul_48(r, a, b);
    sp_3072_mont_reduce_48(r, m, mp);
}

/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The full square is written to r by sp_3072_sqr_48() and then reduced in
 * place by sp_3072_mont_reduce_48(), which accesses 96 digits of its buffer,
 * so r must have room for 96 digits.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier.
 */
static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_3072_sqr_48(r, a);
    sp_3072_mont_reduce_48(r, m, mp);
}

/* Mul a by digit b into r. (r = a * b)
 *
 * Reads the 48 words a[0..47] and writes the 49 words r[0..48]; the final
 * store is at byte offset 192, so r must have room for 49 digits.
 *
 * r  A single precision integer that receives the product (49 digits).
 * a  A single precision integer (48 digits).
 * b  A single precision digit.
 */
static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
#ifdef WOLFSSL_SP_SMALL
    /* Looped variant: r3 is the word about to be stored, r4/r5 hold the
     * carries into the next words, r9 is the byte offset, r10 is zero. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r5, r3, %[b], r8\n\t"
        "mov	r4, #0\n\t"
        "str	r5, [%[r]]\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, #4\n\t"
        "1:\n\t"
        "ldr	r8, [%[a], r9]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], r9]\n\t"
        /* Shift the accumulator down by one word for the next pass. */
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r9, r9, #4\n\t"
        "cmp	r9, #192\n\t"
        "blt	1b\n\t"
        "str	r3, [%[r], #192]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs/cmp above modify the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#else
    /* Unrolled variant: r3, r4 and r5 rotate through the roles of current
     * word, next word and carry word; r10 is zero. */
    __asm__ __volatile__ (
        "mov	r10, #0\n\t"
        "# A[0] * B\n\t"
        "ldr	r8, [%[a]]\n\t"
        "umull	r3, r4, %[b], r8\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [%[r]]\n\t"
        "# A[1] * B\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #4]\n\t"
        "# A[2] * B\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #8]\n\t"
        "# A[3] * B\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #12]\n\t"
        "# A[4] * B\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #16]\n\t"
        "# A[5] * B\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #20]\n\t"
        "# A[6] * B\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #24]\n\t"
        "# A[7] * B\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #28]\n\t"
        "# A[8] * B\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #32]\n\t"
        "# A[9] * B\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #36]\n\t"
        "# A[10] * B\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #40]\n\t"
        "# A[11] * B\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #44]\n\t"
        "# A[12] * B\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #48]\n\t"
        "# A[13] * B\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #52]\n\t"
        "# A[14] * B\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #56]\n\t"
        "# A[15] * B\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #60]\n\t"
        "# A[16] * B\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #64]\n\t"
        "# A[17] * B\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #68]\n\t"
        "# A[18] * B\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #72]\n\t"
        "# A[19] * B\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #76]\n\t"
        "# A[20] * B\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #80]\n\t"
        "# A[21] * B\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #84]\n\t"
        "# A[22] * B\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #88]\n\t"
        "# A[23] * B\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #92]\n\t"
        "# A[24] * B\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #96]\n\t"
        "# A[25] * B\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #100]\n\t"
        "# A[26] * B\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #104]\n\t"
        "# A[27] * B\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #108]\n\t"
        "# A[28] * B\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #112]\n\t"
        "# A[29] * B\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #116]\n\t"
        "# A[30] * B\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #120]\n\t"
        "# A[31] * B\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #124]\n\t"
        "# A[32] * B\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #128]\n\t"
        "# A[33] * B\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #132]\n\t"
        "# A[34] * B\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #136]\n\t"
        "# A[35] * B\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #140]\n\t"
        "# A[36] * B\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #144]\n\t"
        "# A[37] * B\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #148]\n\t"
        "# A[38] * B\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #152]\n\t"
        "# A[39] * B\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #156]\n\t"
        "# A[40] * B\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #160]\n\t"
        "# A[41] * B\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #164]\n\t"
        "# A[42] * B\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #168]\n\t"
        "# A[43] * B\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #172]\n\t"
        "# A[44] * B\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "mov	r4, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [%[r], #176]\n\t"
        "# A[45] * B\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "mov	r5, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [%[r], #180]\n\t"
        "# A[46] * B\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "mov	r3, #0\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [%[r], #184]\n\t"
        /* Last word: the remaining high word becomes r[48]. */
        "# A[47] * B\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "umull	r6, r7, %[b], r8\n\t"
        "adds	r5, r5, r6\n\t"
        "adc	r3, r3, r7\n\t"
        "str	r5, [%[r], #188]\n\t"
        "str	r3, [%[r], #192]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc": adds/adcs above modify the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
    );
#endif
}

/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * A quotient estimate is built one bit at a time (one step on the top word
 * plus 30 loop passes) by comparing the running remainder against
 * (div >> 1) + 1, then doubled with the low bit set. Two multiply-back
 * rounds then add the high word of (d1|d0 - r * div) to the estimate, and a
 * final compare against div may add one more.
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 */
static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
{
    sp_digit r = 0;

    __asm__ __volatile__ (
        /* r5 = (div >> 1) + 1: the value each trial compare is made against. */
        "lsr	r5, %[div], #1\n\t"
        "add	r5, r5, #1\n\t"
        /* r7:r6 is the working copy of d1:d0. */
        "mov	r6, %[d0]\n\t"
        "mov	r7, %[d1]\n\t"
        "# Do top 32\n\t"
        /* r8 = all ones when r7 > r5, else zero; it supplies both the new
         * quotient bit and the mask for the conditional subtract. */
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "# Next 30 bits\n\t"
        "mov	r4, #29\n\t"
        "1:\n\t"
        /* Shift the next bit of d0 into the running remainder. */
        "movs	r6, r6, lsl #1\n\t"
        "adc	r7, r7, r7\n\t"
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "subs	r4, r4, #1\n\t"
        "bpl	1b\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "add	%[r], %[r], #1\n\t"
        /* Correction round 1: r += high word of (d1|d0 - r * div). */
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Correction round 2: same again with the updated estimate. */
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Final adjust: add one when div < r4 (low word of the difference). */
        "subs	r8, %[div], r4\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        : [r] "+r" (r)
        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
        /* "cc": subs/movs above modify the condition flags. */
        : "r4", "r5", "r6", "r7", "r8", "cc"
    );
    return r;
}

/* Compare a with b in constant time.
 *
 * The 48 words are examined from the most significant (byte offset 188) down
 * to the least significant (byte offset 0). r3 is a mask that starts as all
 * ones and is cleared at the first word that differs, so every later word is
 * compared as 0 against 0 and cannot alter the result. All words are loaded
 * and processed regardless of where the first difference is found.
 *
 * r starts as all ones; the final "eor r, r, r3" turns it into 0 only when the
 * mask was never cleared, that is when no word differed.
 *
 * The assembly modifies the condition flags and reads memory through a and b,
 * so "cc" and "memory" are declared as clobbers to stop the compiler from
 * keeping flags live across the statement or delaying stores to the operands.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = -1;
    sp_digit one = 1;


#ifdef WOLFSSL_SP_SMALL
    /* Looped form: r6 is the byte offset, counting down from 188 to 0. */
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "mov	r6, #188\n\t"
        "1:\n\t"
        "ldr	r4, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
        "and	r4, r4, r3\n\t"
        "and	r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "subs	r6, r6, #4\n\t"
        "bcs	1b\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        : "cc", "memory", "r3", "r4", "r5", "r6", "r7"
    );
#else
    /* Unrolled form: the same per-word sequence repeated for each offset. */
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "ldr		r4, [%[a], #188]\n\t"
        "ldr		r5, [%[b], #188]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #184]\n\t"
        "ldr		r5, [%[b], #184]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #180]\n\t"
        "ldr		r5, [%[b], #180]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #176]\n\t"
        "ldr		r5, [%[b], #176]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #172]\n\t"
        "ldr		r5, [%[b], #172]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #168]\n\t"
        "ldr		r5, [%[b], #168]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #164]\n\t"
        "ldr		r5, [%[b], #164]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #160]\n\t"
        "ldr		r5, [%[b], #160]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #156]\n\t"
        "ldr		r5, [%[b], #156]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #152]\n\t"
        "ldr		r5, [%[b], #152]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #148]\n\t"
        "ldr		r5, [%[b], #148]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #144]\n\t"
        "ldr		r5, [%[b], #144]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #140]\n\t"
        "ldr		r5, [%[b], #140]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #136]\n\t"
        "ldr		r5, [%[b], #136]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #132]\n\t"
        "ldr		r5, [%[b], #132]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #128]\n\t"
        "ldr		r5, [%[b], #128]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #124]\n\t"
        "ldr		r5, [%[b], #124]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #120]\n\t"
        "ldr		r5, [%[b], #120]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #116]\n\t"
        "ldr		r5, [%[b], #116]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #112]\n\t"
        "ldr		r5, [%[b], #112]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #108]\n\t"
        "ldr		r5, [%[b], #108]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #104]\n\t"
        "ldr		r5, [%[b], #104]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #100]\n\t"
        "ldr		r5, [%[b], #100]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #96]\n\t"
        "ldr		r5, [%[b], #96]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #92]\n\t"
        "ldr		r5, [%[b], #92]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #88]\n\t"
        "ldr		r5, [%[b], #88]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #84]\n\t"
        "ldr		r5, [%[b], #84]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #80]\n\t"
        "ldr		r5, [%[b], #80]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #76]\n\t"
        "ldr		r5, [%[b], #76]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #72]\n\t"
        "ldr		r5, [%[b], #72]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #68]\n\t"
        "ldr		r5, [%[b], #68]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #64]\n\t"
        "ldr		r5, [%[b], #64]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #60]\n\t"
        "ldr		r5, [%[b], #60]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #56]\n\t"
        "ldr		r5, [%[b], #56]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #52]\n\t"
        "ldr		r5, [%[b], #52]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #48]\n\t"
        "ldr		r5, [%[b], #48]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #44]\n\t"
        "ldr		r5, [%[b], #44]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #40]\n\t"
        "ldr		r5, [%[b], #40]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #36]\n\t"
        "ldr		r5, [%[b], #36]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #32]\n\t"
        "ldr		r5, [%[b], #32]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #28]\n\t"
        "ldr		r5, [%[b], #28]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #24]\n\t"
        "ldr		r5, [%[b], #24]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #20]\n\t"
        "ldr		r5, [%[b], #20]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #16]\n\t"
        "ldr		r5, [%[b], #16]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #12]\n\t"
        "ldr		r5, [%[b], #12]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #8]\n\t"
        "ldr		r5, [%[b], #8]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #4]\n\t"
        "ldr		r5, [%[b], #4]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #0]\n\t"
        "ldr		r5, [%[b], #0]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        : "cc", "memory", "r3", "r4", "r5", "r6", "r7"
    );
#endif

    return r;
}

/* Divide a by d and store the remainder in r (m*d + r = a).
 * The quotient m is never written as no caller needs it at this time.
 *
 * a  Number to be divided (2 * 48 digits are copied from it).
 * d  Number to divide with.
 * m  Multiplier result - ignored.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit rem[2 * 48];
    sp_digit prod[48 + 1];
    sp_digit top;
    sp_digit q;
    sp_digit* lo;
    sp_digit* hi;
    int idx;
    int pass;

    (void)m;


    /* Quotient digits are estimated against the top digit of the divisor. */
    top = d[48 - 1];
    XMEMCPY(rem, a, sizeof(rem));

    idx = 48;
    while (idx-- > 0) {
        lo = &rem[idx];
        hi = &rem[48 + idx];

        /* Estimate the next quotient digit from the top two digits. */
        q = div_3072_word_48(hi[0], hi[-1], top);

        /* Remove q * d from the current 49 digit window. */
        sp_3072_mul_d_48(prod, d, q);
        *hi += sp_3072_sub_in_place_48(lo, prod);
        *hi -= prod[48];

        /* Two masked add-backs of d, each selected by the window's top digit. */
        for (pass = 0; pass < 2; pass++) {
            sp_3072_mask_48(prod, d, *hi);
            *hi += sp_3072_add_48(lo, lo, prod);
        }
    }

    /* One last conditional subtract when the low 48 digits are still >= d. */
    q = (sp_digit)(sp_3072_cmp_48(rem, d) >= 0);
    sp_3072_cond_sub_48(r, rem, d, (sp_digit)0 - q);

    return MP_OKAY;
}

/* Compute r = a mod m by running the division and keeping the remainder.
 *
 * r  A single precision number that receives the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    /* No quotient buffer is supplied; only the remainder is wanted. */
    int ret = sp_3072_div_48(a, m, NULL, r);

    return ret;
}

#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * A table of 16 entries is built with the Montgomery multiply/square helpers
 * (t[0] from sp_3072_mont_norm_48, t[1] from a, t[k] from products of earlier
 * entries) and the exponent is then consumed 4 bits at a time starting from
 * its most significant end.
 *
 * r     A single precision number that is the result of the operation.
 *       The upper 48 digits are cleared here, so r must hold 96 digits.
 * a     A single precision number being exponentiated.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * reduceA  When not zero, a is first reduced modulo m.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[16][96];
#else
    sp_digit* t[16];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<16; i++) {
            t[i] = td + i * 96;
        }
#endif
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_48(norm, m);

        /* t[1] = (a placed in the upper 48 digits, zeros below) mod m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
        if (reduceA != 0) {
            err = sp_3072_mod_48(t[1] + 48, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_48(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
            err = sp_3072_mod_48(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);

        /* Take the leading (bits % 4) bits as the first window so that the
         * remaining bit count is a multiple of the window size. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        /* 32 is a multiple of 4, so c cannot be negative for bits >= 0. */
        if (c == 0) {
            /* All bits of the top word form the first window. Shifting n by
             * the full word width would be undefined, and n is reloaded by
             * the loop below before it is used again. */
            y = (int)(n & 0xf);
        }
        else {
            /* Mask keeps the table index within the 16 entries. */
            y = (int)((n >> c) & 0xf);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted - window is top of the next word. */
                n = e[i--];
                y = n >> 28;
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles the current and the next word. */
                y = n >> 28;
                n = e[i--];
                c = 4 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 28) & 0xf;
                n <<= 4;
                c -= 4;
            }

            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);

            sp_3072_mont_mul_48(r, r, t[y], m, mp);
        }

        /* Final Montgomery reduction, then one conditional subtract of m. */
        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
        sp_3072_mont_reduce_48(r, m, mp);

        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
        sp_3072_cond_sub_48(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * A table of 32 entries is built with the Montgomery multiply/square helpers
 * (t[0] from sp_3072_mont_norm_48, t[1] from a, t[k] from products of earlier
 * entries) and the exponent is then consumed 5 bits at a time starting from
 * its most significant end.
 *
 * r     A single precision number that is the result of the operation.
 *       The upper 48 digits are cleared here, so r must hold 96 digits.
 * a     A single precision number being exponentiated.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * reduceA  When not zero, a is first reduced modulo m.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][96];
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        for (i=0; i<32; i++) {
            t[i] = td + i * 96;
        }
#endif
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_48(norm, m);

        /* t[1] = (a placed in the upper 48 digits, zeros below) mod m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
        if (reduceA != 0) {
            err = sp_3072_mod_48(t[1] + 48, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_48(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
            err = sp_3072_mod_48(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
        sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
        sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
        sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
        sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
        sp_3072_mont_sqr_48(t[20], t[10], m, mp);
        sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
        sp_3072_mont_sqr_48(t[22], t[11], m, mp);
        sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
        sp_3072_mont_sqr_48(t[24], t[12], m, mp);
        sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
        sp_3072_mont_sqr_48(t[26], t[13], m, mp);
        sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
        sp_3072_mont_sqr_48(t[28], t[14], m, mp);
        sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
        sp_3072_mont_sqr_48(t[30], t[15], m, mp);
        sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);

        /* Take the leading (bits % 5) bits as the first window so that the
         * remaining bit count is a multiple of the window size. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* The top word holds fewer bits than the first window needs
             * (e.g. bits = 33), so the window also takes the leading -c bits
             * of the next word. Shifting by a negative count is undefined and
             * would leave c negative, making the loop below extract more than
             * 5 bits and index beyond the table. */
            c = -c;
            y = (int)(n << c);
            n = e[i--];
            y |= (int)(n >> (32 - c));
            y &= 0x1f;
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits of the top word form the first window. Shifting n by
             * the full word width would be undefined, and n is reloaded by
             * the loop below before it is used again. */
            y = (int)(n & 0x1f);
        }
        else {
            /* Mask keeps the table index within the 32 entries. */
            y = (int)((n >> c) & 0x1f);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted - window is top of the next word. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles the current and the next word. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);
            sp_3072_mont_sqr_48(r, r, m, mp);

            sp_3072_mont_mul_48(r, r, t[y], m, mp);
        }

        /* Final Montgomery reduction, then one conditional subtract of m. */
        XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
        sp_3072_mont_reduce_48(r, m, mp);

        mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
        sp_3072_cond_sub_48(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* WOLFSSL_SP_SMALL */

#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */

#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
/* Calculate r = 2^n mod m, where n is the number of bits to reduce by.
 * As m must be a full 3072 bits, a single subtraction from zero is enough.
 *
 * r  A single precision number that receives the result (96 digits).
 * m  A single precision number that is the modulus.
 */
static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
{
    /* Clear all 96 digits so that the subtraction below computes 0 - m. */
    XMEMSET(r, 0, sizeof(*r) * 96);

    /* r = 2^n mod m */
    (void)sp_3072_sub_in_place_96(r, m);
}

#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 when not subtracting.
 *
 * Each 32-bit word of b is ANDed with m before it is subtracted, so the same
 * instruction sequence is executed for either mask value. 96 words
 * (384 bytes) are processed.
 *
 * r  A single precision number representing the conditional subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 * returns 0 when there was no borrow out of the top word and -1 (all bits
 * set) when there was.
 */
static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
    sp_digit c = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Compact loop: one word per iteration, byte offset held in r8.
     * The assembly changes the condition flags, so "cc" is listed as
     * clobbered. */
    __asm__ __volatile__ (
        "mov	r9, #0\n\t"
        "mov	r8, #0\n\t"
        "1:\n\t"
        /* Move the saved borrow back into the carry flag: 0 - c leaves the
         * carry set when c is 0 and clear when c is -1. */
        "subs	%[c], r9, %[c]\n\t"
        "ldr	r4, [%[a], r8]\n\t"
        "ldr	r5, [%[b], r8]\n\t"
        "and	r5, r5, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        /* Save the borrow as 0 or -1; the cmp below overwrites the flags. */
        "sbc	%[c], r9, r9\n\t"
        "str	r4, [%[r], r8]\n\t"
        "add	r8, r8, #4\n\t"
        "cmp	r8, #384\n\t"
        "blt	1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "cc", "r4", "r6", "r5", "r7", "r8", "r9"
    );
#else
    /* Fully unrolled: two words per group, with the borrow carried in the
     * carry flag from the first subs through the whole sbcs chain.
     * The assembly changes the condition flags, so "cc" is listed as
     * clobbered. */
    __asm__ __volatile__ (

        "mov	r9, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r6, [%[a], #4]\n\t"
        "ldr	r5, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "subs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r6, [%[r], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r6, [%[a], #12]\n\t"
        "ldr	r5, [%[b], #8]\n\t"
        "ldr	r7, [%[b], #12]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #8]\n\t"
        "str	r6, [%[r], #12]\n\t"
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r6, [%[a], #20]\n\t"
        "ldr	r5, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r6, [%[r], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r6, [%[a], #28]\n\t"
        "ldr	r5, [%[b], #24]\n\t"
        "ldr	r7, [%[b], #28]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #24]\n\t"
        "str	r6, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r6, [%[a], #36]\n\t"
        "ldr	r5, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r6, [%[r], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r6, [%[a], #44]\n\t"
        "ldr	r5, [%[b], #40]\n\t"
        "ldr	r7, [%[b], #44]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #40]\n\t"
        "str	r6, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r6, [%[a], #52]\n\t"
        "ldr	r5, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r6, [%[r], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r6, [%[a], #60]\n\t"
        "ldr	r5, [%[b], #56]\n\t"
        "ldr	r7, [%[b], #60]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #56]\n\t"
        "str	r6, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r6, [%[a], #68]\n\t"
        "ldr	r5, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r6, [%[r], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r6, [%[a], #76]\n\t"
        "ldr	r5, [%[b], #72]\n\t"
        "ldr	r7, [%[b], #76]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #72]\n\t"
        "str	r6, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r6, [%[a], #84]\n\t"
        "ldr	r5, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r6, [%[r], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r6, [%[a], #92]\n\t"
        "ldr	r5, [%[b], #88]\n\t"
        "ldr	r7, [%[b], #92]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #88]\n\t"
        "str	r6, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r6, [%[a], #100]\n\t"
        "ldr	r5, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r6, [%[r], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r6, [%[a], #108]\n\t"
        "ldr	r5, [%[b], #104]\n\t"
        "ldr	r7, [%[b], #108]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #104]\n\t"
        "str	r6, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r6, [%[a], #116]\n\t"
        "ldr	r5, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r6, [%[r], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r6, [%[a], #124]\n\t"
        "ldr	r5, [%[b], #120]\n\t"
        "ldr	r7, [%[b], #124]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #120]\n\t"
        "str	r6, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r6, [%[a], #132]\n\t"
        "ldr	r5, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r6, [%[r], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r6, [%[a], #140]\n\t"
        "ldr	r5, [%[b], #136]\n\t"
        "ldr	r7, [%[b], #140]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #136]\n\t"
        "str	r6, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r6, [%[a], #148]\n\t"
        "ldr	r5, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r6, [%[r], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r6, [%[a], #156]\n\t"
        "ldr	r5, [%[b], #152]\n\t"
        "ldr	r7, [%[b], #156]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #152]\n\t"
        "str	r6, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r6, [%[a], #164]\n\t"
        "ldr	r5, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r6, [%[r], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r6, [%[a], #172]\n\t"
        "ldr	r5, [%[b], #168]\n\t"
        "ldr	r7, [%[b], #172]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #168]\n\t"
        "str	r6, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r6, [%[a], #180]\n\t"
        "ldr	r5, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r6, [%[r], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r6, [%[a], #188]\n\t"
        "ldr	r5, [%[b], #184]\n\t"
        "ldr	r7, [%[b], #188]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #184]\n\t"
        "str	r6, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r6, [%[a], #196]\n\t"
        "ldr	r5, [%[b], #192]\n\t"
        "ldr	r7, [%[b], #196]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r6, [%[r], #196]\n\t"
        "ldr	r4, [%[a], #200]\n\t"
        "ldr	r6, [%[a], #204]\n\t"
        "ldr	r5, [%[b], #200]\n\t"
        "ldr	r7, [%[b], #204]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #200]\n\t"
        "str	r6, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r6, [%[a], #212]\n\t"
        "ldr	r5, [%[b], #208]\n\t"
        "ldr	r7, [%[b], #212]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r6, [%[r], #212]\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "ldr	r6, [%[a], #220]\n\t"
        "ldr	r5, [%[b], #216]\n\t"
        "ldr	r7, [%[b], #220]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #216]\n\t"
        "str	r6, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r6, [%[a], #228]\n\t"
        "ldr	r5, [%[b], #224]\n\t"
        "ldr	r7, [%[b], #228]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r6, [%[r], #228]\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "ldr	r6, [%[a], #236]\n\t"
        "ldr	r5, [%[b], #232]\n\t"
        "ldr	r7, [%[b], #236]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #232]\n\t"
        "str	r6, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r6, [%[a], #244]\n\t"
        "ldr	r5, [%[b], #240]\n\t"
        "ldr	r7, [%[b], #244]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r6, [%[r], #244]\n\t"
        "ldr	r4, [%[a], #248]\n\t"
        "ldr	r6, [%[a], #252]\n\t"
        "ldr	r5, [%[b], #248]\n\t"
        "ldr	r7, [%[b], #252]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #248]\n\t"
        "str	r6, [%[r], #252]\n\t"
        "ldr	r4, [%[a], #256]\n\t"
        "ldr	r6, [%[a], #260]\n\t"
        "ldr	r5, [%[b], #256]\n\t"
        "ldr	r7, [%[b], #260]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #256]\n\t"
        "str	r6, [%[r], #260]\n\t"
        "ldr	r4, [%[a], #264]\n\t"
        "ldr	r6, [%[a], #268]\n\t"
        "ldr	r5, [%[b], #264]\n\t"
        "ldr	r7, [%[b], #268]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #264]\n\t"
        "str	r6, [%[r], #268]\n\t"
        "ldr	r4, [%[a], #272]\n\t"
        "ldr	r6, [%[a], #276]\n\t"
        "ldr	r5, [%[b], #272]\n\t"
        "ldr	r7, [%[b], #276]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #272]\n\t"
        "str	r6, [%[r], #276]\n\t"
        "ldr	r4, [%[a], #280]\n\t"
        "ldr	r6, [%[a], #284]\n\t"
        "ldr	r5, [%[b], #280]\n\t"
        "ldr	r7, [%[b], #284]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #280]\n\t"
        "str	r6, [%[r], #284]\n\t"
        "ldr	r4, [%[a], #288]\n\t"
        "ldr	r6, [%[a], #292]\n\t"
        "ldr	r5, [%[b], #288]\n\t"
        "ldr	r7, [%[b], #292]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #288]\n\t"
        "str	r6, [%[r], #292]\n\t"
        "ldr	r4, [%[a], #296]\n\t"
        "ldr	r6, [%[a], #300]\n\t"
        "ldr	r5, [%[b], #296]\n\t"
        "ldr	r7, [%[b], #300]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #296]\n\t"
        "str	r6, [%[r], #300]\n\t"
        "ldr	r4, [%[a], #304]\n\t"
        "ldr	r6, [%[a], #308]\n\t"
        "ldr	r5, [%[b], #304]\n\t"
        "ldr	r7, [%[b], #308]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #304]\n\t"
        "str	r6, [%[r], #308]\n\t"
        "ldr	r4, [%[a], #312]\n\t"
        "ldr	r6, [%[a], #316]\n\t"
        "ldr	r5, [%[b], #312]\n\t"
        "ldr	r7, [%[b], #316]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #312]\n\t"
        "str	r6, [%[r], #316]\n\t"
        "ldr	r4, [%[a], #320]\n\t"
        "ldr	r6, [%[a], #324]\n\t"
        "ldr	r5, [%[b], #320]\n\t"
        "ldr	r7, [%[b], #324]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #320]\n\t"
        "str	r6, [%[r], #324]\n\t"
        "ldr	r4, [%[a], #328]\n\t"
        "ldr	r6, [%[a], #332]\n\t"
        "ldr	r5, [%[b], #328]\n\t"
        "ldr	r7, [%[b], #332]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #328]\n\t"
        "str	r6, [%[r], #332]\n\t"
        "ldr	r4, [%[a], #336]\n\t"
        "ldr	r6, [%[a], #340]\n\t"
        "ldr	r5, [%[b], #336]\n\t"
        "ldr	r7, [%[b], #340]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #336]\n\t"
        "str	r6, [%[r], #340]\n\t"
        "ldr	r4, [%[a], #344]\n\t"
        "ldr	r6, [%[a], #348]\n\t"
        "ldr	r5, [%[b], #344]\n\t"
        "ldr	r7, [%[b], #348]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #344]\n\t"
        "str	r6, [%[r], #348]\n\t"
        "ldr	r4, [%[a], #352]\n\t"
        "ldr	r6, [%[a], #356]\n\t"
        "ldr	r5, [%[b], #352]\n\t"
        "ldr	r7, [%[b], #356]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #352]\n\t"
        "str	r6, [%[r], #356]\n\t"
        "ldr	r4, [%[a], #360]\n\t"
        "ldr	r6, [%[a], #364]\n\t"
        "ldr	r5, [%[b], #360]\n\t"
        "ldr	r7, [%[b], #364]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #360]\n\t"
        "str	r6, [%[r], #364]\n\t"
        "ldr	r4, [%[a], #368]\n\t"
        "ldr	r6, [%[a], #372]\n\t"
        "ldr	r5, [%[b], #368]\n\t"
        "ldr	r7, [%[b], #372]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #368]\n\t"
        "str	r6, [%[r], #372]\n\t"
        "ldr	r4, [%[a], #376]\n\t"
        "ldr	r6, [%[a], #380]\n\t"
        "ldr	r5, [%[b], #376]\n\t"
        "ldr	r7, [%[b], #380]\n\t"
        "and	r5, r5, %[m]\n\t"
        "and	r7, r7, %[m]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "sbcs	r6, r6, r7\n\t"
        "str	r4, [%[r], #376]\n\t"
        "str	r6, [%[r], #380]\n\t"
        /* Turn the final carry flag into the return value: 0 when there was
         * no borrow, -1 when there was. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "cc", "r4", "r6", "r5", "r7", "r8", "r9"
    );
#endif /* WOLFSSL_SP_SMALL */

    return c;
}

/* Reduce the number back to 3072 bits using Montgomery reduction.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    sp_digit ca = 0;

    __asm__ __volatile__ (
        "# i = 0\n\t"
        "mov	r12, #0\n\t"
        "ldr	r10, [%[a], #0]\n\t"
        "ldr	r14, [%[a], #4]\n\t"
        "\n1:\n\t"
        "# mu = a[i] * mp\n\t"
        "mul	r8, %[mp], r10\n\t"
        "# a[i+0] += m[0] * mu\n\t"
        "ldr	r7, [%[m], #0]\n\t"
        "ldr	r9, [%[a], #0]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r10, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "# a[i+1] += m[1] * mu\n\t"
        "ldr	r7, [%[m], #4]\n\t"
        "ldr	r9, [%[a], #4]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r10, r14, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r10, r10, r5\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+2] += m[2] * mu\n\t"
        "ldr	r7, [%[m], #8]\n\t"
        "ldr	r14, [%[a], #8]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r14, r14, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r14, r14, r4\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+3] += m[3] * mu\n\t"
        "ldr	r7, [%[m], #12]\n\t"
        "ldr	r9, [%[a], #12]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #12]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+4] += m[4] * mu\n\t"
        "ldr	r7, [%[m], #16]\n\t"
        "ldr	r9, [%[a], #16]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #16]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+5] += m[5] * mu\n\t"
        "ldr	r7, [%[m], #20]\n\t"
        "ldr	r9, [%[a], #20]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #20]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+6] += m[6] * mu\n\t"
        "ldr	r7, [%[m], #24]\n\t"
        "ldr	r9, [%[a], #24]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #24]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+7] += m[7] * mu\n\t"
        "ldr	r7, [%[m], #28]\n\t"
        "ldr	r9, [%[a], #28]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #28]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+8] += m[8] * mu\n\t"
        "ldr	r7, [%[m], #32]\n\t"
        "ldr	r9, [%[a], #32]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #32]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+9] += m[9] * mu\n\t"
        "ldr	r7, [%[m], #36]\n\t"
        "ldr	r9, [%[a], #36]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #36]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+10] += m[10] * mu\n\t"
        "ldr	r7, [%[m], #40]\n\t"
        "ldr	r9, [%[a], #40]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #40]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+11] += m[11] * mu\n\t"
        "ldr	r7, [%[m], #44]\n\t"
        "ldr	r9, [%[a], #44]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #44]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+12] += m[12] * mu\n\t"
        "ldr	r7, [%[m], #48]\n\t"
        "ldr	r9, [%[a], #48]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #48]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+13] += m[13] * mu\n\t"
        "ldr	r7, [%[m], #52]\n\t"
        "ldr	r9, [%[a], #52]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #52]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+14] += m[14] * mu\n\t"
        "ldr	r7, [%[m], #56]\n\t"
        "ldr	r9, [%[a], #56]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #56]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+15] += m[15] * mu\n\t"
        "ldr	r7, [%[m], #60]\n\t"
        "ldr	r9, [%[a], #60]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #60]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+16] += m[16] * mu\n\t"
        "ldr	r7, [%[m], #64]\n\t"
        "ldr	r9, [%[a], #64]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #64]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+17] += m[17] * mu\n\t"
        "ldr	r7, [%[m], #68]\n\t"
        "ldr	r9, [%[a], #68]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #68]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+18] += m[18] * mu\n\t"
        "ldr	r7, [%[m], #72]\n\t"
        "ldr	r9, [%[a], #72]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #72]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+19] += m[19] * mu\n\t"
        "ldr	r7, [%[m], #76]\n\t"
        "ldr	r9, [%[a], #76]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #76]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+20] += m[20] * mu\n\t"
        "ldr	r7, [%[m], #80]\n\t"
        "ldr	r9, [%[a], #80]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #80]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+21] += m[21] * mu\n\t"
        "ldr	r7, [%[m], #84]\n\t"
        "ldr	r9, [%[a], #84]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #84]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+22] += m[22] * mu\n\t"
        "ldr	r7, [%[m], #88]\n\t"
        "ldr	r9, [%[a], #88]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #88]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+23] += m[23] * mu\n\t"
        "ldr	r7, [%[m], #92]\n\t"
        "ldr	r9, [%[a], #92]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #92]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+24] += m[24] * mu\n\t"
        "ldr	r7, [%[m], #96]\n\t"
        "ldr	r9, [%[a], #96]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #96]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+25] += m[25] * mu\n\t"
        "ldr	r7, [%[m], #100]\n\t"
        "ldr	r9, [%[a], #100]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #100]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+26] += m[26] * mu\n\t"
        "ldr	r7, [%[m], #104]\n\t"
        "ldr	r9, [%[a], #104]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #104]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+27] += m[27] * mu\n\t"
        "ldr	r7, [%[m], #108]\n\t"
        "ldr	r9, [%[a], #108]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #108]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+28] += m[28] * mu\n\t"
        "ldr	r7, [%[m], #112]\n\t"
        "ldr	r9, [%[a], #112]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #112]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+29] += m[29] * mu\n\t"
        "ldr	r7, [%[m], #116]\n\t"
        "ldr	r9, [%[a], #116]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #116]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+30] += m[30] * mu\n\t"
        "ldr	r7, [%[m], #120]\n\t"
        "ldr	r9, [%[a], #120]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #120]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+31] += m[31] * mu\n\t"
        "ldr	r7, [%[m], #124]\n\t"
        "ldr	r9, [%[a], #124]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #124]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+32] += m[32] * mu\n\t"
        "ldr	r7, [%[m], #128]\n\t"
        "ldr	r9, [%[a], #128]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #128]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+33] += m[33] * mu\n\t"
        "ldr	r7, [%[m], #132]\n\t"
        "ldr	r9, [%[a], #132]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #132]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+34] += m[34] * mu\n\t"
        "ldr	r7, [%[m], #136]\n\t"
        "ldr	r9, [%[a], #136]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #136]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+35] += m[35] * mu\n\t"
        "ldr	r7, [%[m], #140]\n\t"
        "ldr	r9, [%[a], #140]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #140]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+36] += m[36] * mu\n\t"
        "ldr	r7, [%[m], #144]\n\t"
        "ldr	r9, [%[a], #144]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #144]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+37] += m[37] * mu\n\t"
        "ldr	r7, [%[m], #148]\n\t"
        "ldr	r9, [%[a], #148]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #148]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+38] += m[38] * mu\n\t"
        "ldr	r7, [%[m], #152]\n\t"
        "ldr	r9, [%[a], #152]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #152]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+39] += m[39] * mu\n\t"
        "ldr	r7, [%[m], #156]\n\t"
        "ldr	r9, [%[a], #156]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #156]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+40] += m[40] * mu\n\t"
        "ldr	r7, [%[m], #160]\n\t"
        "ldr	r9, [%[a], #160]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #160]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+41] += m[41] * mu\n\t"
        "ldr	r7, [%[m], #164]\n\t"
        "ldr	r9, [%[a], #164]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #164]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+42] += m[42] * mu\n\t"
        "ldr	r7, [%[m], #168]\n\t"
        "ldr	r9, [%[a], #168]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #168]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+43] += m[43] * mu\n\t"
        "ldr	r7, [%[m], #172]\n\t"
        "ldr	r9, [%[a], #172]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #172]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+44] += m[44] * mu\n\t"
        "ldr	r7, [%[m], #176]\n\t"
        "ldr	r9, [%[a], #176]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #176]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+45] += m[45] * mu\n\t"
        "ldr	r7, [%[m], #180]\n\t"
        "ldr	r9, [%[a], #180]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #180]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+46] += m[46] * mu\n\t"
        "ldr	r7, [%[m], #184]\n\t"
        "ldr	r9, [%[a], #184]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #184]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+47] += m[47] * mu\n\t"
        "ldr	r7, [%[m], #188]\n\t"
        "ldr	r9, [%[a], #188]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #188]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+48] += m[48] * mu\n\t"
        "ldr	r7, [%[m], #192]\n\t"
        "ldr	r9, [%[a], #192]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #192]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+49] += m[49] * mu\n\t"
        "ldr	r7, [%[m], #196]\n\t"
        "ldr	r9, [%[a], #196]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #196]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+50] += m[50] * mu\n\t"
        "ldr	r7, [%[m], #200]\n\t"
        "ldr	r9, [%[a], #200]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #200]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+51] += m[51] * mu\n\t"
        "ldr	r7, [%[m], #204]\n\t"
        "ldr	r9, [%[a], #204]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #204]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+52] += m[52] * mu\n\t"
        "ldr	r7, [%[m], #208]\n\t"
        "ldr	r9, [%[a], #208]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #208]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+53] += m[53] * mu\n\t"
        "ldr	r7, [%[m], #212]\n\t"
        "ldr	r9, [%[a], #212]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #212]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+54] += m[54] * mu\n\t"
        "ldr	r7, [%[m], #216]\n\t"
        "ldr	r9, [%[a], #216]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #216]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+55] += m[55] * mu\n\t"
        "ldr	r7, [%[m], #220]\n\t"
        "ldr	r9, [%[a], #220]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #220]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+56] += m[56] * mu\n\t"
        "ldr	r7, [%[m], #224]\n\t"
        "ldr	r9, [%[a], #224]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #224]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+57] += m[57] * mu\n\t"
        "ldr	r7, [%[m], #228]\n\t"
        "ldr	r9, [%[a], #228]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #228]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+58] += m[58] * mu\n\t"
        "ldr	r7, [%[m], #232]\n\t"
        "ldr	r9, [%[a], #232]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #232]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+59] += m[59] * mu\n\t"
        "ldr	r7, [%[m], #236]\n\t"
        "ldr	r9, [%[a], #236]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #236]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+60] += m[60] * mu\n\t"
        "ldr	r7, [%[m], #240]\n\t"
        "ldr	r9, [%[a], #240]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #240]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+61] += m[61] * mu\n\t"
        "ldr	r7, [%[m], #244]\n\t"
        "ldr	r9, [%[a], #244]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #244]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+62] += m[62] * mu\n\t"
        "ldr	r7, [%[m], #248]\n\t"
        "ldr	r9, [%[a], #248]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #248]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+63] += m[63] * mu\n\t"
        "ldr	r7, [%[m], #252]\n\t"
        "ldr	r9, [%[a], #252]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #252]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+64] += m[64] * mu\n\t"
        "ldr	r7, [%[m], #256]\n\t"
        "ldr	r9, [%[a], #256]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #256]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+65] += m[65] * mu\n\t"
        "ldr	r7, [%[m], #260]\n\t"
        "ldr	r9, [%[a], #260]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #260]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+66] += m[66] * mu\n\t"
        "ldr	r7, [%[m], #264]\n\t"
        "ldr	r9, [%[a], #264]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #264]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+67] += m[67] * mu\n\t"
        "ldr	r7, [%[m], #268]\n\t"
        "ldr	r9, [%[a], #268]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #268]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+68] += m[68] * mu\n\t"
        "ldr	r7, [%[m], #272]\n\t"
        "ldr	r9, [%[a], #272]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #272]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+69] += m[69] * mu\n\t"
        "ldr	r7, [%[m], #276]\n\t"
        "ldr	r9, [%[a], #276]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #276]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+70] += m[70] * mu\n\t"
        "ldr	r7, [%[m], #280]\n\t"
        "ldr	r9, [%[a], #280]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #280]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+71] += m[71] * mu\n\t"
        "ldr	r7, [%[m], #284]\n\t"
        "ldr	r9, [%[a], #284]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #284]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+72] += m[72] * mu\n\t"
        "ldr	r7, [%[m], #288]\n\t"
        "ldr	r9, [%[a], #288]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #288]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+73] += m[73] * mu\n\t"
        "ldr	r7, [%[m], #292]\n\t"
        "ldr	r9, [%[a], #292]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #292]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+74] += m[74] * mu\n\t"
        "ldr	r7, [%[m], #296]\n\t"
        "ldr	r9, [%[a], #296]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #296]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+75] += m[75] * mu\n\t"
        "ldr	r7, [%[m], #300]\n\t"
        "ldr	r9, [%[a], #300]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #300]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+76] += m[76] * mu\n\t"
        "ldr	r7, [%[m], #304]\n\t"
        "ldr	r9, [%[a], #304]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #304]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+77] += m[77] * mu\n\t"
        "ldr	r7, [%[m], #308]\n\t"
        "ldr	r9, [%[a], #308]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #308]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+78] += m[78] * mu\n\t"
        "ldr	r7, [%[m], #312]\n\t"
        "ldr	r9, [%[a], #312]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #312]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+79] += m[79] * mu\n\t"
        "ldr	r7, [%[m], #316]\n\t"
        "ldr	r9, [%[a], #316]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #316]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+80] += m[80] * mu\n\t"
        "ldr	r7, [%[m], #320]\n\t"
        "ldr	r9, [%[a], #320]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #320]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+81] += m[81] * mu\n\t"
        "ldr	r7, [%[m], #324]\n\t"
        "ldr	r9, [%[a], #324]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #324]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+82] += m[82] * mu\n\t"
        "ldr	r7, [%[m], #328]\n\t"
        "ldr	r9, [%[a], #328]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #328]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+83] += m[83] * mu\n\t"
        "ldr	r7, [%[m], #332]\n\t"
        "ldr	r9, [%[a], #332]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #332]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+84] += m[84] * mu\n\t"
        "ldr	r7, [%[m], #336]\n\t"
        "ldr	r9, [%[a], #336]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #336]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+85] += m[85] * mu\n\t"
        "ldr	r7, [%[m], #340]\n\t"
        "ldr	r9, [%[a], #340]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #340]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+86] += m[86] * mu\n\t"
        "ldr	r7, [%[m], #344]\n\t"
        "ldr	r9, [%[a], #344]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #344]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+87] += m[87] * mu\n\t"
        "ldr	r7, [%[m], #348]\n\t"
        "ldr	r9, [%[a], #348]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #348]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+88] += m[88] * mu\n\t"
        "ldr	r7, [%[m], #352]\n\t"
        "ldr	r9, [%[a], #352]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #352]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+89] += m[89] * mu\n\t"
        "ldr	r7, [%[m], #356]\n\t"
        "ldr	r9, [%[a], #356]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #356]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+90] += m[90] * mu\n\t"
        "ldr	r7, [%[m], #360]\n\t"
        "ldr	r9, [%[a], #360]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #360]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+91] += m[91] * mu\n\t"
        "ldr	r7, [%[m], #364]\n\t"
        "ldr	r9, [%[a], #364]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #364]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+92] += m[92] * mu\n\t"
        "ldr	r7, [%[m], #368]\n\t"
        "ldr	r9, [%[a], #368]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #368]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+93] += m[93] * mu\n\t"
        "ldr	r7, [%[m], #372]\n\t"
        "ldr	r9, [%[a], #372]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r4, r7, #0\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #372]\n\t"
        "adc	r4, r4, #0\n\t"
        "# a[i+94] += m[94] * mu\n\t"
        "ldr	r7, [%[m], #376]\n\t"
        "ldr	r9, [%[a], #376]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r9, r9, r6\n\t"
        "adc	r5, r7, #0\n\t"
        "adds	r9, r9, r4\n\t"
        "str	r9, [%[a], #376]\n\t"
        "adc	r5, r5, #0\n\t"
        "# a[i+95] += m[95] * mu\n\t"
        "ldr	r7, [%[m], #380]\n\t"
        "ldr   r9, [%[a], #380]\n\t"
        "umull	r6, r7, r8, r7\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r7, r7, %[ca]\n\t"
        "mov	%[ca], #0\n\t"
        "adc	%[ca], %[ca], %[ca]\n\t"
        "adds	r9, r9, r5\n\t"
        "str	r9, [%[a], #380]\n\t"
        "ldr	r9, [%[a], #384]\n\t"
        "adcs	r9, r9, r7\n\t"
        "str	r9, [%[a], #384]\n\t"
        "adc	%[ca], %[ca], #0\n\t"
        "# i += 1\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	r12, r12, #4\n\t"
        "cmp	r12, #384\n\t"
        "blt	1b\n\t"
        "str	r10, [%[a], #0]\n\t"
        "str	r14, [%[a], #4]\n\t"
        : [ca] "+r" (ca), [a] "+r" (a)
        : [m] "r" (m), [mp] "r" (mp)
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
}

/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The product is computed with sp_3072_mul_96() and then reduced, in r,
 * with sp_3072_mont_reduce_96().
 *
 * r   Result of multiplication.
 *     NOTE(review): presumably needs room for the unreduced product
 *     (twice the width of a and b) - confirm against sp_3072_mul_96().
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m, sp_digit mp)
{
    /* Multiply a by b, placing the product in r. */
    sp_3072_mul_96(r, a, b);
    /* Montgomery reduce r modulo m; the result stays in r. */
    sp_3072_mont_reduce_96(r, m, mp);
}

/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is computed with sp_3072_sqr_96() and then reduced, in r,
 * with sp_3072_mont_reduce_96().
 *
 * r   Result of squaring.
 *     NOTE(review): presumably needs room for the unreduced square
 *     (twice the width of a) - confirm against sp_3072_sqr_96().
 * a   Number to square in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    /* Square a, placing the result in r. */
    sp_3072_sqr_96(r, a);
    /* Montgomery reduce r modulo m; the result stays in r. */
    sp_3072_mont_reduce_96(r, m, mp);
}

#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
 *
 * d1   The high order half of the number to divide.
 * d0   The low order half of the number to divide.
 * div  The divisor.
 * returns the result of the division.
 *
 * Note that this is an approximate div. It may give an answer 1 larger.
 *
 * The quotient estimate is built one bit at a time by comparing the running
 * high word against (div >> 1) + 1, and is then adjusted using the
 * difference (d1|d0) - r * div.
 *
 * The assembly uses flag-setting instructions (subs, movs, adc, sbc), so the
 * condition flags are listed as clobbered ("cc") alongside the scratch
 * registers r4-r8.
 */
static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div)
{
    sp_digit r = 0;

    __asm__ __volatile__ (
        /* r5 = (div >> 1) + 1: value the running high word is compared to. */
        "lsr	r5, %[div], #1\n\t"
        "add	r5, r5, #1\n\t"
        /* r7:r6 = working copy of d1:d0. */
        "mov	r6, %[d0]\n\t"
        "mov	r7, %[d1]\n\t"
        "# Do top 32\n\t"
        /* r8 = all ones when r7 > r5, else zero (borrow turned into mask). */
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        /* r = 2 * r + (r7 > r5), then conditionally subtract r5 from r7. */
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "# Next 30 bits\n\t"
        /* r4 counts 29 down to 0: thirty iterations of shift-and-compare. */
        "mov	r4, #29\n\t"
        "1:\n\t"
        /* Shift r7:r6 left by one bit. */
        "movs	r6, r6, lsl #1\n\t"
        "adc	r7, r7, r7\n\t"
        "subs	r8, r5, r7\n\t"
        "sbc	r8, r8, r8\n\t"
        "add	%[r], %[r], %[r]\n\t"
        "sub	%[r], %[r], r8\n\t"
        "and	r8, r8, r5\n\t"
        "subs	r7, r7, r8\n\t"
        "subs	r4, r4, #1\n\t"
        "bpl	1b\n\t"
        /* Last bit: r = 2 * r + 1. */
        "add	%[r], %[r], %[r]\n\t"
        "add	%[r], %[r], #1\n\t"
        /* Adjust: r += high word of ((d1|d0) - r * div). */
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Repeat the adjustment with the updated estimate. */
        "umull	r4, r5, %[r], %[div]\n\t"
        "subs	r4, %[d0], r4\n\t"
        "sbc	r5, %[d1], r5\n\t"
        "add	%[r], %[r], r5\n\t"
        /* Add one more when the remaining low word r4 is above div. */
        "subs	r8, %[div], r4\n\t"
        "sbc	r8, r8, r8\n\t"
        "sub	%[r], %[r], r8\n\t"
        : [r] "+r" (r)
        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
        : "r4", "r5", "r6", "r7", "r8", "cc"
    );
    return r;
}

/* Apply a mask to every digit of a number.
 *
 * Each of the 96 digits of a is ANDed with m and written to the matching
 * digit of r.
 *
 * r  A single precision integer that receives the masked digits.
 * a  A single precision integer to read digits from.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    /* Compact build: one digit per pass. */
    while (idx < 96) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    sp_digit* dst = r;
    const sp_digit* src = a;
    int groups;

    /* Unrolled build: eight digits per pass, lowest digit first. */
    for (groups = 96 / 8; groups > 0; groups--) {
        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        dst += 8;
        src += 8;
    }
#endif
}

/* Compare a with b in constant time.
 *
 * a  A single precision integer.
 * b  A single precision integer.
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
{
    sp_digit r = -1;
    sp_digit one = 1;


#ifdef WOLFSSL_SP_SMALL
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "mov	r6, #380\n\t"
        "1:\n\t"
        "ldr	r4, [%[a], r6]\n\t"
        "ldr	r5, [%[b], r6]\n\t"
        "and	r4, r4, r3\n\t"
        "and	r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "subs	r6, r6, #4\n\t"
        "bcs	1b\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        : "r3", "r4", "r5", "r6", "r7"
    );
#else
    __asm__ __volatile__ (
        "mov	r7, #0\n\t"
        "mov	r3, #-1\n\t"
        "ldr		r4, [%[a], #380]\n\t"
        "ldr		r5, [%[b], #380]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #376]\n\t"
        "ldr		r5, [%[b], #376]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #372]\n\t"
        "ldr		r5, [%[b], #372]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #368]\n\t"
        "ldr		r5, [%[b], #368]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #364]\n\t"
        "ldr		r5, [%[b], #364]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #360]\n\t"
        "ldr		r5, [%[b], #360]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #356]\n\t"
        "ldr		r5, [%[b], #356]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #352]\n\t"
        "ldr		r5, [%[b], #352]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #348]\n\t"
        "ldr		r5, [%[b], #348]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #344]\n\t"
        "ldr		r5, [%[b], #344]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #340]\n\t"
        "ldr		r5, [%[b], #340]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #336]\n\t"
        "ldr		r5, [%[b], #336]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #332]\n\t"
        "ldr		r5, [%[b], #332]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #328]\n\t"
        "ldr		r5, [%[b], #328]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #324]\n\t"
        "ldr		r5, [%[b], #324]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #320]\n\t"
        "ldr		r5, [%[b], #320]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #316]\n\t"
        "ldr		r5, [%[b], #316]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #312]\n\t"
        "ldr		r5, [%[b], #312]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #308]\n\t"
        "ldr		r5, [%[b], #308]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #304]\n\t"
        "ldr		r5, [%[b], #304]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #300]\n\t"
        "ldr		r5, [%[b], #300]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #296]\n\t"
        "ldr		r5, [%[b], #296]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #292]\n\t"
        "ldr		r5, [%[b], #292]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #288]\n\t"
        "ldr		r5, [%[b], #288]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #284]\n\t"
        "ldr		r5, [%[b], #284]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #280]\n\t"
        "ldr		r5, [%[b], #280]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #276]\n\t"
        "ldr		r5, [%[b], #276]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #272]\n\t"
        "ldr		r5, [%[b], #272]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #268]\n\t"
        "ldr		r5, [%[b], #268]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #264]\n\t"
        "ldr		r5, [%[b], #264]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #260]\n\t"
        "ldr		r5, [%[b], #260]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #256]\n\t"
        "ldr		r5, [%[b], #256]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #252]\n\t"
        "ldr		r5, [%[b], #252]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #248]\n\t"
        "ldr		r5, [%[b], #248]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #244]\n\t"
        "ldr		r5, [%[b], #244]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #240]\n\t"
        "ldr		r5, [%[b], #240]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #236]\n\t"
        "ldr		r5, [%[b], #236]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #232]\n\t"
        "ldr		r5, [%[b], #232]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #228]\n\t"
        "ldr		r5, [%[b], #228]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #224]\n\t"
        "ldr		r5, [%[b], #224]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #220]\n\t"
        "ldr		r5, [%[b], #220]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #216]\n\t"
        "ldr		r5, [%[b], #216]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #212]\n\t"
        "ldr		r5, [%[b], #212]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #208]\n\t"
        "ldr		r5, [%[b], #208]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #204]\n\t"
        "ldr		r5, [%[b], #204]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #200]\n\t"
        "ldr		r5, [%[b], #200]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #196]\n\t"
        "ldr		r5, [%[b], #196]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #192]\n\t"
        "ldr		r5, [%[b], #192]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #188]\n\t"
        "ldr		r5, [%[b], #188]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #184]\n\t"
        "ldr		r5, [%[b], #184]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #180]\n\t"
        "ldr		r5, [%[b], #180]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #176]\n\t"
        "ldr		r5, [%[b], #176]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #172]\n\t"
        "ldr		r5, [%[b], #172]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #168]\n\t"
        "ldr		r5, [%[b], #168]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #164]\n\t"
        "ldr		r5, [%[b], #164]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #160]\n\t"
        "ldr		r5, [%[b], #160]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #156]\n\t"
        "ldr		r5, [%[b], #156]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #152]\n\t"
        "ldr		r5, [%[b], #152]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #148]\n\t"
        "ldr		r5, [%[b], #148]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #144]\n\t"
        "ldr		r5, [%[b], #144]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #140]\n\t"
        "ldr		r5, [%[b], #140]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #136]\n\t"
        "ldr		r5, [%[b], #136]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #132]\n\t"
        "ldr		r5, [%[b], #132]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #128]\n\t"
        "ldr		r5, [%[b], #128]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #124]\n\t"
        "ldr		r5, [%[b], #124]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #120]\n\t"
        "ldr		r5, [%[b], #120]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #116]\n\t"
        "ldr		r5, [%[b], #116]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #112]\n\t"
        "ldr		r5, [%[b], #112]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #108]\n\t"
        "ldr		r5, [%[b], #108]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #104]\n\t"
        "ldr		r5, [%[b], #104]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #100]\n\t"
        "ldr		r5, [%[b], #100]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #96]\n\t"
        "ldr		r5, [%[b], #96]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #92]\n\t"
        "ldr		r5, [%[b], #92]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #88]\n\t"
        "ldr		r5, [%[b], #88]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #84]\n\t"
        "ldr		r5, [%[b], #84]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #80]\n\t"
        "ldr		r5, [%[b], #80]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #76]\n\t"
        "ldr		r5, [%[b], #76]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #72]\n\t"
        "ldr		r5, [%[b], #72]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #68]\n\t"
        "ldr		r5, [%[b], #68]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #64]\n\t"
        "ldr		r5, [%[b], #64]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #60]\n\t"
        "ldr		r5, [%[b], #60]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #56]\n\t"
        "ldr		r5, [%[b], #56]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #52]\n\t"
        "ldr		r5, [%[b], #52]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #48]\n\t"
        "ldr		r5, [%[b], #48]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #44]\n\t"
        "ldr		r5, [%[b], #44]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #40]\n\t"
        "ldr		r5, [%[b], #40]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #36]\n\t"
        "ldr		r5, [%[b], #36]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #32]\n\t"
        "ldr		r5, [%[b], #32]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #28]\n\t"
        "ldr		r5, [%[b], #28]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #24]\n\t"
        "ldr		r5, [%[b], #24]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #20]\n\t"
        "ldr		r5, [%[b], #20]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #16]\n\t"
        "ldr		r5, [%[b], #16]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #12]\n\t"
        "ldr		r5, [%[b], #12]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #8]\n\t"
        "ldr		r5, [%[b], #8]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #4]\n\t"
        "ldr		r5, [%[b], #4]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "ldr		r4, [%[a], #0]\n\t"
        "ldr		r5, [%[b], #0]\n\t"
        "and		r4, r4, r3\n\t"
        "and		r5, r5, r3\n\t"
        "subs	r4, r4, r5\n\t"
        "it	hi\n\t"
        "movhi	%[r], %[one]\n\t"
        "it	lo\n\t"
        "movlo	%[r], r3\n\t"
        "it	ne\n\t"
        "movne	r3, r7\n\t"
        "eor	%[r], %[r], r3\n\t"
        : [r] "+r" (r)
        : [a] "r" (a), [b] "r" (b), [one] "r" (one)
        : "r3", "r4", "r5", "r6", "r7"
    );
#endif

    return r;
}

/* Divide a by d and put the remainder into r (m*d + r = a).
 * m is not calculated as it is not needed at this time.
 *
 * The correction additions after each trial subtraction are always executed
 * and are driven by masks rather than by branches on the data (compare with
 * sp_3072_div_96_cond()).
 *
 * a  Number to be divided. 2 * 96 digits are read.
 * d  Number to divide with. Digit 95 is used as the most significant digit.
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    /* t1: working copy of the dividend, t2: scratch with one extra digit. */
    sp_digit t1[192], t2[97];
    sp_digit div, r1;
    int i;

    (void)m;


    /* Top digit of the divisor - handed to div_3072_word_96() each round. */
    div = d[95];
    XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
    for (i=95; i>=0; i--) {
        /* Presumably an estimate of the next quotient digit from the two
         * digits at the top of the current window - helper not shown here. */
        r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);

        /* Take r1 * d away from the 96 digit window starting at t1[i]. The
         * value returned by the subtraction and the extra top digit of the
         * product (t2[96]) are folded into the digit above the window. */
        sp_3072_mul_d_96(t2, d, r1);
        t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
        t1[96 + i] -= t2[96];
        /* Two unconditional correction rounds: d is masked with the digit
         * above the window (presumably non-zero only when too much was
         * subtracted) and added back. */
        sp_3072_mask_96(t2, d, t1[96 + i]);
        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
        sp_3072_mask_96(t2, d, t1[96 + i]);
        t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
    }

    /* Final step: r1 is 1 when the low 96 digits compare >= d, and 0 - r1 is
     * passed as the mask for the conditional subtraction that writes r. */
    r1 = sp_3072_cmp_96(t1, d) >= 0;
    sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}

/* Calculate the remainder of a divided by m. (r = a mod m)
 *
 * Delegates to sp_3072_div_96() without asking for the quotient.
 *
 * r  A single precision number that receives the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    int ret;

    /* The quotient is not needed - pass NULL for it. */
    ret = sp_3072_div_96(a, m, NULL, r);

    return ret;
}

#endif /* WOLFSSL_RSA_PUBLIC_ONLY */
/* Divide a by d and put the remainder into r (m*d + r = a).
 * The quotient m is not calculated as it is not needed at this time.
 *
 * Correction additions are only performed when the digit above the current
 * window is non-zero, so the work done depends on the values processed.
 *
 * a  Number to be divided. 2 * 96 digits are read.
 * d  Number to divide with.
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    sp_digit rem[192];
    sp_digit prod[97];
    sp_digit topDigit;
    sp_digit q;
    sp_digit* hi;
    sp_digit* lo;
    int idx;
    int fix;

    (void)m;

    topDigit = d[95];
    XMEMCPY(rem, a, sizeof(*rem) * 2 * 96);

    idx = 95;
    while (idx >= 0) {
        /* lo: start of the 96 digit window, hi: the digit just above it. */
        lo = &rem[idx];
        hi = &rem[96 + idx];

        q = div_3072_word_96(*hi, rem[96 + idx - 1], topDigit);

        /* Remove q * d from the window and fold the result into *hi. */
        sp_3072_mul_d_96(prod, d, q);
        *hi += sp_3072_sub_in_place_96(lo, prod);
        *hi -= prod[96];

        /* At most two add-backs of d, each only while *hi is non-zero. */
        for (fix = 0; (fix < 2) && (*hi != 0); fix++) {
            *hi += sp_3072_add_96(lo, lo, d);
        }

        idx--;
    }

    /* One last subtraction of d when the low 96 digits compare >= d. */
    q = sp_3072_cmp_96(rem, d) >= 0;
    sp_3072_cond_sub_96(r, rem, d, (sp_digit)0 - q);

    return MP_OKAY;
}

/* Calculate the remainder of a divided by m. (r = a mod m)
 *
 * Delegates to sp_3072_div_96_cond() without asking for the quotient.
 *
 * r  A single precision number that receives the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    int ret;

    /* The quotient is not needed - pass NULL for it. */
    ret = sp_3072_div_96_cond(a, m, NULL, r);

    return ret;
}

#if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
                                                     defined(WOLFSSL_HAVE_SP_DH)
#ifdef WOLFSSL_SP_SMALL
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Uses Montgomery multiplication with a fixed window of 4 exponent bits and
 * a table of 16 entries.
 *
 * r        A single precision number that is the result of the operation.
 *          Digits 96..191 are written, so it must hold 2 * 96 digits.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  Non-zero when a is to be reduced modulo m before use.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[16][192];
#else
    sp_digit* t[16];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        /* Carve the single allocation into 16 table entries. */
        for (i=0; i<16; i++) {
            t[i] = td + i * 192;
        }
#endif
        /* Entry 0 of the table holds the Montgomery normalizer. */
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_96(norm, m);

        /* Entry 1: place a (reduced if requested) in the upper 96 digits
         * above 96 zero digits and reduce the 192 digit value modulo m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
        if (reduceA != 0) {
            err = sp_3072_mod_96(t[1] + 96, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_96(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
            err = sp_3072_mod_96(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Fill the rest of the table: t[k] is built from lower entries whose
         * indices sum to k. */
        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);

        /* Pull the first (possibly short) window from the top exponent word.
         * c becomes the number of unread bits left in n; it is always a
         * multiple of 4 here because 32 is a multiple of the window size. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 4;
        if (c == 32) {
            c = 28;
        }
        if (c == 0) {
            /* Every bit of the top word belongs to the first window.
             * Shifting a 32-bit digit by 32 is undefined, so skip the shift;
             * n is reloaded before it is read again. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
        for (; i>=0 || c>=4; ) {
            if (c == 0) {
                /* Current word exhausted - load the next one down. */
                n = e[i--];
                y = n >> 28;
                n <<= 4;
                c = 28;
            }
            else if (c < 4) {
                /* Window straddles two exponent words. */
                y = n >> 28;
                n = e[i--];
                c = 4 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 28) & 0xf;
                n <<= 4;
                c -= 4;
            }

            /* Four squarings for the 4-bit window, then multiply by the
             * table entry selected by the window value. */
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);

            sp_3072_mont_mul_96(r, r, t[y], m, mp);
        }

        /* Clear the upper half and run a Montgomery reduction on the result. */
        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
        sp_3072_mont_reduce_96(r, m, mp);

        /* Subtract m once when r compares >= m. */
        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
        sp_3072_cond_sub_96(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#else
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * Uses Montgomery multiplication with a fixed window of 5 exponent bits and
 * a table of 32 entries.
 *
 * r        A single precision number that is the result of the operation.
 *          Digits 96..191 are written, so it must hold 2 * 96 digits.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  Non-zero when a is to be reduced modulo m before use.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
        int bits, const sp_digit* m, int reduceA)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][192];
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        /* Carve the single allocation into 32 table entries. */
        for (i=0; i<32; i++) {
            t[i] = td + i * 192;
        }
#endif
        /* Entry 0 of the table holds the Montgomery normalizer. */
        norm = t[0];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_96(norm, m);

        /* Entry 1: place a (reduced if requested) in the upper 96 digits
         * above 96 zero digits and reduce the 192 digit value modulo m. */
        XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
        if (reduceA != 0) {
            err = sp_3072_mod_96(t[1] + 96, a, m);
            if (err == MP_OKAY) {
                err = sp_3072_mod_96(t[1], t[1], m);
            }
        }
        else {
            XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
            err = sp_3072_mod_96(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Fill the rest of the table: t[k] is built from lower entries whose
         * indices sum to k. */
        sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
        sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
        sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
        sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
        sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
        sp_3072_mont_sqr_96(t[20], t[10], m, mp);
        sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
        sp_3072_mont_sqr_96(t[22], t[11], m, mp);
        sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
        sp_3072_mont_sqr_96(t[24], t[12], m, mp);
        sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
        sp_3072_mont_sqr_96(t[26], t[13], m, mp);
        sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
        sp_3072_mont_sqr_96(t[28], t[14], m, mp);
        sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
        sp_3072_mont_sqr_96(t[30], t[15], m, mp);
        sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);

        /* Pull the first (possibly short) window of bits % 5 bits from the
         * top of the exponent. c = (bits in top word) - (bits in first
         * window) and can be negative or zero as 32 is not a multiple of 5. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* Top word holds fewer bits than the first window needs: take
             * the missing -c bits from the next word down. This can only
             * happen when bits > 32, so e[i] is a valid word. Shifting by a
             * negative count is undefined and previously allowed a window
             * value beyond the 32 entry table. */
            c = -c;
            y = (int)(n << c);
            n = e[i--];
            y |= (int)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* Every bit of the top word belongs to the first window.
             * Shifting a 32-bit digit by 32 is undefined, so skip the shift;
             * n is reloaded before it is read again. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted - load the next one down. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            /* Five squarings for the 5-bit window, then multiply by the
             * table entry selected by the window value. */
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);

            sp_3072_mont_mul_96(r, r, t[y], m, mp);
        }

        /* Clear the upper half and run a Montgomery reduction on the result. */
        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
        sp_3072_mont_reduce_96(r, m, mp);

        /* Subtract m once when r compares >= m. */
        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
        sp_3072_cond_sub_96(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* WOLFSSL_SP_SMALL */
#endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */

#ifdef WOLFSSL_HAVE_SP_RSA
/* Perform the RSA public key operation: out = in ^ em mod mm.
 *
 * An exponent of 3 is handled with one square and one multiply; any other
 * exponent uses a bit-by-bit Montgomery square-and-multiply.
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base. At most 384.
 * em      Public exponent. At most 32 bits long and not zero.
 * mm      Modulus. Must be exactly 3072 bits long.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 384 bytes long.
 * outLen  On input the size of out, on success set to 384.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an input is too long or the modulus is not 3072 bits, MP_EXPTMOD_E when the
 * exponent is zero and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
    byte* out, word32* outLen)
{
#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
    sp_digit baseBuf[192];
    sp_digit modBuf[96];
    sp_digit resBuf[192];
#else
    sp_digit* heap = NULL;
#endif
    sp_digit* a;
    sp_digit* ah;
    sp_digit* m;
    sp_digit* r;
    sp_digit e[1];
    int err = MP_OKAY;

    /* Validate the sizes before touching any working memory. */
    if (*outLen < 384) {
        err = MP_TO_E;
    }
    else if ((mp_count_bits(em) > 32) || (inLen > 384) ||
             (mp_count_bits(mm) != 3072)) {
        err = MP_READ_E;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (err == MP_OKAY) {
        heap = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
                                  DYNAMIC_TYPE_RSA);
        if (heap == NULL) {
            err = MEMORY_E;
        }
        else {
            /* Layout: a (192 digits), r (192 digits), m (96 digits). */
            a  = heap;
            ah = a + 96;
            r  = a + 96 * 2;
            m  = r + 96 * 2;
        }
    }
#else
    a  = baseBuf;
    ah = a + 96;
    m  = modBuf;
    r  = resBuf;
#endif

    if (err == MP_OKAY) {
        /* The base is read into the upper half of a. */
        sp_3072_from_bin(ah, 96, in, inLen);

        e[0] = em->dp[0];
#if DIGIT_BIT < 32
        /* Exponent may be spread over two mp digits. */
        if (em->used > 1) {
            e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
        }
#endif
        if (e[0] == 0) {
            err = MP_EXPTMOD_E;
        }
    }

    if (err == MP_OKAY) {
        sp_3072_from_mp(m, 96, mm);

        if (e[0] == 0x3) {
            /* r = base^2 mod m, then r = base * r mod m. */
            sp_3072_sqr_96(r, ah);
            err = sp_3072_mod_96_cond(r, r, m);
            if (err == MP_OKAY) {
                sp_3072_mul_96(r, ah, r);
                err = sp_3072_mod_96_cond(r, r, m);
            }
        }
        else {
            int i;
            sp_digit mp;

            sp_3072_mont_setup(m, &mp);

            /* Convert to Montgomery form. */
            XMEMSET(a, 0, sizeof(sp_digit) * 96);
            err = sp_3072_mod_96_cond(a, a, m);

            if (err == MP_OKAY) {
                /* Locate the most significant set bit of the exponent. */
                i = 31;
                while ((i >= 0) && ((e[0] >> i) == 0)) {
                    i--;
                }

                /* Top bit is consumed by starting with r = a. */
                XMEMCPY(r, a, sizeof(sp_digit) * 96);
                while (--i >= 0) {
                    sp_3072_mont_sqr_96(r, r, m, mp);
                    if (((e[0] >> i) & 1) == 1) {
                        sp_3072_mont_mul_96(r, r, a, m, mp);
                    }
                }
                XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
                sp_3072_mont_reduce_96(r, m, mp);

                /* Find the highest digit where r and m differ and subtract
                 * m when r is not smaller at that digit. */
                i = 95;
                while ((i > 0) && (r[i] == m[i])) {
                    i--;
                }
                if (r[i] >= m[i]) {
                    sp_3072_sub_in_place_96(r, m);
                }
            }
        }
    }

    if (err == MP_OKAY) {
        sp_3072_to_bin(r, out);
        *outLen = 384;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (heap != NULL) {
        XFREE(heap, NULL, DYNAMIC_TYPE_RSA);
    }
#endif

    return err;
}

#ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* RSA private key operation using the Chinese Remainder Theorem (CRT).
 *
 * Computes m1 = in^dP mod p and m2 = in^dQ mod q, then recombines them as
 * m2 + q * (((m1 - m2) * qInv) mod p).
 *
 * in      Array of bytes representing the number to exponentiate, base.
 * inLen   Number of bytes in base. Must be no more than 384.
 * dm      Private exponent. Not used - the CRT values are used instead.
 * pm      First prime. Must be no more than 1536 bits.
 * qm      Second prime. Must be no more than 1536 bits.
 * dpm     First prime's CRT exponent. Must be no more than 1536 bits.
 * dqm     Second prime's CRT exponent. Must be no more than 1536 bits.
 * qim     Inverse of second prime mod p. Must be no more than 1536 bits.
 * mm      Modulus. Only its length is checked: must be exactly 3072 bits.
 * out     Buffer to hold big-endian bytes of exponentiation result.
 *         Must be at least 384 bytes long.
 * outLen  On entry, the size of out in bytes. On success, set to 384.
 * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
 * an array is too long and MEMORY_E when dynamic memory allocation fails.
 */
int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
    mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
    byte* out, word32* outLen)
{
#if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
    sp_digit ad[96 * 2];
    sp_digit pd[48], qd[48], dpd[48];
    sp_digit tmpad[96], tmpbd[96];
#else
    sp_digit* t = NULL;
#endif
    sp_digit* a;
    sp_digit* p;
    sp_digit* q;
    sp_digit* dp;
    sp_digit* dq;
    sp_digit* qi;
    sp_digit* tmp;
    sp_digit* tmpa;
    sp_digit* tmpb;
    sp_digit* r;
    sp_digit c;
    int err = MP_OKAY;

    (void)dm;

    if (*outLen < 384) {
        err = MP_TO_E;
    }
    if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072)) {
        err = MP_READ_E;
    }
    /* Every CRT value is loaded into a 48 digit (1536 bit) buffer below, so
     * reject anything that cannot fit rather than overrun or truncate it. */
    if (err == MP_OKAY && (mp_count_bits(pm) > 1536 ||
                           mp_count_bits(qm) > 1536 ||
                           mp_count_bits(dpm) > 1536 ||
                           mp_count_bits(dqm) > 1536 ||
                           mp_count_bits(qim) > 1536)) {
        err = MP_READ_E;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (err == MP_OKAY) {
        /* One allocation of 528 digits:
         * a(192) | p(48) | q(48) | dp/dq/qi(48) | tmpa(96) | tmpb(96). */
        t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (t == NULL) {
            err = MEMORY_E;
        }
    }
    if (err == MP_OKAY) {
        a = t;
        p = a + 96 * 2;
        q = p + 48;
        /* dp, dq and qi are needed one after another and share storage. */
        qi = dq = dp = q + 48;
        tmpa = qi + 48;
        tmpb = tmpa + 96;

        /* The input is no longer needed once both exponentiations are done,
         * so tmp and r reuse the space of a. */
        tmp = t;
        r = tmp + 96;
    }
#else
    r = a = ad;
    p = pd;
    q = qd;
    /* dp, dq and qi are needed one after another and share storage. */
    qi = dq = dp = dpd;
    tmpa = tmpad;
    tmpb = tmpbd;
    tmp = a + 96;
#endif

    if (err == MP_OKAY) {
        sp_3072_from_bin(a, 96, in, inLen);
        sp_3072_from_mp(p, 48, pm);
        sp_3072_from_mp(q, 48, qm);
        sp_3072_from_mp(dp, 48, dpm);

        /* tmpa = in ^ dP mod p */
        err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
    }
    if (err == MP_OKAY) {
        sp_3072_from_mp(dq, 48, dqm);
        /* tmpb = in ^ dQ mod q */
        err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
    }

    if (err == MP_OKAY) {
        /* tmpa = tmpa - tmpb, adding p back (selected by mask c) when the
         * subtraction borrowed. */
        c = sp_3072_sub_in_place_48(tmpa, tmpb);
        sp_3072_mask_48(tmp, p, c);
        sp_3072_add_48(tmpa, tmpa, tmp);

        /* tmpa = (tmpa * qInv) mod p */
        sp_3072_from_mp(qi, 48, qim);
        sp_3072_mul_48(tmpa, tmpa, qi);
        err = sp_3072_mod_48(tmpa, tmpa, p);
    }

    if (err == MP_OKAY) {
        /* r = tmpb + q * tmpa */
        sp_3072_mul_48(tmpa, q, tmpa);
        XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
        sp_3072_add_96(r, tmpb, tmpa);

        sp_3072_to_bin(r, out);
        *outLen = 384;
    }

    /* Wipe all working storage: it holds private key material. */
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    if (t != NULL) {
        XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
        XFREE(t, NULL, DYNAMIC_TYPE_RSA);
    }
#else
    /* ad holds the result and, in its upper half (tmp), a masked copy of p. */
    XMEMSET(ad, 0, sizeof(ad));
    XMEMSET(tmpad, 0, sizeof(tmpad));
    XMEMSET(tmpbd, 0, sizeof(tmpbd));
    XMEMSET(pd, 0, sizeof(pd));
    XMEMSET(qd, 0, sizeof(qd));
    XMEMSET(dpd, 0, sizeof(dpd));
#endif

    return err;
}
#endif
#endif /* WOLFSSL_HAVE_SP_RSA */
#if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
                                              !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Convert an array of sp_digit to an mp_int.
 *
 * 96 digits of 32 bits are read from a and repacked into digits of DIGIT_BIT
 * bits in r. r is grown to hold 3072 bits and clamped afterwards.
 *
 * a  A single precision integer.
 * r  A multi-precision integer.
 * returns MP_OKAY on success, otherwise the error returned by mp_grow().
 */
static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
        r->used = 96;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 96; i++) {
            r->dp[j] |= a[i] << s;
            r->dp[j] &= (1L << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = a[i] >> s;
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= (1L << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = a[i] >> s;
                }
            }
            s = 32 - s;
        }
        r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* Number of mp digits needed - the size requested from mp_grow(). */
        const int cnt = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
        int i, j = 0, s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 96; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                /* Build the mask in mp_digit: a long may be only 32 bits
                 * wide, which is narrower than DIGIT_BIT here. */
                r->dp[j] &= ((mp_digit)1 << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                j++;
                /* Start the next digit with the bits of a[i] that did not
                 * fit. Skip the store once all cnt digits are full, and do
                 * not shift the 32-bit word by its full width. */
                if (j < cnt) {
                    r->dp[j] = (s < 32) ? (mp_digit)(a[i] >> s) : 0;
                }
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = cnt;
        mp_clamp(r);
#endif
    }

    return err;
}

/* Modular exponentiation on MP integers. (res = base ^ exp mod mod)
 *
 * base  Base. MP integer of no more than 3072 bits.
 * exp   Exponent. MP integer of no more than 3072 bits.
 * mod   Modulus. MP integer of exactly 3072 bits.
 * res   Result. MP integer.
 * returns 0 on success, MP_READ_E if base or exp is too long or mod is not
 * 3072 bits, otherwise the error from the exponentiation or from converting
 * the result.
 */
int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
{
    sp_digit baseD[192], expD[96], modD[96];
    /* The result is written over the base. */
    sp_digit* resD = baseD;
    int expBits = mp_count_bits(exp);
    int err;

    if (mp_count_bits(base) > 3072) {
        err = MP_READ_E;
    }
    else if (expBits > 3072) {
        err = MP_READ_E;
    }
    else if (mp_count_bits(mod) != 3072) {
        err = MP_READ_E;
    }
    else {
        sp_3072_from_mp(baseD, 96, base);
        sp_3072_from_mp(expD, 96, exp);
        sp_3072_from_mp(modD, 96, mod);

        err = sp_3072_mod_exp_96(resD, baseD, expD, expBits, modD, 0);
        if (err == MP_OKAY) {
            err = sp_3072_to_mp(resD, res);
        }
    }

    /* Clear the local copy of the exponent on every path. */
    XMEMSET(expD, 0, sizeof(expD));

    return err;
}

#ifdef WOLFSSL_HAVE_SP_DH

#ifdef HAVE_FFDHE_3072
/* Shift a 96 word number left by n bits into a 97 word result. (r = a << n)
 *
 * The code is fully unrolled and works from the most significant word,
 * a[95] (offset 380), down to a[0]. For each word the low part of the result
 * is "word << n" and the bits carried into the next word up are computed as
 * "(word >> 1) >> (31 - n)", i.e. word >> (32 - n) done as two shifts
 * (presumably so that n == 0 carries nothing - confirm against the ARM
 * register-shift rules).
 * Each source word is loaded before the result word at the same offset is
 * stored; sp_3072_mod_exp_2_96 relies on this by passing the same buffer for
 * r and a.
 *
 * r  Result. Words r[0] (offset 0) to r[96] (offset 384) are written.
 * a  Number to shift. Words a[0] to a[95] are read.
 * n  Number of bits to shift left by.
 *    NOTE(review): expected to be 0..31 - callers in this file pass a 5-bit
 *    window value; confirm before using with larger shifts.
 */
static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
{
    /* r6 = 31 - n. r2, r3 and r4 rotate as the current, previous and
     * next-to-store words; r5 holds the bits carried up from the current
     * word. */
    __asm__ __volatile__ (
        "mov	r6, #31\n\t"
        "sub	r6, r6, %[n]\n\t"
        "ldr	r3, [%[a], #380]\n\t"
        "lsr	r4, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r4, r4, r6\n\t"
        "ldr	r2, [%[a], #376]\n\t"
        "str	r4, [%[r], #384]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #372]\n\t"
        "str	r3, [%[r], #380]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #368]\n\t"
        "str	r2, [%[r], #376]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #364]\n\t"
        "str	r4, [%[r], #372]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #360]\n\t"
        "str	r3, [%[r], #368]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #356]\n\t"
        "str	r2, [%[r], #364]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #352]\n\t"
        "str	r4, [%[r], #360]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #348]\n\t"
        "str	r3, [%[r], #356]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #344]\n\t"
        "str	r2, [%[r], #352]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #340]\n\t"
        "str	r4, [%[r], #348]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #336]\n\t"
        "str	r3, [%[r], #344]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #332]\n\t"
        "str	r2, [%[r], #340]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #328]\n\t"
        "str	r4, [%[r], #336]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #324]\n\t"
        "str	r3, [%[r], #332]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #320]\n\t"
        "str	r2, [%[r], #328]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #316]\n\t"
        "str	r4, [%[r], #324]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #312]\n\t"
        "str	r3, [%[r], #320]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #308]\n\t"
        "str	r2, [%[r], #316]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #304]\n\t"
        "str	r4, [%[r], #312]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #300]\n\t"
        "str	r3, [%[r], #308]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #296]\n\t"
        "str	r2, [%[r], #304]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #292]\n\t"
        "str	r4, [%[r], #300]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #288]\n\t"
        "str	r3, [%[r], #296]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #284]\n\t"
        "str	r2, [%[r], #292]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #280]\n\t"
        "str	r4, [%[r], #288]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #276]\n\t"
        "str	r3, [%[r], #284]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #272]\n\t"
        "str	r2, [%[r], #280]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #268]\n\t"
        "str	r4, [%[r], #276]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #264]\n\t"
        "str	r3, [%[r], #272]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #260]\n\t"
        "str	r2, [%[r], #268]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #256]\n\t"
        "str	r4, [%[r], #264]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #252]\n\t"
        "str	r3, [%[r], #260]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #248]\n\t"
        "str	r2, [%[r], #256]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #244]\n\t"
        "str	r4, [%[r], #252]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "str	r3, [%[r], #248]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #236]\n\t"
        "str	r2, [%[r], #244]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #232]\n\t"
        "str	r4, [%[r], #240]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #228]\n\t"
        "str	r3, [%[r], #236]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #224]\n\t"
        "str	r2, [%[r], #232]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #220]\n\t"
        "str	r4, [%[r], #228]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "str	r3, [%[r], #224]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #212]\n\t"
        "str	r2, [%[r], #220]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #208]\n\t"
        "str	r4, [%[r], #216]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #204]\n\t"
        "str	r3, [%[r], #212]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #200]\n\t"
        "str	r2, [%[r], #208]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #196]\n\t"
        "str	r4, [%[r], #204]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "str	r3, [%[r], #200]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #188]\n\t"
        "str	r2, [%[r], #196]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #184]\n\t"
        "str	r4, [%[r], #192]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #180]\n\t"
        "str	r3, [%[r], #188]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #176]\n\t"
        "str	r2, [%[r], #184]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #172]\n\t"
        "str	r4, [%[r], #180]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "str	r3, [%[r], #176]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #164]\n\t"
        "str	r2, [%[r], #172]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #160]\n\t"
        "str	r4, [%[r], #168]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #156]\n\t"
        "str	r3, [%[r], #164]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #152]\n\t"
        "str	r2, [%[r], #160]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #148]\n\t"
        "str	r4, [%[r], #156]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "str	r3, [%[r], #152]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #140]\n\t"
        "str	r2, [%[r], #148]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #136]\n\t"
        "str	r4, [%[r], #144]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #132]\n\t"
        "str	r3, [%[r], #140]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #128]\n\t"
        "str	r2, [%[r], #136]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #124]\n\t"
        "str	r4, [%[r], #132]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "str	r3, [%[r], #128]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "str	r2, [%[r], #124]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #112]\n\t"
        "str	r4, [%[r], #120]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #108]\n\t"
        "str	r3, [%[r], #116]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #104]\n\t"
        "str	r2, [%[r], #112]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #100]\n\t"
        "str	r4, [%[r], #108]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "str	r3, [%[r], #104]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #92]\n\t"
        "str	r2, [%[r], #100]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #88]\n\t"
        "str	r4, [%[r], #96]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #84]\n\t"
        "str	r3, [%[r], #92]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #80]\n\t"
        "str	r2, [%[r], #88]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #76]\n\t"
        "str	r4, [%[r], #84]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "str	r3, [%[r], #80]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "str	r2, [%[r], #76]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #64]\n\t"
        "str	r4, [%[r], #72]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #60]\n\t"
        "str	r3, [%[r], #68]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #56]\n\t"
        "str	r2, [%[r], #64]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #52]\n\t"
        "str	r4, [%[r], #60]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "str	r3, [%[r], #56]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #44]\n\t"
        "str	r2, [%[r], #52]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #40]\n\t"
        "str	r4, [%[r], #48]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #36]\n\t"
        "str	r3, [%[r], #44]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #32]\n\t"
        "str	r2, [%[r], #40]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #28]\n\t"
        "str	r4, [%[r], #36]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "str	r3, [%[r], #32]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "str	r2, [%[r], #28]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #16]\n\t"
        "str	r4, [%[r], #24]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #12]\n\t"
        "str	r3, [%[r], #20]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "ldr	r3, [%[a], #8]\n\t"
        "str	r2, [%[r], #16]\n\t"
        "lsr	r5, r3, #1\n\t"
        "lsl	r3, r3, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r4, r4, r5\n\t"
        "ldr	r2, [%[a], #4]\n\t"
        "str	r4, [%[r], #12]\n\t"
        "lsr	r5, r2, #1\n\t"
        "lsl	r2, r2, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r3, r3, r5\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "str	r3, [%[r], #8]\n\t"
        "lsr	r5, r4, #1\n\t"
        "lsl	r4, r4, %[n]\n\t"
        "lsr	r5, r5, r6\n\t"
        "orr	r2, r2, r5\n\t"
        "str	r4, [%[r]]\n\t"
        "str	r2, [%[r], #4]\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [n] "r" (n)
        : "memory", "r2", "r3", "r4", "r5", "r6"
    );
}

/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
 *
 * The exponent is consumed from its most significant end in windows of 5
 * bits (the first window takes bits % 5 bits, or 5 when that is zero). For
 * each window the accumulator is squared five times and then multiplied by
 * 2^window, which is done with a left shift rather than a multiplication.
 *
 * r     A single precision number that is the result of the operation.
 *       Must have room for 192 digits: r[96] and the upper half are used as
 *       working space.
 * e     A single precision number that is the exponent.
 * bits  The number of bits in the exponent.
 * m     A single precision number that is the modulus.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
    sp_digit nd[192];
    sp_digit td[97];
#else
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit* tmp;
    sp_digit mp = 1;
    sp_digit n, o;
    sp_digit mask;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    /* 192 digits for norm followed by 97 digits for tmp. */
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        norm = td;
        tmp  = td + 192;
#else
        norm = nd;
        tmp  = td;
#endif

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_96(norm, m);

        /* n holds the exponent word being consumed (unused bits at the top),
         * c the number of bits of it still to be used. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            c = 27;
        }
        if (c < 0) {
            /* Fewer bits in the top word than the first window needs: take
             * the remainder from the top of the next word down. */
            c = -c;
            y = (int)(n << c);
            n = e[i--];
            y |= (int)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* The top word holds exactly the first window. Nothing is left
             * in n, so it is reloaded on the first loop iteration. */
            y = (int)n;
        }
        else {
            y = (int)(n >> c);
            n <<= 32 - c;
        }
        /* r = norm * 2^y - the first window applied to the value from
         * sp_3072_mont_norm_96 (presumably 1 in Montgomery form; confirm). */
        sp_3072_lshift_96(r, norm, y);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word used up: window is the top of the next. */
                n = e[i--];
                y = n >> 27;
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = n >> 27;
                n = e[i--];
                c = 5 - c;
                y |= n >> (32 - c);
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (n >> 27) & 0x1f;
                n <<= 5;
                c -= 5;
            }

            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);
            sp_3072_mont_sqr_96(r, r, m, mp);

            /* Multiply by 2^y. The bits shifted out into r[96] are folded
             * back in by adding r[96] * norm, then m is subtracted if that
             * addition carried. */
            sp_3072_lshift_96(r, r, y);
            sp_3072_mul_d_96(tmp, norm, r[96]);
            r[96] = 0;
            o = sp_3072_add_96(r, r, tmp);
            sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
        }

        /* Convert out of Montgomery form and make sure the result is < m. */
        XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
        sp_3072_mont_reduce_96(r, m, mp);

        mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
        sp_3072_cond_sub_96(r, r, m, mask);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif

    return err;
}
#endif /* HAVE_FFDHE_3072 */

/* Diffie-Hellman modular exponentiation with the exponent given as bytes.
 * (out = base ^ exp mod mod)
 *
 * The result is written big-endian with leading zero bytes removed.
 *
 * base     Base. MP integer of no more than 3072 bits.
 * exp      Array of bytes that is the exponent.
 * expLen   Length of data, in bytes, in exponent. No more than 384.
 * mod      Modulus. MP integer of exactly 3072 bits.
 * out      Buffer to hold big-endian bytes of exponentiation result.
 *          Must be at least 384 bytes long.
 * outLen   Length, in bytes, of exponentiation result.
 * returns 0 on success, MP_READ_E if there are too many bytes in an array
 * and MEMORY_E if memory allocation fails.
 */
int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
    mp_int* mod, byte* out, word32* outLen)
{
    sp_digit b[192], e[96], m[96];
    /* The result is written over the base. */
    sp_digit* r = b;
    word32 i;
    int err;

    if (mp_count_bits(base) > 3072) {
        err = MP_READ_E;
    }
    else if (expLen > 384) {
        err = MP_READ_E;
    }
    else if (mp_count_bits(mod) != 3072) {
        err = MP_READ_E;
    }
    else {
        sp_3072_from_mp(b, 96, base);
        sp_3072_from_bin(e, 96, exp, expLen);
        sp_3072_from_mp(m, 96, mod);

    #ifdef HAVE_FFDHE_3072
        /* A base of 2 with the top modulus word all ones takes the
         * dedicated 2^e routine. */
        if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1) {
            err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
        }
        else
    #endif
        {
            err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
        }
    }

    if (err == MP_OKAY) {
        sp_3072_to_bin(r, out);
        *outLen = 384;
        /* Count the leading zero bytes and move the rest to the front. */
        i = 0;
        while (i < 384 && out[i] == 0) {
            i++;
        }
        *outLen -= i;
        XMEMMOVE(out, out + i, *outLen);
    }

    /* Clear the local copy of the exponent on every path. */
    XMEMSET(e, 0, sizeof(e));

    return err;
}
#endif /* WOLFSSL_HAVE_SP_DH */

/* Perform the modular exponentiation with a 1536-bit modulus.
 * (res = base ^ exp mod mod)
 *
 * base  Base. MP integer of no more than 1536 bits.
 * exp   Exponent. MP integer of no more than 1536 bits.
 * mod   Modulus. MP integer of exactly 1536 bits.
 * res   Result. MP integer. Only updated when the conversion succeeds.
 * returns 0 on success, MP_READ_E if there are too many bytes in an array
 * and MEMORY_E if memory allocation fails.
 */
int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
{
    int err = MP_OKAY;
    sp_digit b[96], e[48], m[48];
    sp_digit* r = b;
    int expBits = mp_count_bits(exp);

    if (mp_count_bits(base) > 1536) {
        err = MP_READ_E;
    }

    if (err == MP_OKAY) {
        if (expBits > 1536) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        if (mp_count_bits(mod) != 1536) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        sp_3072_from_mp(b, 48, base);
        sp_3072_from_mp(e, 48, exp);
        sp_3072_from_mp(m, 48, mod);

        err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
    }

    if (err == MP_OKAY) {
        /* sp_3072_to_mp() reads 96 digits, so zero the unused upper half. */
        XMEMSET(r + 48, 0, sizeof(*r) * 48U);
        err = sp_3072_to_mp(r, res);
    }

    /* Only touch res when it was successfully grown and filled in. */
    if (err == MP_OKAY) {
        res->used = mod->used;
        mp_clamp(res);
    }

    XMEMSET(e, 0, sizeof(e));

    return err;
}

#endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */

#endif /* !WOLFSSL_SP_NO_3072 */

#ifdef WOLFSSL_SP_4096
/* Read big endian unsigned byte array into r.
 *
 * Bytes are consumed from the end of the array (least significant first) and
 * packed into the digits of r from r[0] upwards. Digits of r not reached by
 * the data are set to zero.
 *
 * r  A single precision integer.
 * size  Number of digits available in r.
 * a  Byte array.
 * n  Number of bytes in array to read.
 */
static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
{
    int src = n - 1;
    int dst = 0;
    word32 shift = 0;

    r[0] = 0;
    while (src >= 0) {
        r[dst] |= (((sp_digit)a[src]) << shift);
        if (shift < 24U) {
            /* Still room in this digit for another byte. */
            shift += 8U;
        }
        else {
            /* Digit is complete: carry any leftover bits of this byte into
             * the next digit, unless r is already full. */
            r[dst] &= 0xffffffff;
            shift = 32U - shift;
            if (dst + 1 >= size) {
                break;
            }
            dst++;
            r[dst] = (sp_digit)a[src] >> shift;
            shift = 8U - shift;
        }
        src--;
    }

    /* Zero every digit above the last one written. */
    dst++;
    while (dst < size) {
        r[dst] = 0;
        dst++;
    }
}

/* Convert an mp_int to an array of sp_digit.
 *
 * No more than size digits are written to r; any part of a that does not
 * fit is dropped. Digits of r above the converted value are set to zero.
 *
 * r  A single precision integer.
 * size  Number of digits available in r.
 * a  A multi-precision integer.
 */
static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    int j;
    int used = a->used;

    /* Never copy more digits than r can hold - matches the bounds applied
     * by the other DIGIT_BIT variants below. */
    if (used > size) {
        used = size;
    }
    XMEMCPY(r, a->dp, sizeof(sp_digit) * used);

    for (j = used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    int i, j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        /* An mp digit wider than 32 bits can fill further whole digits. */
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = 0L;
            }
        }
        s = (word32)DIGIT_BIT - s;
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    int i, j = 0, s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}

/* Serialize r into a as a big endian byte string.
 * Fixed length number of bytes written: 512
 *
 * r  A single precision integer: 128 words, least significant word first.
 * a  Byte array receiving 512 bytes, most significant byte first.
 */
static void sp_4096_to_bin(sp_digit* r, byte* a)
{
    int word;
    int pos = 4096 / 8;

    /* Walk the words from least to most significant while filling the
     * output from its last byte back towards its first. */
    for (word = 0; word < 128; word++) {
        a[--pos] = (byte)(r[word] >> 0);
        a[--pos] = (byte)(r[word] >> 8);
        a[--pos] = (byte)(r[word] >> 16);
        a[--pos] = (byte)(r[word] >> 24);
    }
}

#ifndef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled addition of two 64-word (2048-bit) numbers. Words are
 * processed four at a time, least significant first, and the carry flag is
 * chained from the initial adds through every following adcs.
 * Each group of four words is loaded from a and b before the results for
 * that group are stored, so r may be the same pointer as a or b.
 *
 * r  A single precision integer (64 words written).
 * a  A single precision integer (64 words read).
 * b  A single precision integer (64 words read).
 * returns the carry out of the most significant word: 0 or 1.
 */
static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = 0: operand for the final adc that captures the carry. */
        "mov	r12, #0\n\t"
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        /* Lowest word uses adds to start the carry chain. */
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Remaining groups continue the carry chain with adcs only. */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r5, [%[a], #132]\n\t"
        "ldr	r6, [%[a], #136]\n\t"
        "ldr	r7, [%[a], #140]\n\t"
        "ldr	r8, [%[b], #128]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "ldr	r10, [%[b], #136]\n\t"
        "ldr	r14, [%[b], #140]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r5, [%[r], #132]\n\t"
        "str	r6, [%[r], #136]\n\t"
        "str	r7, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r5, [%[a], #148]\n\t"
        "ldr	r6, [%[a], #152]\n\t"
        "ldr	r7, [%[a], #156]\n\t"
        "ldr	r8, [%[b], #144]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "ldr	r10, [%[b], #152]\n\t"
        "ldr	r14, [%[b], #156]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r5, [%[r], #148]\n\t"
        "str	r6, [%[r], #152]\n\t"
        "str	r7, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r5, [%[a], #164]\n\t"
        "ldr	r6, [%[a], #168]\n\t"
        "ldr	r7, [%[a], #172]\n\t"
        "ldr	r8, [%[b], #160]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "ldr	r10, [%[b], #168]\n\t"
        "ldr	r14, [%[b], #172]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r5, [%[r], #164]\n\t"
        "str	r6, [%[r], #168]\n\t"
        "str	r7, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r5, [%[a], #180]\n\t"
        "ldr	r6, [%[a], #184]\n\t"
        "ldr	r7, [%[a], #188]\n\t"
        "ldr	r8, [%[b], #176]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "ldr	r10, [%[b], #184]\n\t"
        "ldr	r14, [%[b], #188]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r5, [%[r], #180]\n\t"
        "str	r6, [%[r], #184]\n\t"
        "str	r7, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r5, [%[a], #196]\n\t"
        "ldr	r6, [%[a], #200]\n\t"
        "ldr	r7, [%[a], #204]\n\t"
        "ldr	r8, [%[b], #192]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "ldr	r10, [%[b], #200]\n\t"
        "ldr	r14, [%[b], #204]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r5, [%[r], #196]\n\t"
        "str	r6, [%[r], #200]\n\t"
        "str	r7, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r5, [%[a], #212]\n\t"
        "ldr	r6, [%[a], #216]\n\t"
        "ldr	r7, [%[a], #220]\n\t"
        "ldr	r8, [%[b], #208]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "ldr	r10, [%[b], #216]\n\t"
        "ldr	r14, [%[b], #220]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r5, [%[r], #212]\n\t"
        "str	r6, [%[r], #216]\n\t"
        "str	r7, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r5, [%[a], #228]\n\t"
        "ldr	r6, [%[a], #232]\n\t"
        "ldr	r7, [%[a], #236]\n\t"
        "ldr	r8, [%[b], #224]\n\t"
        "ldr	r9, [%[b], #228]\n\t"
        "ldr	r10, [%[b], #232]\n\t"
        "ldr	r14, [%[b], #236]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r5, [%[r], #228]\n\t"
        "str	r6, [%[r], #232]\n\t"
        "str	r7, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r5, [%[a], #244]\n\t"
        "ldr	r6, [%[a], #248]\n\t"
        "ldr	r7, [%[a], #252]\n\t"
        "ldr	r8, [%[b], #240]\n\t"
        "ldr	r9, [%[b], #244]\n\t"
        "ldr	r10, [%[b], #248]\n\t"
        "ldr	r14, [%[b], #252]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r5, [%[r], #244]\n\t"
        "str	r6, [%[r], #248]\n\t"
        "str	r7, [%[r], #252]\n\t"
        /* c = 0 + 0 + carry flag: the carry out of the top word. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* r14 is used as a scratch register above, so it is listed as
         * clobbered along with the other temporaries. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );

    return c;
}

/* Sub b from a into a. (a -= b)
 *
 * Fully unrolled in-place subtraction of two 128-word (4096-bit) numbers.
 * Words are processed four at a time, least significant first, and the
 * borrow is chained from the initial subs through every following sbcs.
 *
 * a  A single precision integer and result (128 words read and written).
 * b  A single precision integer (128 words read).
 * returns 0 when there was no borrow out of the most significant word and
 *         all bits set ((sp_digit)-1) when there was.
 */
static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        "ldr	r2, [%[a], #0]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "ldr	r5, [%[a], #12]\n\t"
        "ldr	r6, [%[b], #0]\n\t"
        "ldr	r7, [%[b], #4]\n\t"
        "ldr	r8, [%[b], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        /* Lowest word uses subs to start the borrow chain. */
        "subs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #0]\n\t"
        "str	r3, [%[a], #4]\n\t"
        "str	r4, [%[a], #8]\n\t"
        "str	r5, [%[a], #12]\n\t"
        /* Remaining groups continue the borrow chain with sbcs only. */
        "ldr	r2, [%[a], #16]\n\t"
        "ldr	r3, [%[a], #20]\n\t"
        "ldr	r4, [%[a], #24]\n\t"
        "ldr	r5, [%[a], #28]\n\t"
        "ldr	r6, [%[b], #16]\n\t"
        "ldr	r7, [%[b], #20]\n\t"
        "ldr	r8, [%[b], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #16]\n\t"
        "str	r3, [%[a], #20]\n\t"
        "str	r4, [%[a], #24]\n\t"
        "str	r5, [%[a], #28]\n\t"
        "ldr	r2, [%[a], #32]\n\t"
        "ldr	r3, [%[a], #36]\n\t"
        "ldr	r4, [%[a], #40]\n\t"
        "ldr	r5, [%[a], #44]\n\t"
        "ldr	r6, [%[b], #32]\n\t"
        "ldr	r7, [%[b], #36]\n\t"
        "ldr	r8, [%[b], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #32]\n\t"
        "str	r3, [%[a], #36]\n\t"
        "str	r4, [%[a], #40]\n\t"
        "str	r5, [%[a], #44]\n\t"
        "ldr	r2, [%[a], #48]\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "ldr	r5, [%[a], #60]\n\t"
        "ldr	r6, [%[b], #48]\n\t"
        "ldr	r7, [%[b], #52]\n\t"
        "ldr	r8, [%[b], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #48]\n\t"
        "str	r3, [%[a], #52]\n\t"
        "str	r4, [%[a], #56]\n\t"
        "str	r5, [%[a], #60]\n\t"
        "ldr	r2, [%[a], #64]\n\t"
        "ldr	r3, [%[a], #68]\n\t"
        "ldr	r4, [%[a], #72]\n\t"
        "ldr	r5, [%[a], #76]\n\t"
        "ldr	r6, [%[b], #64]\n\t"
        "ldr	r7, [%[b], #68]\n\t"
        "ldr	r8, [%[b], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #64]\n\t"
        "str	r3, [%[a], #68]\n\t"
        "str	r4, [%[a], #72]\n\t"
        "str	r5, [%[a], #76]\n\t"
        "ldr	r2, [%[a], #80]\n\t"
        "ldr	r3, [%[a], #84]\n\t"
        "ldr	r4, [%[a], #88]\n\t"
        "ldr	r5, [%[a], #92]\n\t"
        "ldr	r6, [%[b], #80]\n\t"
        "ldr	r7, [%[b], #84]\n\t"
        "ldr	r8, [%[b], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #80]\n\t"
        "str	r3, [%[a], #84]\n\t"
        "str	r4, [%[a], #88]\n\t"
        "str	r5, [%[a], #92]\n\t"
        "ldr	r2, [%[a], #96]\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "ldr	r5, [%[a], #108]\n\t"
        "ldr	r6, [%[b], #96]\n\t"
        "ldr	r7, [%[b], #100]\n\t"
        "ldr	r8, [%[b], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #96]\n\t"
        "str	r3, [%[a], #100]\n\t"
        "str	r4, [%[a], #104]\n\t"
        "str	r5, [%[a], #108]\n\t"
        "ldr	r2, [%[a], #112]\n\t"
        "ldr	r3, [%[a], #116]\n\t"
        "ldr	r4, [%[a], #120]\n\t"
        "ldr	r5, [%[a], #124]\n\t"
        "ldr	r6, [%[b], #112]\n\t"
        "ldr	r7, [%[b], #116]\n\t"
        "ldr	r8, [%[b], #120]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #112]\n\t"
        "str	r3, [%[a], #116]\n\t"
        "str	r4, [%[a], #120]\n\t"
        "str	r5, [%[a], #124]\n\t"
        "ldr	r2, [%[a], #128]\n\t"
        "ldr	r3, [%[a], #132]\n\t"
        "ldr	r4, [%[a], #136]\n\t"
        "ldr	r5, [%[a], #140]\n\t"
        "ldr	r6, [%[b], #128]\n\t"
        "ldr	r7, [%[b], #132]\n\t"
        "ldr	r8, [%[b], #136]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #128]\n\t"
        "str	r3, [%[a], #132]\n\t"
        "str	r4, [%[a], #136]\n\t"
        "str	r5, [%[a], #140]\n\t"
        "ldr	r2, [%[a], #144]\n\t"
        "ldr	r3, [%[a], #148]\n\t"
        "ldr	r4, [%[a], #152]\n\t"
        "ldr	r5, [%[a], #156]\n\t"
        "ldr	r6, [%[b], #144]\n\t"
        "ldr	r7, [%[b], #148]\n\t"
        "ldr	r8, [%[b], #152]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #144]\n\t"
        "str	r3, [%[a], #148]\n\t"
        "str	r4, [%[a], #152]\n\t"
        "str	r5, [%[a], #156]\n\t"
        "ldr	r2, [%[a], #160]\n\t"
        "ldr	r3, [%[a], #164]\n\t"
        "ldr	r4, [%[a], #168]\n\t"
        "ldr	r5, [%[a], #172]\n\t"
        "ldr	r6, [%[b], #160]\n\t"
        "ldr	r7, [%[b], #164]\n\t"
        "ldr	r8, [%[b], #168]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #160]\n\t"
        "str	r3, [%[a], #164]\n\t"
        "str	r4, [%[a], #168]\n\t"
        "str	r5, [%[a], #172]\n\t"
        "ldr	r2, [%[a], #176]\n\t"
        "ldr	r3, [%[a], #180]\n\t"
        "ldr	r4, [%[a], #184]\n\t"
        "ldr	r5, [%[a], #188]\n\t"
        "ldr	r6, [%[b], #176]\n\t"
        "ldr	r7, [%[b], #180]\n\t"
        "ldr	r8, [%[b], #184]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #176]\n\t"
        "str	r3, [%[a], #180]\n\t"
        "str	r4, [%[a], #184]\n\t"
        "str	r5, [%[a], #188]\n\t"
        "ldr	r2, [%[a], #192]\n\t"
        "ldr	r3, [%[a], #196]\n\t"
        "ldr	r4, [%[a], #200]\n\t"
        "ldr	r5, [%[a], #204]\n\t"
        "ldr	r6, [%[b], #192]\n\t"
        "ldr	r7, [%[b], #196]\n\t"
        "ldr	r8, [%[b], #200]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #192]\n\t"
        "str	r3, [%[a], #196]\n\t"
        "str	r4, [%[a], #200]\n\t"
        "str	r5, [%[a], #204]\n\t"
        "ldr	r2, [%[a], #208]\n\t"
        "ldr	r3, [%[a], #212]\n\t"
        "ldr	r4, [%[a], #216]\n\t"
        "ldr	r5, [%[a], #220]\n\t"
        "ldr	r6, [%[b], #208]\n\t"
        "ldr	r7, [%[b], #212]\n\t"
        "ldr	r8, [%[b], #216]\n\t"
        "ldr	r9, [%[b], #220]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #208]\n\t"
        "str	r3, [%[a], #212]\n\t"
        "str	r4, [%[a], #216]\n\t"
        "str	r5, [%[a], #220]\n\t"
        "ldr	r2, [%[a], #224]\n\t"
        "ldr	r3, [%[a], #228]\n\t"
        "ldr	r4, [%[a], #232]\n\t"
        "ldr	r5, [%[a], #236]\n\t"
        "ldr	r6, [%[b], #224]\n\t"
        "ldr	r7, [%[b], #228]\n\t"
        "ldr	r8, [%[b], #232]\n\t"
        "ldr	r9, [%[b], #236]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #224]\n\t"
        "str	r3, [%[a], #228]\n\t"
        "str	r4, [%[a], #232]\n\t"
        "str	r5, [%[a], #236]\n\t"
        "ldr	r2, [%[a], #240]\n\t"
        "ldr	r3, [%[a], #244]\n\t"
        "ldr	r4, [%[a], #248]\n\t"
        "ldr	r5, [%[a], #252]\n\t"
        "ldr	r6, [%[b], #240]\n\t"
        "ldr	r7, [%[b], #244]\n\t"
        "ldr	r8, [%[b], #248]\n\t"
        "ldr	r9, [%[b], #252]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #240]\n\t"
        "str	r3, [%[a], #244]\n\t"
        "str	r4, [%[a], #248]\n\t"
        "str	r5, [%[a], #252]\n\t"
        "ldr	r2, [%[a], #256]\n\t"
        "ldr	r3, [%[a], #260]\n\t"
        "ldr	r4, [%[a], #264]\n\t"
        "ldr	r5, [%[a], #268]\n\t"
        "ldr	r6, [%[b], #256]\n\t"
        "ldr	r7, [%[b], #260]\n\t"
        "ldr	r8, [%[b], #264]\n\t"
        "ldr	r9, [%[b], #268]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #256]\n\t"
        "str	r3, [%[a], #260]\n\t"
        "str	r4, [%[a], #264]\n\t"
        "str	r5, [%[a], #268]\n\t"
        "ldr	r2, [%[a], #272]\n\t"
        "ldr	r3, [%[a], #276]\n\t"
        "ldr	r4, [%[a], #280]\n\t"
        "ldr	r5, [%[a], #284]\n\t"
        "ldr	r6, [%[b], #272]\n\t"
        "ldr	r7, [%[b], #276]\n\t"
        "ldr	r8, [%[b], #280]\n\t"
        "ldr	r9, [%[b], #284]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #272]\n\t"
        "str	r3, [%[a], #276]\n\t"
        "str	r4, [%[a], #280]\n\t"
        "str	r5, [%[a], #284]\n\t"
        "ldr	r2, [%[a], #288]\n\t"
        "ldr	r3, [%[a], #292]\n\t"
        "ldr	r4, [%[a], #296]\n\t"
        "ldr	r5, [%[a], #300]\n\t"
        "ldr	r6, [%[b], #288]\n\t"
        "ldr	r7, [%[b], #292]\n\t"
        "ldr	r8, [%[b], #296]\n\t"
        "ldr	r9, [%[b], #300]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #288]\n\t"
        "str	r3, [%[a], #292]\n\t"
        "str	r4, [%[a], #296]\n\t"
        "str	r5, [%[a], #300]\n\t"
        "ldr	r2, [%[a], #304]\n\t"
        "ldr	r3, [%[a], #308]\n\t"
        "ldr	r4, [%[a], #312]\n\t"
        "ldr	r5, [%[a], #316]\n\t"
        "ldr	r6, [%[b], #304]\n\t"
        "ldr	r7, [%[b], #308]\n\t"
        "ldr	r8, [%[b], #312]\n\t"
        "ldr	r9, [%[b], #316]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #304]\n\t"
        "str	r3, [%[a], #308]\n\t"
        "str	r4, [%[a], #312]\n\t"
        "str	r5, [%[a], #316]\n\t"
        "ldr	r2, [%[a], #320]\n\t"
        "ldr	r3, [%[a], #324]\n\t"
        "ldr	r4, [%[a], #328]\n\t"
        "ldr	r5, [%[a], #332]\n\t"
        "ldr	r6, [%[b], #320]\n\t"
        "ldr	r7, [%[b], #324]\n\t"
        "ldr	r8, [%[b], #328]\n\t"
        "ldr	r9, [%[b], #332]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #320]\n\t"
        "str	r3, [%[a], #324]\n\t"
        "str	r4, [%[a], #328]\n\t"
        "str	r5, [%[a], #332]\n\t"
        "ldr	r2, [%[a], #336]\n\t"
        "ldr	r3, [%[a], #340]\n\t"
        "ldr	r4, [%[a], #344]\n\t"
        "ldr	r5, [%[a], #348]\n\t"
        "ldr	r6, [%[b], #336]\n\t"
        "ldr	r7, [%[b], #340]\n\t"
        "ldr	r8, [%[b], #344]\n\t"
        "ldr	r9, [%[b], #348]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #336]\n\t"
        "str	r3, [%[a], #340]\n\t"
        "str	r4, [%[a], #344]\n\t"
        "str	r5, [%[a], #348]\n\t"
        "ldr	r2, [%[a], #352]\n\t"
        "ldr	r3, [%[a], #356]\n\t"
        "ldr	r4, [%[a], #360]\n\t"
        "ldr	r5, [%[a], #364]\n\t"
        "ldr	r6, [%[b], #352]\n\t"
        "ldr	r7, [%[b], #356]\n\t"
        "ldr	r8, [%[b], #360]\n\t"
        "ldr	r9, [%[b], #364]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #352]\n\t"
        "str	r3, [%[a], #356]\n\t"
        "str	r4, [%[a], #360]\n\t"
        "str	r5, [%[a], #364]\n\t"
        "ldr	r2, [%[a], #368]\n\t"
        "ldr	r3, [%[a], #372]\n\t"
        "ldr	r4, [%[a], #376]\n\t"
        "ldr	r5, [%[a], #380]\n\t"
        "ldr	r6, [%[b], #368]\n\t"
        "ldr	r7, [%[b], #372]\n\t"
        "ldr	r8, [%[b], #376]\n\t"
        "ldr	r9, [%[b], #380]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #368]\n\t"
        "str	r3, [%[a], #372]\n\t"
        "str	r4, [%[a], #376]\n\t"
        "str	r5, [%[a], #380]\n\t"
        "ldr	r2, [%[a], #384]\n\t"
        "ldr	r3, [%[a], #388]\n\t"
        "ldr	r4, [%[a], #392]\n\t"
        "ldr	r5, [%[a], #396]\n\t"
        "ldr	r6, [%[b], #384]\n\t"
        "ldr	r7, [%[b], #388]\n\t"
        "ldr	r8, [%[b], #392]\n\t"
        "ldr	r9, [%[b], #396]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #384]\n\t"
        "str	r3, [%[a], #388]\n\t"
        "str	r4, [%[a], #392]\n\t"
        "str	r5, [%[a], #396]\n\t"
        "ldr	r2, [%[a], #400]\n\t"
        "ldr	r3, [%[a], #404]\n\t"
        "ldr	r4, [%[a], #408]\n\t"
        "ldr	r5, [%[a], #412]\n\t"
        "ldr	r6, [%[b], #400]\n\t"
        "ldr	r7, [%[b], #404]\n\t"
        "ldr	r8, [%[b], #408]\n\t"
        "ldr	r9, [%[b], #412]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #400]\n\t"
        "str	r3, [%[a], #404]\n\t"
        "str	r4, [%[a], #408]\n\t"
        "str	r5, [%[a], #412]\n\t"
        "ldr	r2, [%[a], #416]\n\t"
        "ldr	r3, [%[a], #420]\n\t"
        "ldr	r4, [%[a], #424]\n\t"
        "ldr	r5, [%[a], #428]\n\t"
        "ldr	r6, [%[b], #416]\n\t"
        "ldr	r7, [%[b], #420]\n\t"
        "ldr	r8, [%[b], #424]\n\t"
        "ldr	r9, [%[b], #428]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #416]\n\t"
        "str	r3, [%[a], #420]\n\t"
        "str	r4, [%[a], #424]\n\t"
        "str	r5, [%[a], #428]\n\t"
        "ldr	r2, [%[a], #432]\n\t"
        "ldr	r3, [%[a], #436]\n\t"
        "ldr	r4, [%[a], #440]\n\t"
        "ldr	r5, [%[a], #444]\n\t"
        "ldr	r6, [%[b], #432]\n\t"
        "ldr	r7, [%[b], #436]\n\t"
        "ldr	r8, [%[b], #440]\n\t"
        "ldr	r9, [%[b], #444]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #432]\n\t"
        "str	r3, [%[a], #436]\n\t"
        "str	r4, [%[a], #440]\n\t"
        "str	r5, [%[a], #444]\n\t"
        "ldr	r2, [%[a], #448]\n\t"
        "ldr	r3, [%[a], #452]\n\t"
        "ldr	r4, [%[a], #456]\n\t"
        "ldr	r5, [%[a], #460]\n\t"
        "ldr	r6, [%[b], #448]\n\t"
        "ldr	r7, [%[b], #452]\n\t"
        "ldr	r8, [%[b], #456]\n\t"
        "ldr	r9, [%[b], #460]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #448]\n\t"
        "str	r3, [%[a], #452]\n\t"
        "str	r4, [%[a], #456]\n\t"
        "str	r5, [%[a], #460]\n\t"
        "ldr	r2, [%[a], #464]\n\t"
        "ldr	r3, [%[a], #468]\n\t"
        "ldr	r4, [%[a], #472]\n\t"
        "ldr	r5, [%[a], #476]\n\t"
        "ldr	r6, [%[b], #464]\n\t"
        "ldr	r7, [%[b], #468]\n\t"
        "ldr	r8, [%[b], #472]\n\t"
        "ldr	r9, [%[b], #476]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #464]\n\t"
        "str	r3, [%[a], #468]\n\t"
        "str	r4, [%[a], #472]\n\t"
        "str	r5, [%[a], #476]\n\t"
        "ldr	r2, [%[a], #480]\n\t"
        "ldr	r3, [%[a], #484]\n\t"
        "ldr	r4, [%[a], #488]\n\t"
        "ldr	r5, [%[a], #492]\n\t"
        "ldr	r6, [%[b], #480]\n\t"
        "ldr	r7, [%[b], #484]\n\t"
        "ldr	r8, [%[b], #488]\n\t"
        "ldr	r9, [%[b], #492]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #480]\n\t"
        "str	r3, [%[a], #484]\n\t"
        "str	r4, [%[a], #488]\n\t"
        "str	r5, [%[a], #492]\n\t"
        "ldr	r2, [%[a], #496]\n\t"
        "ldr	r3, [%[a], #500]\n\t"
        "ldr	r4, [%[a], #504]\n\t"
        "ldr	r5, [%[a], #508]\n\t"
        "ldr	r6, [%[b], #496]\n\t"
        "ldr	r7, [%[b], #500]\n\t"
        "ldr	r8, [%[b], #504]\n\t"
        "ldr	r9, [%[b], #508]\n\t"
        "sbcs	r2, r2, r6\n\t"
        "sbcs	r3, r3, r7\n\t"
        "sbcs	r4, r4, r8\n\t"
        "sbcs	r5, r5, r9\n\t"
        "str	r2, [%[a], #496]\n\t"
        "str	r3, [%[a], #500]\n\t"
        "str	r4, [%[a], #504]\n\t"
        "str	r5, [%[a], #508]\n\t"
        /* c = r9 - r9 - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], r9, r9\n\t"
        : [c] "+r" (c)
        : [a] "r" (a), [b] "r" (b)
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
    );

    return c;
}

/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled addition of two 128-word operands (byte offsets 0..508,
 * one 32-bit word per ldr/str). Words are processed four at a time: the
 * words of a and b for a group are loaded before the matching words of r
 * are stored, so r may be exactly the same buffer as a or b.
 *
 * r  A single precision integer. Receives the 128-word sum.
 * a  A single precision integer. First operand (128 words are read).
 * b  A single precision integer. Second operand (128 words are read).
 *
 * returns the carry out of the most significant word (0 or 1).
 */
static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r12 = 0; only used by the final adc to turn the carry flag into
         * the return value. */
        "mov	r12, #0\n\t"
        /* Words 0-3: adds starts the carry chain. */
        "ldr	r4, [%[a], #0]\n\t"
        "ldr	r5, [%[a], #4]\n\t"
        "ldr	r6, [%[a], #8]\n\t"
        "ldr	r7, [%[a], #12]\n\t"
        "ldr	r8, [%[b], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "ldr	r10, [%[b], #8]\n\t"
        "ldr	r14, [%[b], #12]\n\t"
        "adds	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #0]\n\t"
        "str	r5, [%[r], #4]\n\t"
        "str	r6, [%[r], #8]\n\t"
        "str	r7, [%[r], #12]\n\t"
        /* Every following group continues the chain with adcs; the ldr/str
         * in between leave the carry flag untouched. */
        "ldr	r4, [%[a], #16]\n\t"
        "ldr	r5, [%[a], #20]\n\t"
        "ldr	r6, [%[a], #24]\n\t"
        "ldr	r7, [%[a], #28]\n\t"
        "ldr	r8, [%[b], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "ldr	r10, [%[b], #24]\n\t"
        "ldr	r14, [%[b], #28]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #16]\n\t"
        "str	r5, [%[r], #20]\n\t"
        "str	r6, [%[r], #24]\n\t"
        "str	r7, [%[r], #28]\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "ldr	r5, [%[a], #36]\n\t"
        "ldr	r6, [%[a], #40]\n\t"
        "ldr	r7, [%[a], #44]\n\t"
        "ldr	r8, [%[b], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "ldr	r10, [%[b], #40]\n\t"
        "ldr	r14, [%[b], #44]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #32]\n\t"
        "str	r5, [%[r], #36]\n\t"
        "str	r6, [%[r], #40]\n\t"
        "str	r7, [%[r], #44]\n\t"
        "ldr	r4, [%[a], #48]\n\t"
        "ldr	r5, [%[a], #52]\n\t"
        "ldr	r6, [%[a], #56]\n\t"
        "ldr	r7, [%[a], #60]\n\t"
        "ldr	r8, [%[b], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "ldr	r10, [%[b], #56]\n\t"
        "ldr	r14, [%[b], #60]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #48]\n\t"
        "str	r5, [%[r], #52]\n\t"
        "str	r6, [%[r], #56]\n\t"
        "str	r7, [%[r], #60]\n\t"
        "ldr	r4, [%[a], #64]\n\t"
        "ldr	r5, [%[a], #68]\n\t"
        "ldr	r6, [%[a], #72]\n\t"
        "ldr	r7, [%[a], #76]\n\t"
        "ldr	r8, [%[b], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "ldr	r10, [%[b], #72]\n\t"
        "ldr	r14, [%[b], #76]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #64]\n\t"
        "str	r5, [%[r], #68]\n\t"
        "str	r6, [%[r], #72]\n\t"
        "str	r7, [%[r], #76]\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "ldr	r5, [%[a], #84]\n\t"
        "ldr	r6, [%[a], #88]\n\t"
        "ldr	r7, [%[a], #92]\n\t"
        "ldr	r8, [%[b], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "ldr	r10, [%[b], #88]\n\t"
        "ldr	r14, [%[b], #92]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #80]\n\t"
        "str	r5, [%[r], #84]\n\t"
        "str	r6, [%[r], #88]\n\t"
        "str	r7, [%[r], #92]\n\t"
        "ldr	r4, [%[a], #96]\n\t"
        "ldr	r5, [%[a], #100]\n\t"
        "ldr	r6, [%[a], #104]\n\t"
        "ldr	r7, [%[a], #108]\n\t"
        "ldr	r8, [%[b], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "ldr	r10, [%[b], #104]\n\t"
        "ldr	r14, [%[b], #108]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #96]\n\t"
        "str	r5, [%[r], #100]\n\t"
        "str	r6, [%[r], #104]\n\t"
        "str	r7, [%[r], #108]\n\t"
        "ldr	r4, [%[a], #112]\n\t"
        "ldr	r5, [%[a], #116]\n\t"
        "ldr	r6, [%[a], #120]\n\t"
        "ldr	r7, [%[a], #124]\n\t"
        "ldr	r8, [%[b], #112]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "ldr	r10, [%[b], #120]\n\t"
        "ldr	r14, [%[b], #124]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #112]\n\t"
        "str	r5, [%[r], #116]\n\t"
        "str	r6, [%[r], #120]\n\t"
        "str	r7, [%[r], #124]\n\t"
        "ldr	r4, [%[a], #128]\n\t"
        "ldr	r5, [%[a], #132]\n\t"
        "ldr	r6, [%[a], #136]\n\t"
        "ldr	r7, [%[a], #140]\n\t"
        "ldr	r8, [%[b], #128]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "ldr	r10, [%[b], #136]\n\t"
        "ldr	r14, [%[b], #140]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #128]\n\t"
        "str	r5, [%[r], #132]\n\t"
        "str	r6, [%[r], #136]\n\t"
        "str	r7, [%[r], #140]\n\t"
        "ldr	r4, [%[a], #144]\n\t"
        "ldr	r5, [%[a], #148]\n\t"
        "ldr	r6, [%[a], #152]\n\t"
        "ldr	r7, [%[a], #156]\n\t"
        "ldr	r8, [%[b], #144]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "ldr	r10, [%[b], #152]\n\t"
        "ldr	r14, [%[b], #156]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #144]\n\t"
        "str	r5, [%[r], #148]\n\t"
        "str	r6, [%[r], #152]\n\t"
        "str	r7, [%[r], #156]\n\t"
        "ldr	r4, [%[a], #160]\n\t"
        "ldr	r5, [%[a], #164]\n\t"
        "ldr	r6, [%[a], #168]\n\t"
        "ldr	r7, [%[a], #172]\n\t"
        "ldr	r8, [%[b], #160]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "ldr	r10, [%[b], #168]\n\t"
        "ldr	r14, [%[b], #172]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #160]\n\t"
        "str	r5, [%[r], #164]\n\t"
        "str	r6, [%[r], #168]\n\t"
        "str	r7, [%[r], #172]\n\t"
        "ldr	r4, [%[a], #176]\n\t"
        "ldr	r5, [%[a], #180]\n\t"
        "ldr	r6, [%[a], #184]\n\t"
        "ldr	r7, [%[a], #188]\n\t"
        "ldr	r8, [%[b], #176]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "ldr	r10, [%[b], #184]\n\t"
        "ldr	r14, [%[b], #188]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #176]\n\t"
        "str	r5, [%[r], #180]\n\t"
        "str	r6, [%[r], #184]\n\t"
        "str	r7, [%[r], #188]\n\t"
        "ldr	r4, [%[a], #192]\n\t"
        "ldr	r5, [%[a], #196]\n\t"
        "ldr	r6, [%[a], #200]\n\t"
        "ldr	r7, [%[a], #204]\n\t"
        "ldr	r8, [%[b], #192]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "ldr	r10, [%[b], #200]\n\t"
        "ldr	r14, [%[b], #204]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #192]\n\t"
        "str	r5, [%[r], #196]\n\t"
        "str	r6, [%[r], #200]\n\t"
        "str	r7, [%[r], #204]\n\t"
        "ldr	r4, [%[a], #208]\n\t"
        "ldr	r5, [%[a], #212]\n\t"
        "ldr	r6, [%[a], #216]\n\t"
        "ldr	r7, [%[a], #220]\n\t"
        "ldr	r8, [%[b], #208]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "ldr	r10, [%[b], #216]\n\t"
        "ldr	r14, [%[b], #220]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #208]\n\t"
        "str	r5, [%[r], #212]\n\t"
        "str	r6, [%[r], #216]\n\t"
        "str	r7, [%[r], #220]\n\t"
        "ldr	r4, [%[a], #224]\n\t"
        "ldr	r5, [%[a], #228]\n\t"
        "ldr	r6, [%[a], #232]\n\t"
        "ldr	r7, [%[a], #236]\n\t"
        "ldr	r8, [%[b], #224]\n\t"
        "ldr	r9, [%[b], #228]\n\t"
        "ldr	r10, [%[b], #232]\n\t"
        "ldr	r14, [%[b], #236]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #224]\n\t"
        "str	r5, [%[r], #228]\n\t"
        "str	r6, [%[r], #232]\n\t"
        "str	r7, [%[r], #236]\n\t"
        "ldr	r4, [%[a], #240]\n\t"
        "ldr	r5, [%[a], #244]\n\t"
        "ldr	r6, [%[a], #248]\n\t"
        "ldr	r7, [%[a], #252]\n\t"
        "ldr	r8, [%[b], #240]\n\t"
        "ldr	r9, [%[b], #244]\n\t"
        "ldr	r10, [%[b], #248]\n\t"
        "ldr	r14, [%[b], #252]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #240]\n\t"
        "str	r5, [%[r], #244]\n\t"
        "str	r6, [%[r], #248]\n\t"
        "str	r7, [%[r], #252]\n\t"
        "ldr	r4, [%[a], #256]\n\t"
        "ldr	r5, [%[a], #260]\n\t"
        "ldr	r6, [%[a], #264]\n\t"
        "ldr	r7, [%[a], #268]\n\t"
        "ldr	r8, [%[b], #256]\n\t"
        "ldr	r9, [%[b], #260]\n\t"
        "ldr	r10, [%[b], #264]\n\t"
        "ldr	r14, [%[b], #268]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #256]\n\t"
        "str	r5, [%[r], #260]\n\t"
        "str	r6, [%[r], #264]\n\t"
        "str	r7, [%[r], #268]\n\t"
        "ldr	r4, [%[a], #272]\n\t"
        "ldr	r5, [%[a], #276]\n\t"
        "ldr	r6, [%[a], #280]\n\t"
        "ldr	r7, [%[a], #284]\n\t"
        "ldr	r8, [%[b], #272]\n\t"
        "ldr	r9, [%[b], #276]\n\t"
        "ldr	r10, [%[b], #280]\n\t"
        "ldr	r14, [%[b], #284]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #272]\n\t"
        "str	r5, [%[r], #276]\n\t"
        "str	r6, [%[r], #280]\n\t"
        "str	r7, [%[r], #284]\n\t"
        "ldr	r4, [%[a], #288]\n\t"
        "ldr	r5, [%[a], #292]\n\t"
        "ldr	r6, [%[a], #296]\n\t"
        "ldr	r7, [%[a], #300]\n\t"
        "ldr	r8, [%[b], #288]\n\t"
        "ldr	r9, [%[b], #292]\n\t"
        "ldr	r10, [%[b], #296]\n\t"
        "ldr	r14, [%[b], #300]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #288]\n\t"
        "str	r5, [%[r], #292]\n\t"
        "str	r6, [%[r], #296]\n\t"
        "str	r7, [%[r], #300]\n\t"
        "ldr	r4, [%[a], #304]\n\t"
        "ldr	r5, [%[a], #308]\n\t"
        "ldr	r6, [%[a], #312]\n\t"
        "ldr	r7, [%[a], #316]\n\t"
        "ldr	r8, [%[b], #304]\n\t"
        "ldr	r9, [%[b], #308]\n\t"
        "ldr	r10, [%[b], #312]\n\t"
        "ldr	r14, [%[b], #316]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #304]\n\t"
        "str	r5, [%[r], #308]\n\t"
        "str	r6, [%[r], #312]\n\t"
        "str	r7, [%[r], #316]\n\t"
        "ldr	r4, [%[a], #320]\n\t"
        "ldr	r5, [%[a], #324]\n\t"
        "ldr	r6, [%[a], #328]\n\t"
        "ldr	r7, [%[a], #332]\n\t"
        "ldr	r8, [%[b], #320]\n\t"
        "ldr	r9, [%[b], #324]\n\t"
        "ldr	r10, [%[b], #328]\n\t"
        "ldr	r14, [%[b], #332]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #320]\n\t"
        "str	r5, [%[r], #324]\n\t"
        "str	r6, [%[r], #328]\n\t"
        "str	r7, [%[r], #332]\n\t"
        "ldr	r4, [%[a], #336]\n\t"
        "ldr	r5, [%[a], #340]\n\t"
        "ldr	r6, [%[a], #344]\n\t"
        "ldr	r7, [%[a], #348]\n\t"
        "ldr	r8, [%[b], #336]\n\t"
        "ldr	r9, [%[b], #340]\n\t"
        "ldr	r10, [%[b], #344]\n\t"
        "ldr	r14, [%[b], #348]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #336]\n\t"
        "str	r5, [%[r], #340]\n\t"
        "str	r6, [%[r], #344]\n\t"
        "str	r7, [%[r], #348]\n\t"
        "ldr	r4, [%[a], #352]\n\t"
        "ldr	r5, [%[a], #356]\n\t"
        "ldr	r6, [%[a], #360]\n\t"
        "ldr	r7, [%[a], #364]\n\t"
        "ldr	r8, [%[b], #352]\n\t"
        "ldr	r9, [%[b], #356]\n\t"
        "ldr	r10, [%[b], #360]\n\t"
        "ldr	r14, [%[b], #364]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #352]\n\t"
        "str	r5, [%[r], #356]\n\t"
        "str	r6, [%[r], #360]\n\t"
        "str	r7, [%[r], #364]\n\t"
        "ldr	r4, [%[a], #368]\n\t"
        "ldr	r5, [%[a], #372]\n\t"
        "ldr	r6, [%[a], #376]\n\t"
        "ldr	r7, [%[a], #380]\n\t"
        "ldr	r8, [%[b], #368]\n\t"
        "ldr	r9, [%[b], #372]\n\t"
        "ldr	r10, [%[b], #376]\n\t"
        "ldr	r14, [%[b], #380]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #368]\n\t"
        "str	r5, [%[r], #372]\n\t"
        "str	r6, [%[r], #376]\n\t"
        "str	r7, [%[r], #380]\n\t"
        "ldr	r4, [%[a], #384]\n\t"
        "ldr	r5, [%[a], #388]\n\t"
        "ldr	r6, [%[a], #392]\n\t"
        "ldr	r7, [%[a], #396]\n\t"
        "ldr	r8, [%[b], #384]\n\t"
        "ldr	r9, [%[b], #388]\n\t"
        "ldr	r10, [%[b], #392]\n\t"
        "ldr	r14, [%[b], #396]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #384]\n\t"
        "str	r5, [%[r], #388]\n\t"
        "str	r6, [%[r], #392]\n\t"
        "str	r7, [%[r], #396]\n\t"
        "ldr	r4, [%[a], #400]\n\t"
        "ldr	r5, [%[a], #404]\n\t"
        "ldr	r6, [%[a], #408]\n\t"
        "ldr	r7, [%[a], #412]\n\t"
        "ldr	r8, [%[b], #400]\n\t"
        "ldr	r9, [%[b], #404]\n\t"
        "ldr	r10, [%[b], #408]\n\t"
        "ldr	r14, [%[b], #412]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #400]\n\t"
        "str	r5, [%[r], #404]\n\t"
        "str	r6, [%[r], #408]\n\t"
        "str	r7, [%[r], #412]\n\t"
        "ldr	r4, [%[a], #416]\n\t"
        "ldr	r5, [%[a], #420]\n\t"
        "ldr	r6, [%[a], #424]\n\t"
        "ldr	r7, [%[a], #428]\n\t"
        "ldr	r8, [%[b], #416]\n\t"
        "ldr	r9, [%[b], #420]\n\t"
        "ldr	r10, [%[b], #424]\n\t"
        "ldr	r14, [%[b], #428]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #416]\n\t"
        "str	r5, [%[r], #420]\n\t"
        "str	r6, [%[r], #424]\n\t"
        "str	r7, [%[r], #428]\n\t"
        "ldr	r4, [%[a], #432]\n\t"
        "ldr	r5, [%[a], #436]\n\t"
        "ldr	r6, [%[a], #440]\n\t"
        "ldr	r7, [%[a], #444]\n\t"
        "ldr	r8, [%[b], #432]\n\t"
        "ldr	r9, [%[b], #436]\n\t"
        "ldr	r10, [%[b], #440]\n\t"
        "ldr	r14, [%[b], #444]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #432]\n\t"
        "str	r5, [%[r], #436]\n\t"
        "str	r6, [%[r], #440]\n\t"
        "str	r7, [%[r], #444]\n\t"
        "ldr	r4, [%[a], #448]\n\t"
        "ldr	r5, [%[a], #452]\n\t"
        "ldr	r6, [%[a], #456]\n\t"
        "ldr	r7, [%[a], #460]\n\t"
        "ldr	r8, [%[b], #448]\n\t"
        "ldr	r9, [%[b], #452]\n\t"
        "ldr	r10, [%[b], #456]\n\t"
        "ldr	r14, [%[b], #460]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #448]\n\t"
        "str	r5, [%[r], #452]\n\t"
        "str	r6, [%[r], #456]\n\t"
        "str	r7, [%[r], #460]\n\t"
        "ldr	r4, [%[a], #464]\n\t"
        "ldr	r5, [%[a], #468]\n\t"
        "ldr	r6, [%[a], #472]\n\t"
        "ldr	r7, [%[a], #476]\n\t"
        "ldr	r8, [%[b], #464]\n\t"
        "ldr	r9, [%[b], #468]\n\t"
        "ldr	r10, [%[b], #472]\n\t"
        "ldr	r14, [%[b], #476]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #464]\n\t"
        "str	r5, [%[r], #468]\n\t"
        "str	r6, [%[r], #472]\n\t"
        "str	r7, [%[r], #476]\n\t"
        "ldr	r4, [%[a], #480]\n\t"
        "ldr	r5, [%[a], #484]\n\t"
        "ldr	r6, [%[a], #488]\n\t"
        "ldr	r7, [%[a], #492]\n\t"
        "ldr	r8, [%[b], #480]\n\t"
        "ldr	r9, [%[b], #484]\n\t"
        "ldr	r10, [%[b], #488]\n\t"
        "ldr	r14, [%[b], #492]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #480]\n\t"
        "str	r5, [%[r], #484]\n\t"
        "str	r6, [%[r], #488]\n\t"
        "str	r7, [%[r], #492]\n\t"
        "ldr	r4, [%[a], #496]\n\t"
        "ldr	r5, [%[a], #500]\n\t"
        "ldr	r6, [%[a], #504]\n\t"
        "ldr	r7, [%[a], #508]\n\t"
        "ldr	r8, [%[b], #496]\n\t"
        "ldr	r9, [%[b], #500]\n\t"
        "ldr	r10, [%[b], #504]\n\t"
        "ldr	r14, [%[b], #508]\n\t"
        "adcs	r4, r4, r8\n\t"
        "adcs	r5, r5, r9\n\t"
        "adcs	r6, r6, r10\n\t"
        "adcs	r7, r7, r14\n\t"
        "str	r4, [%[r], #496]\n\t"
        "str	r5, [%[r], #500]\n\t"
        "str	r6, [%[r], #504]\n\t"
        "str	r7, [%[r], #508]\n\t"
        /* c = 0 + 0 + carry: the carry out of the top word. */
        "adc	%[c], r12, r12\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
        /* "cc" is required: adds/adcs rewrite the condition flags, and the
         * compiler must not assume they survive this statement. */
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12",
          "cc"
    );

    return c;
}

/* Multiply a and b into r. (r = a * b)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    __asm__ __volatile__ (
        "sub	sp, sp, #256\n\t"
        "mov	r10, #0\n\t"
        "#  A[0] * B[0]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r3, r4, r8, r9\n\t"
        "mov	r5, #0\n\t"
        "str	r3, [sp]\n\t"
        "#  A[0] * B[1]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[0]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #4]\n\t"
        "#  A[0] * B[2]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[1]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[0]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #8]\n\t"
        "#  A[0] * B[3]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[2]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[1]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[0]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #12]\n\t"
        "#  A[0] * B[4]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[3]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[2]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[1]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[0]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #16]\n\t"
        "#  A[0] * B[5]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[4]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[3]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[2]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[1]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[0]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #20]\n\t"
        "#  A[0] * B[6]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[5]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[4]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[3]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[2]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[1]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[0]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #24]\n\t"
        "#  A[0] * B[7]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[6]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[5]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[4]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[3]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[2]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[1]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[0]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #28]\n\t"
        "#  A[0] * B[8]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[7]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[6]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[5]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[4]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[3]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[2]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[1]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[0]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #32]\n\t"
        "#  A[0] * B[9]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[8]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[7]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[6]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[5]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[4]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[3]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[2]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[1]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[0]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #36]\n\t"
        "#  A[0] * B[10]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[9]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[8]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[7]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[6]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[5]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[4]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[3]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[2]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[1]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[0]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #40]\n\t"
        "#  A[0] * B[11]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[10]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[9]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[8]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[7]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[6]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[5]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[4]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[3]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[2]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[1]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[0]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #44]\n\t"
        "#  A[0] * B[12]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[11]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[10]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[9]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[8]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[7]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[6]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[5]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[4]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[3]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[2]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[1]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[0]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #48]\n\t"
        "#  A[0] * B[13]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[12]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[11]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[10]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[9]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[8]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[7]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[6]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[5]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[4]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[3]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[2]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[1]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[0]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #52]\n\t"
        "#  A[0] * B[14]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[13]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[12]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[11]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[10]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[9]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[8]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[7]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[6]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[5]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[4]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[3]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[2]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[1]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[0]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #56]\n\t"
        "#  A[0] * B[15]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[14]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[13]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[12]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[11]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[10]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[9]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[8]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[7]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[6]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[5]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[4]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[3]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[2]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[1]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[0]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #60]\n\t"
        "#  A[0] * B[16]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[15]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[14]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[13]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[12]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[11]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[10]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[9]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[8]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[7]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[6]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[5]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[4]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[3]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[2]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[1]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[0]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #64]\n\t"
        "#  A[0] * B[17]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[16]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[15]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[14]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[13]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[12]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[11]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[10]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[9]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[8]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[7]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[6]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[5]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[4]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[3]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[2]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[1]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[0]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #68]\n\t"
        "#  A[0] * B[18]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[17]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[16]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[15]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[14]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[13]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[12]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[11]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[10]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[9]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[8]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[7]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[6]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[5]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[4]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[3]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[2]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[1]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[0]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #72]\n\t"
        "#  A[0] * B[19]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[18]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[17]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[16]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[15]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[14]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[13]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[12]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[11]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[10]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[9]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[8]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[7]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[6]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[5]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[4]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[3]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[2]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[1]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[0]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #76]\n\t"
        "#  A[0] * B[20]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[19]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[18]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[17]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[16]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[15]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[14]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[13]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[12]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[11]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[10]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[9]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[8]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[7]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[6]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[5]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[4]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[3]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[2]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[1]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[0]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #80]\n\t"
        "#  A[0] * B[21]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[20]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[19]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[18]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[17]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[16]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[15]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[14]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[13]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[12]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[11]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[10]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[9]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[8]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[7]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[6]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[5]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[4]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[3]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[2]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[1]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[0]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #84]\n\t"
        "#  A[0] * B[22]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[21]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[20]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[19]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[18]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[17]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[16]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[15]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[14]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[13]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[12]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[11]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[10]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[9]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[8]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[7]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[6]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[5]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[4]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[3]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[2]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[1]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[0]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #88]\n\t"
        "#  A[0] * B[23]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[22]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[21]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[20]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[19]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[18]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[17]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[16]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[15]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[14]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[13]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[12]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[11]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[10]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[9]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[8]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[7]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[6]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[5]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[4]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[3]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[2]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[1]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[0]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #92]\n\t"
        "#  A[0] * B[24]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[23]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[22]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[21]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[20]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[19]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[18]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[17]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[16]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[15]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[14]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[13]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[12]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[11]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[10]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[9]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[8]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[7]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[6]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[5]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[4]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[3]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[2]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[1]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[0]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #96]\n\t"
        "#  A[0] * B[25]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[24]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[23]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[22]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[21]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[20]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[19]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[18]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[17]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[16]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[15]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[14]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[13]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[12]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[11]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[10]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[9]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[8]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[7]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[6]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[5]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[4]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[3]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[2]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[1]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[0]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #100]\n\t"
        "#  A[0] * B[26]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[25]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[24]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[23]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[22]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[21]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[20]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[19]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[18]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[17]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[16]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[15]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[14]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[13]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[12]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[11]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[10]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[9]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[8]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[7]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[6]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[5]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[4]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[3]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[2]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[1]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[0]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #104]\n\t"
        "#  A[0] * B[27]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[26]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[25]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[24]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[23]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[22]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[21]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[20]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[19]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[18]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[17]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[16]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[15]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[14]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[13]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[12]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[11]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[10]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[9]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[8]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[7]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[6]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[5]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[4]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[3]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[2]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[1]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[0]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #108]\n\t"
        "#  A[0] * B[28]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[27]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[26]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[25]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[24]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[23]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[22]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[21]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[20]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[19]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[18]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[17]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[16]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[15]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[14]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[13]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[12]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[11]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[10]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[9]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[8]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[7]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[6]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[5]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[4]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[3]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[2]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[1]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[0]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #112]\n\t"
        "#  A[0] * B[29]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[28]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[27]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[26]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[25]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[24]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[23]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[22]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[21]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[20]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[19]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[18]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[17]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[16]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[15]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[14]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[13]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[12]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[11]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[10]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[9]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[8]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[7]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[6]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[5]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[4]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[3]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[2]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[1]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[0]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #116]\n\t"
        "#  A[0] * B[30]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[29]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[28]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[27]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[26]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[25]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[24]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[23]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[22]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[21]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[20]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[19]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[18]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[17]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[16]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[15]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[14]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[13]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[12]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[11]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[10]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[9]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[8]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[7]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[6]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[5]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[4]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[3]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[2]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[1]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[0]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #120]\n\t"
        "#  A[0] * B[31]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[30]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[29]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[28]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[27]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[26]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[25]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[24]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[23]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[22]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[21]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[20]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[19]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[18]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[17]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[16]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[15]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[14]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[13]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[12]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[11]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[10]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[9]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[8]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[7]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[6]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[5]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[4]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[3]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[2]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[1]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[0]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #124]\n\t"
        "#  A[0] * B[32]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[31]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[30]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[29]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[28]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[27]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[26]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[25]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[24]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[23]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[22]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[21]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[20]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[19]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[18]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[17]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[16]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[15]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[14]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[13]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[12]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[11]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[10]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[9]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[8]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[7]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[6]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[5]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[4]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[3]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[2]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[1]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[0]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #128]\n\t"
        "#  A[0] * B[33]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[32]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[31]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[30]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[29]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[28]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[27]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[26]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[25]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[24]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[23]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[22]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[21]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[20]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[19]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[18]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[17]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[16]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[15]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[14]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[13]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[12]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[11]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[10]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[9]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[8]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[7]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[6]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[5]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[4]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[3]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[2]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[1]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[0]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #132]\n\t"
        "#  A[0] * B[34]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[33]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[32]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[31]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[30]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[29]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[28]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[27]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[26]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[25]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[24]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[23]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[22]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[21]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[20]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[19]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[18]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[17]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[16]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[15]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[14]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[13]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[12]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[11]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[10]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[9]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[8]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[7]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[6]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[5]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[4]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[3]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[2]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[1]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[0]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #136]\n\t"
        "#  A[0] * B[35]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[34]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[33]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[32]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[31]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[30]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[29]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[28]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[27]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[26]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[25]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[24]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[23]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[22]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[21]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[20]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[19]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[18]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[17]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[16]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[15]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[14]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[13]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[12]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[11]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[10]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[9]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[8]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[7]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[6]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[5]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[4]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[3]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[2]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[1]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[0]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #140]\n\t"
        "#  A[0] * B[36]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[35]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[34]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[33]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[32]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[31]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[30]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[29]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[28]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[27]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[26]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[25]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[24]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[23]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[22]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[21]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[20]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[19]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[18]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[17]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[16]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[15]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[14]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[13]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[12]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[11]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[10]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[9]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[8]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[7]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[6]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[5]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[4]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[3]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[2]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[1]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[0]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #144]\n\t"
        "#  A[0] * B[37]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[36]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[35]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[34]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[33]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[32]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[31]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[30]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[29]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[28]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[27]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[26]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[25]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[24]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[23]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[22]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[21]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[20]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[19]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[18]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[17]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[16]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[15]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[14]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[13]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[12]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[11]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[10]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[9]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[8]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[7]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[6]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[5]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[4]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[3]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[2]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[1]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[0]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #148]\n\t"
        "#  A[0] * B[38]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[37]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[36]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[35]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[34]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[33]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[32]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[31]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[30]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[29]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[28]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[27]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[26]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[25]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[24]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[23]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[22]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[21]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[20]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[19]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[18]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[17]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[16]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[15]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[14]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[13]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[12]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[11]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[10]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[9]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[8]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[7]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[6]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[5]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[4]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[3]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[2]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[1]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[0]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #152]\n\t"
        "#  A[0] * B[39]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[38]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[37]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[36]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[35]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[34]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[33]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[32]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[31]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[30]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[29]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[28]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[27]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[26]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[25]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[24]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[23]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[22]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[21]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[20]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[19]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[18]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[17]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[16]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[15]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[14]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[13]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[12]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[11]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[10]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[9]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[8]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[7]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[6]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[5]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[4]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[3]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[2]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[1]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[0]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #156]\n\t"
        "#  A[0] * B[40]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[39]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[38]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[37]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[36]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[35]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[34]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[33]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[32]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[31]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[30]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[29]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[28]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[27]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[26]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[25]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[24]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[23]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[22]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[21]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[20]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[19]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[18]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[17]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[16]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[15]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[14]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[13]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[12]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[11]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[10]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[9]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[8]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[7]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[6]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[5]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[4]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[3]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[38] * B[2]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[39] * B[1]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[40] * B[0]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #160]\n\t"
        "#  A[0] * B[41]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[40]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[39]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[38]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[37]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[36]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[35]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[34]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[33]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[32]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[31]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[30]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[29]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[28]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[27]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[26]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[25]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[24]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[23]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[22]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[21]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[20]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[19]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[18]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[17]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[16]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[15]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[14]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[13]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[12]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[11]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[10]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[9]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[8]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[7]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[6]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[5]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[4]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[3]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[39] * B[2]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[40] * B[1]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[41] * B[0]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #164]\n\t"
        "#  A[0] * B[42]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[41]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[40]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[39]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[38]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[37]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[36]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[35]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[34]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[33]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[32]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[31]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[30]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[29]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[28]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[27]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[26]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[25]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[24]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[23]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[22]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[21]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[20]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[19]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[18]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[17]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[16]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[15]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[14]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[13]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[12]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[11]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[10]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[9]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[8]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[7]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[6]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[5]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[4]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[3]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[40] * B[2]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[41] * B[1]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[42] * B[0]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #168]\n\t"
        "#  A[0] * B[43]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[42]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[41]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[40]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[39]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[38]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[37]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[36]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[35]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[34]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[33]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[32]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[31]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[30]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[29]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[28]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[27]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[26]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[25]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[24]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[23]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[22]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[21]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[20]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[19]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[18]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[17]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[16]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[15]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[14]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[13]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[12]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[11]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[10]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[9]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[8]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[7]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[6]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[38] * B[5]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[39] * B[4]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[40] * B[3]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[41] * B[2]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[42] * B[1]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[43] * B[0]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #172]\n\t"
        "#  A[0] * B[44]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[43]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[42]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[41]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[40]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[39]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[38]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[37]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[36]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[35]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[34]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[33]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[32]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[31]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[30]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[29]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[28]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[27]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[26]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[25]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[24]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[23]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[22]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[21]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[20]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[19]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[18]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[17]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[16]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[15]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[14]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[13]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[12]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[11]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[10]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[9]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[8]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[7]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[6]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[39] * B[5]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[40] * B[4]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[41] * B[3]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[42] * B[2]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[43] * B[1]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[44] * B[0]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #176]\n\t"
        "#  A[0] * B[45]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[44]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[43]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[42]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[41]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[40]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[39]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[38]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[37]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[36]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[35]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[34]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[33]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[32]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[31]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[30]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[29]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[28]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[27]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[26]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[25]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[24]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[23]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[22]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[21]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[20]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[19]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[18]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[17]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[16]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[15]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[14]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[13]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[12]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[11]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[10]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[9]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[8]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[7]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[6]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[40] * B[5]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[41] * B[4]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[42] * B[3]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[43] * B[2]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[44] * B[1]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[45] * B[0]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #180]\n\t"
        "#  A[0] * B[46]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[45]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[44]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[43]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[42]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[41]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[40]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[39]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[38]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[37]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[36]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[35]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[34]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[33]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[32]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[31]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[30]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[29]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[28]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[27]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[26]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[25]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[24]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[23]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[22]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[21]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[20]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[19]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[18]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[17]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[16]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[15]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[14]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[13]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[12]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[11]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[10]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[9]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[38] * B[8]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[39] * B[7]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[40] * B[6]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[41] * B[5]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[42] * B[4]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[43] * B[3]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[44] * B[2]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[45] * B[1]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[46] * B[0]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #184]\n\t"
        "#  A[0] * B[47]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[46]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[45]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[44]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[43]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[42]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[41]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[40]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[39]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[38]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[37]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[36]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[35]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[34]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[33]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[32]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[31]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[30]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[29]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[28]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[27]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[26]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[25]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[24]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[23]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[22]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[21]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[20]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[19]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[18]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[17]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[16]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[15]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[14]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[13]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[12]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[11]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[10]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[9]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[39] * B[8]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[40] * B[7]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[41] * B[6]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[42] * B[5]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[43] * B[4]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[44] * B[3]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[45] * B[2]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[46] * B[1]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[47] * B[0]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #188]\n\t"
        "#  A[0] * B[48]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[47]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[46]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[45]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[44]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[43]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[42]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[41]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[40]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[39]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[38]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[37]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[36]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[35]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[34]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[33]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[32]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[31]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[30]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[29]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[28]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[27]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[26]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[25]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[24]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[23]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[22]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[21]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[20]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[19]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[18]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[17]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[16]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[15]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[14]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[13]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[12]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[11]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[10]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[9]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[40] * B[8]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[41] * B[7]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[42] * B[6]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[43] * B[5]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[44] * B[4]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[45] * B[3]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[46] * B[2]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[47] * B[1]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[48] * B[0]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #192]\n\t"
        "#  A[0] * B[49]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[48]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[47]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[46]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[45]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[44]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[43]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[42]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[41]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[40]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[39]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[38]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[37]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[36]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[35]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[34]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[33]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[32]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[31]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[30]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[29]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[28]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[27]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[26]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[25]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[24]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[23]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[22]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[21]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[20]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[19]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[18]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[17]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[16]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[15]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[14]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[13]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[12]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[38] * B[11]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[39] * B[10]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[40] * B[9]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[41] * B[8]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[42] * B[7]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[43] * B[6]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[44] * B[5]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[45] * B[4]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[46] * B[3]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[47] * B[2]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[48] * B[1]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[49] * B[0]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #196]\n\t"
        "#  A[0] * B[50]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[49]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[48]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[47]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[46]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[45]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[44]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[43]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[42]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[41]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[40]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[39]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[38]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[37]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[36]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[35]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[34]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[33]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[32]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[31]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[30]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[29]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[28]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[27]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[26]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[25]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[24]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[23]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[22]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[21]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[20]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[19]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[18]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[17]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[16]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[15]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[14]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[13]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[12]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[39] * B[11]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[40] * B[10]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[41] * B[9]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[42] * B[8]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[43] * B[7]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[44] * B[6]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[45] * B[5]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[46] * B[4]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[47] * B[3]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[48] * B[2]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[49] * B[1]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[50] * B[0]\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #200]\n\t"
        "#  A[0] * B[51]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[50]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[49]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[48]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[47]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[46]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[45]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[44]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[43]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[42]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[41]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[40]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[39]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[38]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[37]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[36]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[35]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[34]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[33]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[32]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[31]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[30]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[29]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[28]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[27]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[26]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[25]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[24]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[23]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[22]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[21]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[20]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[19]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[18]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[17]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[16]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[15]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[14]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[13]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[12]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[40] * B[11]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[41] * B[10]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[42] * B[9]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[43] * B[8]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[44] * B[7]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[45] * B[6]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[46] * B[5]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[47] * B[4]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[48] * B[3]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[49] * B[2]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[50] * B[1]\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[51] * B[0]\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #204]\n\t"
        "#  A[0] * B[52]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #208]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[51]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[50]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[49]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[48]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[47]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[46]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[45]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[44]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[43]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[42]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[41]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[40]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[39]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[38]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[37]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[36]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[35]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[34]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[33]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[32]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[31]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[30]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[29]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[28]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[27]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[26]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[25]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[24]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[23]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[22]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[21]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[20]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[19]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[18]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[17]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[16]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[37] * B[15]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[38] * B[14]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[39] * B[13]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[40] * B[12]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[41] * B[11]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[42] * B[10]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[43] * B[9]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[44] * B[8]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[45] * B[7]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[46] * B[6]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[47] * B[5]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[48] * B[4]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[49] * B[3]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[50] * B[2]\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[51] * B[1]\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[52] * B[0]\n\t"
        "ldr	r8, [%[a], #208]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "str	r4, [sp, #208]\n\t"
        "#  A[0] * B[53]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r10, r10\n\t"
        "#  A[1] * B[52]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #208]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[2] * B[51]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[3] * B[50]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[4] * B[49]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[5] * B[48]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[6] * B[47]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[7] * B[46]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[8] * B[45]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[9] * B[44]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[10] * B[43]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[11] * B[42]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[12] * B[41]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[13] * B[40]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[14] * B[39]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[15] * B[38]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[16] * B[37]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[17] * B[36]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[18] * B[35]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[19] * B[34]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[20] * B[33]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[21] * B[32]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[22] * B[31]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[23] * B[30]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[24] * B[29]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[25] * B[28]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[26] * B[27]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[27] * B[26]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[28] * B[25]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[29] * B[24]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[30] * B[23]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[31] * B[22]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[32] * B[21]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[33] * B[20]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[34] * B[19]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[35] * B[18]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[36] * B[17]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[37] * B[16]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[38] * B[15]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[39] * B[14]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[40] * B[13]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[41] * B[12]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[42] * B[11]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[43] * B[10]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[44] * B[9]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[45] * B[8]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[46] * B[7]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[47] * B[6]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[48] * B[5]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[49] * B[4]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[50] * B[3]\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[51] * B[2]\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[52] * B[1]\n\t"
        "ldr	r8, [%[a], #208]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "#  A[53] * B[0]\n\t"
        "ldr	r8, [%[a], #212]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r5, r5, r6\n\t"
        "adcs	r3, r3, r7\n\t"
        "adc	r4, r4, r10\n\t"
        "str	r5, [sp, #212]\n\t"
        "#  A[0] * B[54]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #216]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r10, r10\n\t"
        "#  A[1] * B[53]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[2] * B[52]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #208]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[3] * B[51]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[4] * B[50]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[5] * B[49]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[6] * B[48]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[7] * B[47]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[8] * B[46]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[9] * B[45]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[10] * B[44]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[11] * B[43]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[12] * B[42]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[13] * B[41]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[14] * B[40]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[15] * B[39]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[16] * B[38]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[17] * B[37]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[18] * B[36]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[19] * B[35]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[20] * B[34]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[21] * B[33]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[22] * B[32]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[23] * B[31]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[24] * B[30]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[25] * B[29]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[26] * B[28]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[27] * B[27]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[28] * B[26]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[29] * B[25]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[30] * B[24]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[31] * B[23]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[32] * B[22]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[33] * B[21]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[34] * B[20]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[35] * B[19]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[36] * B[18]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #72]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[37] * B[17]\n\t"
        "ldr	r8, [%[a], #148]\n\t"
        "ldr	r9, [%[b], #68]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[38] * B[16]\n\t"
        "ldr	r8, [%[a], #152]\n\t"
        "ldr	r9, [%[b], #64]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[39] * B[15]\n\t"
        "ldr	r8, [%[a], #156]\n\t"
        "ldr	r9, [%[b], #60]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[40] * B[14]\n\t"
        "ldr	r8, [%[a], #160]\n\t"
        "ldr	r9, [%[b], #56]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[41] * B[13]\n\t"
        "ldr	r8, [%[a], #164]\n\t"
        "ldr	r9, [%[b], #52]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[42] * B[12]\n\t"
        "ldr	r8, [%[a], #168]\n\t"
        "ldr	r9, [%[b], #48]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[43] * B[11]\n\t"
        "ldr	r8, [%[a], #172]\n\t"
        "ldr	r9, [%[b], #44]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[44] * B[10]\n\t"
        "ldr	r8, [%[a], #176]\n\t"
        "ldr	r9, [%[b], #40]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[45] * B[9]\n\t"
        "ldr	r8, [%[a], #180]\n\t"
        "ldr	r9, [%[b], #36]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[46] * B[8]\n\t"
        "ldr	r8, [%[a], #184]\n\t"
        "ldr	r9, [%[b], #32]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[47] * B[7]\n\t"
        "ldr	r8, [%[a], #188]\n\t"
        "ldr	r9, [%[b], #28]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[48] * B[6]\n\t"
        "ldr	r8, [%[a], #192]\n\t"
        "ldr	r9, [%[b], #24]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[49] * B[5]\n\t"
        "ldr	r8, [%[a], #196]\n\t"
        "ldr	r9, [%[b], #20]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[50] * B[4]\n\t"
        "ldr	r8, [%[a], #200]\n\t"
        "ldr	r9, [%[b], #16]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[51] * B[3]\n\t"
        "ldr	r8, [%[a], #204]\n\t"
        "ldr	r9, [%[b], #12]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[52] * B[2]\n\t"
        "ldr	r8, [%[a], #208]\n\t"
        "ldr	r9, [%[b], #8]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[53] * B[1]\n\t"
        "ldr	r8, [%[a], #212]\n\t"
        "ldr	r9, [%[b], #4]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "#  A[54] * B[0]\n\t"
        "ldr	r8, [%[a], #216]\n\t"
        "ldr	r9, [%[b], #0]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r7\n\t"
        "adc	r5, r5, r10\n\t"
        "str	r3, [sp, #216]\n\t"
        "#  A[0] * B[55]\n\t"
        "ldr	r8, [%[a], #0]\n\t"
        "ldr	r9, [%[b], #220]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r10, r10\n\t"
        "#  A[1] * B[54]\n\t"
        "ldr	r8, [%[a], #4]\n\t"
        "ldr	r9, [%[b], #216]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[2] * B[53]\n\t"
        "ldr	r8, [%[a], #8]\n\t"
        "ldr	r9, [%[b], #212]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[3] * B[52]\n\t"
        "ldr	r8, [%[a], #12]\n\t"
        "ldr	r9, [%[b], #208]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[4] * B[51]\n\t"
        "ldr	r8, [%[a], #16]\n\t"
        "ldr	r9, [%[b], #204]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[5] * B[50]\n\t"
        "ldr	r8, [%[a], #20]\n\t"
        "ldr	r9, [%[b], #200]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[6] * B[49]\n\t"
        "ldr	r8, [%[a], #24]\n\t"
        "ldr	r9, [%[b], #196]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[7] * B[48]\n\t"
        "ldr	r8, [%[a], #28]\n\t"
        "ldr	r9, [%[b], #192]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[8] * B[47]\n\t"
        "ldr	r8, [%[a], #32]\n\t"
        "ldr	r9, [%[b], #188]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[9] * B[46]\n\t"
        "ldr	r8, [%[a], #36]\n\t"
        "ldr	r9, [%[b], #184]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[10] * B[45]\n\t"
        "ldr	r8, [%[a], #40]\n\t"
        "ldr	r9, [%[b], #180]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[11] * B[44]\n\t"
        "ldr	r8, [%[a], #44]\n\t"
        "ldr	r9, [%[b], #176]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[12] * B[43]\n\t"
        "ldr	r8, [%[a], #48]\n\t"
        "ldr	r9, [%[b], #172]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[13] * B[42]\n\t"
        "ldr	r8, [%[a], #52]\n\t"
        "ldr	r9, [%[b], #168]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[14] * B[41]\n\t"
        "ldr	r8, [%[a], #56]\n\t"
        "ldr	r9, [%[b], #164]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[15] * B[40]\n\t"
        "ldr	r8, [%[a], #60]\n\t"
        "ldr	r9, [%[b], #160]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[16] * B[39]\n\t"
        "ldr	r8, [%[a], #64]\n\t"
        "ldr	r9, [%[b], #156]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[17] * B[38]\n\t"
        "ldr	r8, [%[a], #68]\n\t"
        "ldr	r9, [%[b], #152]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[18] * B[37]\n\t"
        "ldr	r8, [%[a], #72]\n\t"
        "ldr	r9, [%[b], #148]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[19] * B[36]\n\t"
        "ldr	r8, [%[a], #76]\n\t"
        "ldr	r9, [%[b], #144]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[20] * B[35]\n\t"
        "ldr	r8, [%[a], #80]\n\t"
        "ldr	r9, [%[b], #140]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[21] * B[34]\n\t"
        "ldr	r8, [%[a], #84]\n\t"
        "ldr	r9, [%[b], #136]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[22] * B[33]\n\t"
        "ldr	r8, [%[a], #88]\n\t"
        "ldr	r9, [%[b], #132]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[23] * B[32]\n\t"
        "ldr	r8, [%[a], #92]\n\t"
        "ldr	r9, [%[b], #128]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[24] * B[31]\n\t"
        "ldr	r8, [%[a], #96]\n\t"
        "ldr	r9, [%[b], #124]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[25] * B[30]\n\t"
        "ldr	r8, [%[a], #100]\n\t"
        "ldr	r9, [%[b], #120]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[26] * B[29]\n\t"
        "ldr	r8, [%[a], #104]\n\t"
        "ldr	r9, [%[b], #116]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[27] * B[28]\n\t"
        "ldr	r8, [%[a], #108]\n\t"
        "ldr	r9, [%[b], #112]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[28] * B[27]\n\t"
        "ldr	r8, [%[a], #112]\n\t"
        "ldr	r9, [%[b], #108]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[29] * B[26]\n\t"
        "ldr	r8, [%[a], #116]\n\t"
        "ldr	r9, [%[b], #104]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[30] * B[25]\n\t"
        "ldr	r8, [%[a], #120]\n\t"
        "ldr	r9, [%[b], #100]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[31] * B[24]\n\t"
        "ldr	r8, [%[a], #124]\n\t"
        "ldr	r9, [%[b], #96]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[32] * B[23]\n\t"
        "ldr	r8, [%[a], #128]\n\t"
        "ldr	r9, [%[b], #92]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[33] * B[22]\n\t"
        "ldr	r8, [%[a], #132]\n\t"
        "ldr	r9, [%[b], #88]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[34] * B[21]\n\t"
        "ldr	r8, [%[a], #136]\n\t"
        "ldr	r9, [%[b], #84]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[35] * B[20]\n\t"
        "ldr	r8, [%[a], #140]\n\t"
        "ldr	r9, [%[b], #80]\n\t"
        "umull	r6, r7, r8, r9\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r7\n\t"
        "adc	r3, r3, r10\n\t"
        "#  A[36] * B[19]\n\t"
        "ldr	r8, [%[a], #144]\n\t"
        "ldr	r9, [%[b], #76]\n\t"
        "umull	r6, r7, r8, r9\n\t"
    