From f313b919aec45b9558470c5e5b66964133b9edc0 Mon Sep 17 00:00:00 2001 From: Norman Ashley Date: Thu, 13 Jul 2023 14:26:03 -0400 Subject: [PATCH] Na lms (#1486) * Add base LMS library * ignore use of free() by adding // IGNORE free-check * ignore use of free() by adding // IGNORE free-check --- src/sig_stfl/lms/external/common_defs.h | 178 ++++ src/sig_stfl/lms/external/config.h | 36 + src/sig_stfl/lms/external/endian.c | 23 + src/sig_stfl/lms/external/endian.h | 9 + src/sig_stfl/lms/external/hash.c | 119 +++ src/sig_stfl/lms/external/hash.h | 57 ++ src/sig_stfl/lms/external/hss.c | 169 ++++ src/sig_stfl/lms/external/hss.h | 417 ++++++++ src/sig_stfl/lms/external/hss_alloc.c | 555 +++++++++++ src/sig_stfl/lms/external/hss_aux.c | 355 +++++++ src/sig_stfl/lms/external/hss_aux.h | 59 ++ src/sig_stfl/lms/external/hss_common.c | 48 + src/sig_stfl/lms/external/hss_common.h | 22 + src/sig_stfl/lms/external/hss_compute.c | 174 ++++ src/sig_stfl/lms/external/hss_derive.c | 325 ++++++ src/sig_stfl/lms/external/hss_derive.h | 74 ++ src/sig_stfl/lms/external/hss_generate.c | 932 ++++++++++++++++++ src/sig_stfl/lms/external/hss_internal.h | 243 +++++ src/sig_stfl/lms/external/hss_keygen.c | 368 +++++++ src/sig_stfl/lms/external/hss_param.c | 153 +++ src/sig_stfl/lms/external/hss_reserve.c | 194 ++++ src/sig_stfl/lms/external/hss_reserve.h | 21 + src/sig_stfl/lms/external/hss_sign.c | 736 ++++++++++++++ src/sig_stfl/lms/external/hss_sign_inc.c | 218 ++++ src/sig_stfl/lms/external/hss_sign_inc.h | 81 ++ src/sig_stfl/lms/external/hss_thread.h | 135 +++ .../lms/external/hss_thread_pthread.c | 298 ++++++ src/sig_stfl/lms/external/hss_thread_single.c | 63 ++ src/sig_stfl/lms/external/hss_verify.c | 196 ++++ src/sig_stfl/lms/external/hss_verify.h | 23 + src/sig_stfl/lms/external/hss_verify_inc.c | 203 ++++ src/sig_stfl/lms/external/hss_verify_inc.h | 82 ++ src/sig_stfl/lms/external/hss_zeroize.c | 49 + src/sig_stfl/lms/external/hss_zeroize.h | 10 + 
src/sig_stfl/lms/external/lm_common.c | 79 ++ src/sig_stfl/lms/external/lm_common.h | 20 + src/sig_stfl/lms/external/lm_ots.h | 64 ++ src/sig_stfl/lms/external/lm_ots_common.c | 99 ++ src/sig_stfl/lms/external/lm_ots_common.h | 16 + src/sig_stfl/lms/external/lm_ots_sign.c | 168 ++++ src/sig_stfl/lms/external/lm_ots_verify.c | 122 +++ src/sig_stfl/lms/external/lm_ots_verify.h | 23 + src/sig_stfl/lms/external/lm_verify.c | 107 ++ src/sig_stfl/lms/external/lm_verify.h | 12 + src/sig_stfl/lms/external/sha256.c | 183 ++++ src/sig_stfl/lms/external/sha256.h | 43 + 46 files changed, 7561 insertions(+) create mode 100644 src/sig_stfl/lms/external/common_defs.h create mode 100644 src/sig_stfl/lms/external/config.h create mode 100644 src/sig_stfl/lms/external/endian.c create mode 100644 src/sig_stfl/lms/external/endian.h create mode 100644 src/sig_stfl/lms/external/hash.c create mode 100644 src/sig_stfl/lms/external/hash.h create mode 100644 src/sig_stfl/lms/external/hss.c create mode 100644 src/sig_stfl/lms/external/hss.h create mode 100644 src/sig_stfl/lms/external/hss_alloc.c create mode 100644 src/sig_stfl/lms/external/hss_aux.c create mode 100644 src/sig_stfl/lms/external/hss_aux.h create mode 100644 src/sig_stfl/lms/external/hss_common.c create mode 100644 src/sig_stfl/lms/external/hss_common.h create mode 100644 src/sig_stfl/lms/external/hss_compute.c create mode 100644 src/sig_stfl/lms/external/hss_derive.c create mode 100644 src/sig_stfl/lms/external/hss_derive.h create mode 100644 src/sig_stfl/lms/external/hss_generate.c create mode 100644 src/sig_stfl/lms/external/hss_internal.h create mode 100644 src/sig_stfl/lms/external/hss_keygen.c create mode 100644 src/sig_stfl/lms/external/hss_param.c create mode 100644 src/sig_stfl/lms/external/hss_reserve.c create mode 100644 src/sig_stfl/lms/external/hss_reserve.h create mode 100644 src/sig_stfl/lms/external/hss_sign.c create mode 100644 src/sig_stfl/lms/external/hss_sign_inc.c create mode 100644 
src/sig_stfl/lms/external/hss_sign_inc.h create mode 100644 src/sig_stfl/lms/external/hss_thread.h create mode 100644 src/sig_stfl/lms/external/hss_thread_pthread.c create mode 100644 src/sig_stfl/lms/external/hss_thread_single.c create mode 100644 src/sig_stfl/lms/external/hss_verify.c create mode 100644 src/sig_stfl/lms/external/hss_verify.h create mode 100644 src/sig_stfl/lms/external/hss_verify_inc.c create mode 100644 src/sig_stfl/lms/external/hss_verify_inc.h create mode 100644 src/sig_stfl/lms/external/hss_zeroize.c create mode 100644 src/sig_stfl/lms/external/hss_zeroize.h create mode 100644 src/sig_stfl/lms/external/lm_common.c create mode 100644 src/sig_stfl/lms/external/lm_common.h create mode 100644 src/sig_stfl/lms/external/lm_ots.h create mode 100644 src/sig_stfl/lms/external/lm_ots_common.c create mode 100644 src/sig_stfl/lms/external/lm_ots_common.h create mode 100644 src/sig_stfl/lms/external/lm_ots_sign.c create mode 100644 src/sig_stfl/lms/external/lm_ots_verify.c create mode 100644 src/sig_stfl/lms/external/lm_ots_verify.h create mode 100644 src/sig_stfl/lms/external/lm_verify.c create mode 100644 src/sig_stfl/lms/external/lm_verify.h create mode 100644 src/sig_stfl/lms/external/sha256.c create mode 100644 src/sig_stfl/lms/external/sha256.h diff --git a/src/sig_stfl/lms/external/common_defs.h b/src/sig_stfl/lms/external/common_defs.h new file mode 100644 index 0000000000..83739949ee --- /dev/null +++ b/src/sig_stfl/lms/external/common_defs.h @@ -0,0 +1,178 @@ +#if !defined( COMMON_DEFS_H_ ) +#define COMMON_DEFS_H_ + +/* + * These are defintions for the LMS implementation that are common throughout + * the system (and so are collected in one place) + */ + +#include +#include + +#define MAX_HASH 32 /* Length of the largest hash we support */ + +/* The I (Merkle tree identifier) value is 16 bytes long */ +#define I_LEN 16 + +/* The maximum height of a Merkle tree */ +#define MAX_MERKLE_HEIGHT 25 + +/* The mininum height of a Merkle tree. 
Some of our update logic assumes */ +/* this isn't too small */ +#define MIN_MERKLE_HEIGHT 5 + +/* The minimum/maximum number of levels of Merkle trees within an HSS tree */ +#define MIN_HSS_LEVELS 1 /* Minimum levels we allow */ +#define MAX_HSS_LEVELS 8 /* Maximum levels we allow */ + +/* This is the length of our internal seed values */ +#define SEED_LEN 32 /* Enough to make Grover's infeasible */ + +/* Here are some internal types used within the code. They are listed more */ +/* for documentation ("this is what this variable is expected to be") rather */ +/* than to let the compiler do any sort of type checking */ + + /* This is an index into a Merkle tree */ + /* Used for both the leaf index (0..N-1) and the node number (1..2*N-1), */ + /* where N is the size 2**h of the tree */ +#if MAX_MERKLE_HEIGHT > 31 + /* We need to express more than 32 bits in this type */ +typedef uint_fast64_t merkle_index_t; +#error We need to extend the id we place within a hash to more than 4 bytes +#else +typedef uint_fast32_t merkle_index_t; +#endif + + /* This is the name of a parameter set */ + /* Used for both an OTS parameter set or an LM parameter set */ + /* Both are 32 bits */ +typedef uint_fast32_t param_set_t; + + /* This is a sequence number over an HSS tree */ + /* This means we can never generate more than 2**64 signatures from a */ + /* private key (even if the parameter set would, in theory, allow us */ + /* to do more) */ +typedef uint_fast64_t sequence_t; + +/* Defined LM parameter sets */ +#define LMS_SHA256_N32_H5 0x00000005 +#define LMS_SHA256_N32_H10 0x00000006 +#define LMS_SHA256_N32_H15 0x00000007 +#define LMS_SHA256_N32_H20 0x00000008 +#define LMS_SHA256_N32_H25 0x00000009 + +/* LM-OTS registry */ +#define LMOTS_SHA256_N32_W1 0x00000001 +#define LMOTS_SHA256_N32_W2 0x00000002 +#define LMOTS_SHA256_N32_W4 0x00000003 +#define LMOTS_SHA256_N32_W8 0x00000004 + +/* + * Internal formats of various hashes + * + * We do a number of different hashes as a part of 
this package; some + * specified by the draft, some specific to us. + * For each such hash, we list the values being hashed, and the offset + * from the start where they go. We treat them as indicies into unsigned char + * arrays, and not structs, to avoid any potential padding issues with structs + * + * For a hash of type XXXX, XXXX_Z is the offset where component Z goes, + * XXXX_LEN(hash_len) is the length being hashed (assuming that hash length), + * XXXX_MAXLEN is the maximum length it can be (for allocation), and D_XXXX + * is the hash distinguisher (the value that makes it different from any other + * hash) + */ + +/* The initial message hashing */ +#define MESG_I 0 +#define MESG_Q 16 +#define MESG_D 20 /* The fixed D_MESG value */ +#define MESG_C 22 +#define MESG_PREFIX_LEN(n) (MESG_C + (n)) /* Length not counting the actual */ + /* message being signed */ +#define MESG_PREFIX_MAXLEN MESG_PREFIX_LEN(MAX_HASH) +#define D_MESG 0x8181 + +/* The Winternitz iteration hashes */ +#define ITER_I 0 +#define ITER_Q 16 +#define ITER_K 20 /* The RFC uses i here */ +#define ITER_J 22 +#define ITER_PREV 23 /* Hash from previous iteration; RFC uses tmp */ +#define ITER_LEN(hash_len) (ITER_PREV + (hash_len)) +#define ITER_MAX_LEN ITER_LEN(MAX_HASH) + +/* Hashing the OTS public key */ +#define PBLC_I 0 +#define PBLC_Q 16 +#define PBLC_D 20 /* The fixed D_PBLC value */ +#define PBLC_PREFIX_LEN 22 /* Not counting the OTS public keys */ +#define D_PBLC 0x8080 + +/* Hashing Merkle tree leaf nodes */ +#define LEAF_I 0 +#define LEAF_R 16 +#define LEAF_D 20 +#define LEAF_PK 22 +#define LEAF_LEN(root_len) (LEAF_PK + (root_len)) +#define LEAF_MAX_LEN LEAF_LEN(MAX_HASH) +#define D_LEAF 0x8282 + +/* Hashing Merkle tree internal nodes */ +#define INTR_I 0 +#define INTR_R 16 +#define INTR_D 20 +#define INTR_PK 22 +#define INTR_LEN(root_len) (INTR_PK + 2 * (root_len)) +#define INTR_MAX_LEN INTR_LEN(MAX_HASH) +#define D_INTR 0x8383 + +/* The determanistic key generation */ +/* Also used 
to generate subkeys in the j-tree hierarchy */ +/* As we'll always do either one or the other, we can reuse the structure */ +/* for both purposes */ +#define PRG_I 0 +#define PRG_Q 16 +#define PRG_J 20 +#define PRG_FF 22 /* A fixed 0xff goes here */ +#define PRG_SEED 23 +#define PRG_LEN(seed_len) (23 + (seed_len)) +#define PRG_MAX_LEN PRG_LEN(MAX_HASH) + +/* The below are hash formats that the draft does not list, but we */ +/* implement ourselves (largely because we need to be determanistic */ +/* based on the seed) */ + +/* Hash used to generate subkeys in the q tree hierarchy */ +#define QTREE_I 0 +#define QTREE_Q 16 +#define QTREE_D 20 /* D_QTREE goes here */ +#define QTREE_SEED 22 +#define QTREE_LEN (22 + 32) /* We assume a fixed length seed */ +#define QTREE_MAX_LEN QTREE_LEN +#define D_QTREE 0xffff + +/* Hash used to generate the master seed for the top level Merkle tree */ +#define TOPSEED_I 0 /* 16 0's here (we don't have an I value) */ +#define TOPSEED_Q 16 /* 0's here (as we don't have a Q value) */ +#define TOPSEED_D 20 /* D_TOPSEED */ +#define TOPSEED_WHICH 22 /* 0 -> Gen Master seed (used as seed for */ + /* the next two) */ + /* 1 -> Create top level seed */ + /* 2 -> Create top level I */ +#define TOPSEED_SEED 23 /* 32 bytes long */ +#define TOPSEED_LEN (TOPSEED_SEED + 32) +#define D_TOPSEED 0xfefe + +/* Hash used to generate the key used for the authenticating the aux values */ +#define DAUX_I 0 /* 16 0's here (no I value) */ +#define DAUX_Q 16 /* 4 more 0's here (no Q value) */ +#define DAUX_D 20 /* D_AUX_SEED_DERIVE */ +#define DAUX_PREFIX_LEN 22 /* Not counting the seed value */ +#define D_DAUX 0xfdfd + +/* Macro to set the D_XXXX value to the XXXX_D offset */ +#define SET_D(p, value) (void)(((p)[0] = (value) >> 8), \ + ((p)[1] = (value) & 0xff)) + +#endif /* COMMON_DEFS_H_ */ diff --git a/src/sig_stfl/lms/external/config.h b/src/sig_stfl/lms/external/config.h new file mode 100644 index 0000000000..e23d19fa9a --- /dev/null +++ 
b/src/sig_stfl/lms/external/config.h @@ -0,0 +1,36 @@ +#if !defined( CONFIG_H_ ) +#define CONFIG_H_ + +#define LMS_UNUSED(x) (void)(x) + +/* + * This file has #define's that specify how this package operates, and + * are designed to be tweaked by the user. + * + * These can be adjusted to be appropriate for what the application and + * the operating environment needs + */ + +/* + * This modifies which seed generation logic we use + * Note that changing these parameters will change the mapping + * between private keys. + * + * 0 -> We generate seeds using the process defined in Appendix A of the draft + * This is slightly faster + * 1 -> We use a side channel resistant process, never using any single secret + * seed in more than a defined number of distinct hashes + * 2 -> We generate seeds and secrets in a way which is compatible with ACVP + */ +#define SECRET_METHOD 2 + +/* + * If we're using the side channel resistant method, this defines the max + * number of times we'll use a single secret. 
Note that this is the log2 + * of the max number of times, and so 3 means 'no more than 8 times' + * Reducing SECRET_MAX is a bit more costly; however I don't know that if + * it is significant + */ +#define SECRET_MAX 4 /* Never use a seed more than 16 times */ + +#endif /* CONFIG_H_ */ diff --git a/src/sig_stfl/lms/external/endian.c b/src/sig_stfl/lms/external/endian.c new file mode 100644 index 0000000000..709dc7bf98 --- /dev/null +++ b/src/sig_stfl/lms/external/endian.c @@ -0,0 +1,23 @@ +#include "endian.h" + +void put_bigendian( void *target, unsigned long long value, size_t bytes ) { + unsigned char *b = target; + int i; + + for (i = bytes-1; i >= 0; i--) { + b[i] = value & 0xff; + value >>= 8; + } +} + +unsigned long long get_bigendian( const void *target, size_t bytes ) { + const unsigned char *b = target; + unsigned long long result = 0; + size_t i; + + for (i=0; i + +void put_bigendian( void *target, unsigned long long value, size_t bytes ); +unsigned long long get_bigendian( const void *target, size_t bytes ); + +#endif /* ENDIAN_H_ */ diff --git a/src/sig_stfl/lms/external/hash.c b/src/sig_stfl/lms/external/hash.c new file mode 100644 index 0000000000..dffcdaf6a6 --- /dev/null +++ b/src/sig_stfl/lms/external/hash.c @@ -0,0 +1,119 @@ +#include +#include "hash.h" +#include "sha256.h" +#include "hss_zeroize.h" + +#define ALLOW_VERBOSE 0 /* 1 -> we allow the dumping of intermediate */ + /* states. Useful for debugging; horrid */ + /* for security */ + +/* + * This is the file that implements the hashing APIs we use internally. + * At the present, our parameter sets support only one hash function + * (SHA-256, using full 256 bit output), however, that is likely to change + * in the future + */ + +#if ALLOW_VERBOSE +#include +#include +/* + * Debugging flag; if this is set, we chat about what we're hashing, and what + * the result is it's useful when debugging; however we probably don't want to + * do this if we're multithreaded... 
+ */ +bool hss_verbose = false; +#endif + +/* + * This will hash the message, given the hash type. It assumes that the result + * buffer is large enough for the hash + */ +void hss_hash_ctx(void *result, int hash_type, union hash_context *ctx, + const void *message, size_t message_len) { +#if ALLOW_VERBOSE + if (hss_verbose) { + int i; for (i=0; i< message_len; i++) printf( " %02x%s", ((unsigned char*)message)[i], (i%16 == 15) ? "\n" : "" ); + } +#endif + + switch (hash_type) { + case HASH_SHA256: { + SHA256_Init(&ctx->sha256); + SHA256_Update(&ctx->sha256, message, message_len); + SHA256_Final(result, &ctx->sha256); +#if ALLOW_VERBOSE + if (hss_verbose) { + printf( " ->" ); + int i; for (i=0; i<32; i++) printf( " %02x", ((unsigned char *)result)[i] ); printf( "\n" ); + } +#endif + break; + } + } +} + +void hss_hash(void *result, int hash_type, + const void *message, size_t message_len) { + union hash_context ctx; + hss_hash_ctx(result, hash_type, &ctx, message, message_len); + hss_zeroize(&ctx, sizeof ctx); +} + + +/* + * This provides an API to do incremental hashing. 
We use it when hashing the + * message; since we don't know how long it could be, we don't want to + * allocate a buffer that's long enough for that, plus the decoration we add + */ +void hss_init_hash_context(int h, union hash_context *ctx) { + switch (h) { + case HASH_SHA256: + SHA256_Init( &ctx->sha256 ); + break; + } +} + +void hss_update_hash_context(int h, union hash_context *ctx, + const void *msg, size_t len_msg) { +#if ALLOW_VERBOSE + if (hss_verbose) { + int i; for (i=0; isha256, msg, len_msg); + break; + } +} + +void hss_finalize_hash_context(int h, union hash_context *ctx, void *buffer) { + switch (h) { + case HASH_SHA256: + SHA256_Final(buffer, &ctx->sha256); +#if ALLOW_VERBOSE + if (hss_verbose) { + printf( " -->" ); + int i; for (i=0; i<32; i++) printf( " %02x", ((unsigned char*)buffer)[i] ); + printf( "\n" ); + } +#endif + break; + } +} + + +unsigned hss_hash_length(int hash_type) { + switch (hash_type) { + case HASH_SHA256: return 32; + } + return 0; +} + +unsigned hss_hash_blocksize(int hash_type) { + switch (hash_type) { + case HASH_SHA256: return 64; + } + return 0; +} diff --git a/src/sig_stfl/lms/external/hash.h b/src/sig_stfl/lms/external/hash.h new file mode 100644 index 0000000000..a61f9f5039 --- /dev/null +++ b/src/sig_stfl/lms/external/hash.h @@ -0,0 +1,57 @@ +#if !defined( HASH_H__ ) +#define HASH_H__ +#include "sha256.h" +#include +#include + +/* + * This defines the hash interface used within HSS. + * All globals are prefixed with hss_ to avoid name conflicts + * Gee, C++ namespaces would be nice... 
+ */ + +/* + * Hash types + */ +enum { + HASH_SHA256 = 1, /* SHA256 */ +}; + +union hash_context { + SHA256_CTX sha256; + /* Any other hash contexts would go here */ +}; + +/* Hash the message */ +void hss_hash(void *result, int hash_type, + const void *message, size_t message_len); + +/* Does the same, but with the passed hash context (which isn't zeroized) */ +/* This is here to save time; let the caller use the same ctx for multiple */ +/* hashes, and then finally zeroize it if necessary */ +void hss_hash_ctx(void *result, int hash_type, union hash_context *ctx, + const void *message, size_t message_len); + +/* + * This is a debugging flag; turning this on will cause the system to dump + * the inputs and the outputs of all hash functions. It only works if + * debugging is allowed in hash.c (it's off by default), and it is *real* + * chatty; however sometimes you really need it for debugging + */ +extern bool hss_verbose; + +/* + * This constant has migrated to common_defs.h + */ +/* #define MAX_HASH 32 */ /* Length of the largest hash we support */ + +unsigned hss_hash_length(int hash_type); +unsigned hss_hash_blocksize(int hash_type); + +void hss_init_hash_context( int h, union hash_context *ctx ); +void hss_update_hash_context( int h, union hash_context *ctx, + const void *msg, size_t len_msg ); +void hss_finalize_hash_context( int h, union hash_context *ctx, + void *buffer); + +#endif /* HASH_H__ */ diff --git a/src/sig_stfl/lms/external/hss.c b/src/sig_stfl/lms/external/hss.c new file mode 100644 index 0000000000..c38455daed --- /dev/null +++ b/src/sig_stfl/lms/external/hss.c @@ -0,0 +1,169 @@ +/* + * This is an implementation of the HSS signature scheme from LMS + * This is designed to be full-featured + * + * Currently, this file consists of functions that don't have a better home + */ +#include +#include +#include "common_defs.h" +#include "hss.h" +#include "hash.h" +#include "endian.h" +#include "hss_internal.h" +#include "hss_aux.h" +#include 
"hss_derive.h" +#include "config.h" +#include "lm_ots_common.h" + +/* + * Allocate and load an ephemeral key + */ +struct hss_working_key *hss_load_private_key( + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + size_t memory_target, + const unsigned char *aux_data, size_t len_aux_data, + struct hss_extra_info *info ) { + + /* Step 1: determine the parameter set */ + unsigned levels; + param_set_t lm[ MAX_HSS_LEVELS ]; + param_set_t ots[ MAX_HSS_LEVELS ]; + if (!hss_get_parameter_set( &levels, lm, ots, read_private_key, context)) { + /* Can't read private key, or private key invalid */ + return 0; + } + + /* Step 2: allocate the ephemeral key */ + struct hss_working_key *w = allocate_working_key(levels, lm, ots, + memory_target, info); + if (!w) { + /* Memory allocation failure, most likely (we've already vetted */ + /* the parameter sets) */ + return 0; + } + + /* Step 3: load the ephemeral key */ + if (! hss_generate_working_key( read_private_key, context, + aux_data, len_aux_data, w, info )) { + /* About the only thing I can see failing here is perhaps */ + /* attempting to reread the private key failed the second time; */ + /* seems unlikely, but not impossible */ + hss_free_working_key( w ); + return 0; + } + + /* Success! */ + return w; +} + +/* + * Internal function to generate the root seed and I value (based on the + * private seed). 
We do this (rather than select seed, I at random) so that + * we don't need to store it in our private key; we can recompute them + */ +bool hss_generate_root_seed_I_value(unsigned char *seed, unsigned char *I, + const unsigned char *master_seed) { +#if SECRET_METHOD == 2 + /* In ACVP mode, we use the master seed as the source for both the */ + /* root seed, and the root I value */ + memcpy( seed, master_seed, SEED_LEN ); + memcpy( I, master_seed + SEED_LEN, I_LEN ); +#else + /* + * We use a two-level hashing scheme so that we end up using the master + * seed only twice throughout the system (once here, once to generate the + * aux hmac key) + */ + unsigned char hash_preimage[ TOPSEED_LEN ]; + unsigned char hash_postimage[ MAX_HASH ]; + + memset( hash_preimage + TOPSEED_I, 0, I_LEN ); + memset( hash_preimage + TOPSEED_Q, 0, 4 ); + SET_D( hash_preimage + TOPSEED_D, D_TOPSEED ); + hash_preimage[TOPSEED_WHICH] = 0x00; + memcpy( hash_preimage + TOPSEED_SEED, master_seed, SEED_LEN ); + + /* We use a fixed SHA256 hash; we don't care about interoperability */ + /* so we don't need to worry about what parameter set the */ + /* user specified */ +#if I_LEN > 32 || SEED_LEN != 32 +#error This logic needs to be reworked +#endif + union hash_context ctx; + + hss_hash_ctx(hash_postimage, HASH_SHA256, &ctx, hash_preimage, + TOPSEED_LEN ); + memcpy( hash_preimage + TOPSEED_SEED, hash_postimage, SEED_LEN ); + + /* Now compute the top level seed */ + hash_preimage[TOPSEED_WHICH] = 0x01; + hss_hash_ctx(seed, HASH_SHA256, &ctx, hash_preimage, TOPSEED_LEN ); + + /* Now compute the top level I value */ + hash_preimage[TOPSEED_WHICH] = 0x02; + hss_hash_ctx(hash_postimage, HASH_SHA256, &ctx, hash_preimage, + TOPSEED_LEN ); + memcpy( I, hash_postimage, I_LEN ); + + hss_zeroize( hash_preimage, sizeof hash_preimage ); /* There's keying */ + /* data here */ + hss_zeroize( &ctx, sizeof ctx ); +#endif + return true; +} + +/* + * Internal function to generate the child I value (based on the 
parent's + * I value). While this needs to be determanistic (so that we can create the + * same I values between reboots), there's no requirement for interoperability. + * So we use a fixed SHA256; when we support a hash function other than SHA256, + * we needn't update this. + */ +bool hss_generate_child_seed_I_value( unsigned char *seed, unsigned char *I, + const unsigned char *parent_seed, + const unsigned char *parent_I, + merkle_index_t index, + param_set_t lm, param_set_t ots) { + struct seed_derive derive; + if (!hss_seed_derive_init( &derive, lm, ots, parent_I, parent_seed )) { + return false; + } + + hss_seed_derive_set_q( &derive, index ); + + /* Compute the child seed value */ + hss_seed_derive_set_j( &derive, SEED_CHILD_SEED ); + hss_seed_derive( seed, &derive, true ); + /* True sets the j value to SEED_CHILD_I */ + + /* Compute the child I value; with increment_j set to true in the */ + /* above call, derive has been set to the SEED_CHILD_I position */ + unsigned char postimage[ SEED_LEN ]; + hss_seed_derive( postimage, &derive, false ); + memcpy( I, postimage, I_LEN ); + + hss_seed_derive_done( &derive ); + + return true; +} + +void hss_init_extra_info( struct hss_extra_info *p ) { + if (p) memset( p, 0, sizeof *p ); +} + +void hss_extra_info_set_threads( struct hss_extra_info *p, int num_threads ) { + if (p) p->num_threads = num_threads; +} + +bool hss_extra_info_test_last_signature( struct hss_extra_info *p ) { + if (!p) return false; + return p->last_signature; +} + +enum hss_error_code hss_extra_info_test_error_code( struct hss_extra_info *p ) { + if (!p) return hss_error_got_null; + return p->error_code; +} diff --git a/src/sig_stfl/lms/external/hss.h b/src/sig_stfl/lms/external/hss.h new file mode 100644 index 0000000000..b4e5e1698d --- /dev/null +++ b/src/sig_stfl/lms/external/hss.h @@ -0,0 +1,417 @@ +#if !defined(HSS_H_) +#define HSS_H_ + +#include +#include +#include "common_defs.h" + +/* + * This is intended to be a usable (nontoy) 
implementation of the LMS + * signature scheme. The public data (public keys, signatures) are + * precisely the same as the standard LMS implementation; however it + * strives to be more usable, in the following ways: + * + * - During signature generation time, it incrementally computes the next + * trees; that means that it doesn't need to generate the next Merkle tree + * from scratch on the 1025th signature. + * - It doesn't try to hold the entire Merkle tree in memory; hence a level + * 25 Merkle tree doesn't need to save 2**25 internal node values. This + * does increase the time to generate the next signature (as we will need + * to recompute some internal nodes); however by only a small constant factor + * - It divides the private key into three parts, only one of which needs to + * be kept secret, and updated dynamically; the other parts are a working + * copy (that can be kept in RAM, and can be dynamically regenerated as + * needed), and some optional static (nonprivate) data (which can speed up + * the regeneration process) + * - API to explicitly reserve the next N signatures (so that we don't need + * to update the secure storage copy quite as often) + * + * + * We use a nonflat memory structure for the working_key. Part of the reason + * we use a flat representation elsewhere is so that they can be written (and + * later read) to/from disk as required; we specifically assume that the + * working_key is never written to disk. 
And, being able to use C structures + * makes this rather nontrivial structure a bit more transparent + * + * Here is the intended order of usage: + * Step 1: generate the private/public keypair: + * The API to do this is hss_generate_private_key; this is done once per + * private key; and you should write the private key to secure storage + * (which the passed update_private_key function could do) + * + * Step 2: (which you can do per restart): + * Load the private keypair into memory: hss_load_private_key + * + * Step 3: generate signatures (which you can do lots of time after you've + * loaded the key into memory): + * The API to do this is hss_generate_signature. Note that this needs + * to update the private key state; the update_private_key function pointer + * can be useful here + * + * Step 4: (when you're done with the loaded private key; optional) + * Free the ephemeral copy (hss_free_working_key). Note that this is not + * required for correctness; this just does a free() + * + * + * One can also verify signatures at any time; all that needs is a public + * key, a signature and a message; it's not a part of the intended order + * of usage + */ + +struct hss_extra_info; + +/* + * This will generate a fresh (unadorned) private key, with the selected + * parameter set, the corresponding public key, and (optionally) the aux_data + * that is associated with the private key. + * + * The generate_random function will be called when this function needs + * random values; it is assumed to generate cryptographically secure ones. + * We ask you to pass a function, rather than an array of random values, + * to emphasize that we really do need fresh random data here; the security + * of this entire system depends on it. + * + * levels, lm_type, lm_ots_type is the parameter set for the new key. 
+ * levels is the number of levels in the HSS hierarchy (1-8), while + * lm_type[], lm_ots_type[] are arrays giving the parameter set of each + * individual level; level i of the hierarchy will have LMS parameter set + * lm_type[i] and OTS parameter set lm_ots_type[i] (where i=0 is the topmost + * Merkle tree. + * + * The update_private_key function will be called when the private key is + * generated; it is expected to write the private key to secure storage (and + * the context pointer is a value that is passed to the update_private_key + * function; it can be used to tell the update_private_key function where + * in the secure storage to place the key). If the passed update_private_key + * function pointer is NULL, the private will will be written to the context + * pointer (which is expected to hold 48 bytes of data) + * + * public_key is where the freshly generated public key will be placed, and + * len_public_key is the size of the array (and this will generate an error + * if the public key is larger than the array). See the hss_get_public_key_len + * function for the expected length of the public key + * + * aux_data is where to place internal nodes of the Merkle tree, and + * len_aux_data is the length of the provided buffer. This aux_data + * is optional (pass in a NULL if it's not being used), but does significantly + * speed the generate_working_key process. It's envisioned use is to write + * this aux_data to disk, and reread it when it's time to regenerate the + * ephemeral key; it need not be kept in secure storage; revealing it doesn't + * help an attacker to generate forgeries, and if an attacker does manage to + * corrupt it, the regeneration process will detect the corruption and ignore + * it. 
Also, even if writing it to disk is not possible, passing in a + * small array here and passing that to the initial regeneration call will + * speed that up (and later ones can omit it; those will go slow, but at + * least you got the speed up benefit the first time). + * + * One slightly tricky thing about aux data is that the required length of the + * aux data; there are several different possible time/memory trade-offs. + * Depending on the length, we'll automatically pick the fastest option that + * fits. If we have N bytes available total, see hss_get_aux_data_len for + * the amount of data we'll actually use (and so the amount you need to write + * to disk) + */ +bool hss_generate_private_key( + bool (*generate_random)(void *output, size_t length), + unsigned levels, + const param_set_t *lm_type, const param_set_t *lm_ots_type, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + unsigned char *public_key, size_t len_public_key, + unsigned char *aux_data, size_t len_aux_data, + struct hss_extra_info *info); + +/* + * This is the routine to load a private key into memory, and + * initialize the working data structures; these data structures + * allow us to generate signtures quickly + * + * The read_private_key is a function to read the private key from secure + * storage, with context being a value passed to that function. + * If the read_private_key pointer is NULL, we assume that the context + * pointer points to the private key. + * This assumes that the key has already been generated by + * hss_generate_private_key + * + * memory_target is a value which gives a goal for the amount of memory (in + * bytes) that this structure should take up. There are a number of + * time/memory trade-offs possible; the function uses this parameter as a + * guide as to what trade-offs it should take. 
This structure tries to + * allocate no more than memory_target bytes; however it is considered + * advisory; this function will never fail because memory_target was too + * small (so passing 0 will work, and will minimize the memory used) + * + * aux_data points to a buffer containing the auxiliary data generated + * during the key generation process, with len_aux_data being the length + * of the buffer. Passing it a NULL means that we're not providing that + * data (which is fine; it just means this will take longer) + * + * On success, this malloc's the ephemeral key (struct hss_working_key*) and + * returns it. Because it mallocs it, it assumes that the caller will + * eventually free it (via the hss_free_working_key function, don't try + * calling free() yourself) + */ +struct hss_working_key; +struct hss_working_key *hss_load_private_key( + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + size_t memory_target, + const unsigned char *aux_data, size_t len_aux_data, /* Optional */ + struct hss_extra_info *info); + +/* + * Corresponding function to free the working key + */ +void hss_free_working_key( struct hss_working_key * ); + +/* + * This will actually generate a signature + * + * working_key is the key that has been allocated by allocate_working_key and + * initialized by hss_generate_working_key + * + * The update_private_key function will be called when the private key is + * updated; it is expected to write the private key to secure storage (and the + * context pointer is a value that is passed to the update_private_key + * function; it can be used to tell the update_private_key function where + * in the secure storage to place the key). And, if it is NULL, the context + * is expected to point to a copy of the private_key in RAM. 
+ * One distinction is that, on an update, len_private_key will be 8; + * the update_private_key can choose to update only the first 8 bytes + * of the private key (the rest will be unchanged), or write all + * 48 bytes (private_key will point to the full 48 byte value) + * + * message, message_len are the message being signed + * + * signature is where the signature will be written, with signature_len being + * the length of the buffer. See the hss_get_signature_len function for the + * expected signature length for this parameter set; if signature_len is too + * short for the signature to fit, this will fail. + */ +bool hss_generate_signature( + struct hss_working_key *working_key, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + const void *message, size_t message_len, + unsigned char *signature, size_t signature_len, + struct hss_extra_info *info); + +/* + * See hss_verify.h for the signature verfication routine; it's in a + * separate file for those programs that only need to verify a signature + */ +#include "hss_verify.h" + +/* + * Lower level routines to allocate and initialize a working key. + * + * hss_load_working_key will do the work of the below routines; these are + * provided separately in case you need more control (e.g. reuse an already + * allocated working key) + * + * First, the routine to allocate (but not initialize) a working key. + * + * The levels/lm_type/lm_ots_type are the same parameter sets as in the + * generate public/private keypair call; the parameter set must match the + * values for the private key. + * + * memory_target is a value which gives a goal for the amount of memory that + * this structure should take up. There are a number of time/memory trade-offs + * possible; the function uses this parameter as a guide as to what trade-offs + * it should take. 
This structure tries to allocate no more than memory_target + * bytes; however it is considered advisory; this function will never fail + * because memory_target was too small (so passing 0 will work, and will + * minimize the memory used) + */ +struct hss_working_key *allocate_working_key( + unsigned levels, + const param_set_t *lm_type, const param_set_t *lm_ots_type, + size_t memory_target, + struct hss_extra_info *info); + +/* + * This is called on reload (or initial key generation), it'll take the + * working key that's been allocated by allocate_working_key, and initialize + * it based on the private key; this working key is what we need to actually + * generate signatures. + * + * The read_private_key is a function to read the private key from secure + * storage, with context being a value passed to that function. + * If NULL, we assume that the context pointer points to the private key + * + * aux_data points to a buffer containing the auxiliary data generated + * during the key generation process, with len_aux_data being the length + * of the buffer. Passing it a NULL means that we're not providing that + * data (which is fine; it just means this will take longer) + * + * working_key is a pointer to the allocated working key + */ +bool hss_generate_working_key( + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + const unsigned char *aux_data, size_t len_aux_data, /* Optional */ + struct hss_working_key *working_key, + struct hss_extra_info *info); + +/* + * This will make sure that (at least) N signatures are reserved; that is, we + * won't need to actually call the update function for the next N signatures + * generated + * + * This can be useful if the update_private_key function is expensive. + * + * Note that if N (or more) signatures are already reserved, this won't do + * anything. 
+ */ +bool hss_reserve_signature( + struct hss_working_key *w, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + unsigned sigs_to_reserve, + struct hss_extra_info *info); + +/* + * This will set the autoreserve, so that when the signing process runs out, + * it will automatically reserve N more signatures (in addition to the one + * that is being used for the current signature) + * + * This can be useful if the update_private_key function is expensive, + * setting sigs_to_autoreserve=99 means will actually update the private + * key once every 100 signatures + */ +bool hss_set_autoreserve( + struct hss_working_key *w, + unsigned sigs_to_autoreserve, + struct hss_extra_info *info); + +/* + * This returns the required lengths for the various objects we export + * + * This is the length of the private key (which is written to secure storage) + */ +size_t hss_get_private_key_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type); +#define HSS_MAX_PRIVATE_KEY_LEN (8 + 8 + SEED_LEN + 16) + +/* + * This include file has the functions that contains the lengths of the other + * public objects + */ +#include "hss_common.h" + +/* + * Get the signature length. We don't put this in hss_common because we + * assume we have a loaded private key + * Returns 0 on error + */ +size_t hss_get_signature_len_from_working_key( + struct hss_working_key *working_key); + +/* + * This returns the amount of aux data we use + * This is slightly different from the above routines; given the bound on the + * amount of data the aux_data is allowed to take (max_length), this returns + * the amount of data we'll actually use + */ +size_t hss_get_aux_data_len(size_t max_length, + unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type); + +/* + * This returns the parameter set for a given private key. 
+ * This is here to solve a chicken-and-egg problem: the hss_working_key + * must be initialized to the same parameter set as the private key, + * but (other than this function, or somehow remembering it) there's + * no way to retrieve the parameter set. + * + * read_private_key/context will read the private key (if read_private_key is + * NULL, context is assumed to point to the private key) + * + * On success, *levels will be set to the number of levels, and lm_type[] + * and lm_ots_type[] will be set to the lm/ots parameter sets + * + * On success, this returns true; on failure (can't read the private key, or + * the private key is invalid), returns false + */ +bool hss_get_parameter_set( unsigned *levels, + param_set_t lm_type[ MAX_HSS_LEVELS ], + param_set_t lm_ots_type[ MAX_HSS_LEVELS ], + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context); + +enum hss_error_code { + hss_error_none = 0, /* I don't know nothing about any error */ + + hss_range_normal_failures, /* These errors happen during normal use */ + /* of the signature scheme */ + hss_error_bad_signature, /* Invalid signature */ + hss_error_private_key_expired, /* This private key has generated all */ + /* the signatures it is allowed */ + hss_error_not_that_many_sigs_left, /* Reservation request failed */ + /* because the key couldn't do that many */ + /* signatures */ + + hss_range_bad_parameters, /* These errors are caused by the */ + /* application passing in a bad parameter */ + hss_error_no_randomness, /* No RNG supplied */ + hss_error_bad_param_set, /* Application asked for an illegal parameter */ + /* set */ + hss_error_buffer_overflow, /* Buffer provided not big enough */ + hss_error_got_null, /* Application passed in a NULL pointer */ + hss_error_bad_aux, /* Error with provided aux buffer */ + hss_error_no_private_buffer, /* Application didn't provide a place */ + /* to put the private key */ + hss_error_incompatible_param_set, /* The 
parameter set of the working */ + /* set didn't agree with the private key */ + hss_error_key_uninitialized, /* The working key used had never been */ + /* initialized with a private key */ + hss_error_key_mismatch, /* The working set and the private key */ + /* do not correspond */ + hss_error_ctx_uninitialized, /* The incremental ctx wasn't initialized */ + /* properly */ + hss_error_ctx_already_used, /* The ctx has already been used */ + hss_error_bad_public_key, /* Somehow, we got an invalid public key */ + + hss_range_processing_error, /* These errors are caused by an */ + /* error while processing */ + hss_error_bad_randomness, /* The RNG claimed failure */ + hss_error_private_key_write_failed, /* The write of the private key */ + /* to NVRAM failed */ + hss_error_private_key_read_failed, /* The read of the private key */ + /* from NVRAM failed */ + hss_error_out_of_memory, /* A malloc failure caused us to fail */ + + hss_range_my_problem, /* These are caused by internal errors */ + /* within the HSS implementation */ + hss_error_internal, /* Some internal assertion failed (should */ + /* never happen) */ +}; + +/* + * This is the structure that allows us to pass noncritical information + * to and from the above routines (without requiring us to add each + * one as an additional parameter) + */ +struct hss_extra_info { + int num_threads; /* Number of threads we're allowed to use */ + bool last_signature; /* Set if we just signed the last signature */ + /* allowed by this private key */ + enum hss_error_code error_code; /* The most recent error detected */ +}; + +/* Accessor APIs in case someone doesn't feel comfortable about reaching */ +/* into the structure */ +void hss_init_extra_info( struct hss_extra_info * ); +void hss_extra_info_set_threads( struct hss_extra_info *, int ); +bool hss_extra_info_test_last_signature( struct hss_extra_info * ); +enum hss_error_code hss_extra_info_test_error_code( struct hss_extra_info * ); + +#endif /* HSS_H_ */ diff --git 
a/src/sig_stfl/lms/external/hss_alloc.c b/src/sig_stfl/lms/external/hss_alloc.c new file mode 100644 index 0000000000..8f7cf6054b --- /dev/null +++ b/src/sig_stfl/lms/external/hss_alloc.c @@ -0,0 +1,555 @@ +/* + * This is the code which allocates a working key (and initializes the fields + * that are independent of the key) + */ +#include +#include +#include +#include "hss.h" +#include "hss_internal.h" +#include "lm_common.h" + +#define MALLOC_OVERHEAD 8 /* Our simplistic model about the overhead */ + /* that malloc takes up is that it adds 8 */ + /* bytes to any request we make. This isn't */ + /* precise (especially if we consider external */ + /* fragmentation), it's just a guideline */ + +/* + * Function to estimate the amount of memory we'd use at a particular level, + * if we went with a particular subtree size + * - i is which tree in the scheme we're talking about; 0 is the root tree + * We have this because we allocate less for the root tree + * - subtree_size is the size of the subtrees we're considering + * - total_length is the size of the trees + * - size_hash is the length of the hash output (always 32 currently) + * - if psubtree_levels is non-NULL, we'll return the number of subtree levels + * here + * - if pstack_total is non-NULL, we'll return the bytes of stack space needed + * by the subtrees of this level here + * The value returned is the amount of space used by the merkle + * level structures, the subtree structures, plus the additional stack + * space required + */ +static size_t compute_level_memory_usage(int i, unsigned subtree_size, + unsigned total_height, unsigned size_hash, + unsigned *psubtree_levels, + size_t *pstack_total) { + /* Compute the number of subtree levels we'd have */ + unsigned subtree_levels = (total_height + subtree_size - 1) / subtree_size; + unsigned top_subtree_size = total_height - (subtree_levels-1)*subtree_size; + /* The top level tree has no next subtrees */ + int have_next_subtree = (i == 0) ? 
0 : 1; + size_t stack_total = 0; + + /* Compute the memory this would use */ + size_t memory_used = sizeof(struct merkle_level) + MALLOC_OVERHEAD; + unsigned j; + for (j=0; j 2 +#error We assume that a subtree of size 2 is allowed +#endif + return 2; +} + +/* + * This allocates a working key for a particular parameter set, and sets up + * the data fields that are key independent; it doesn't set anything that + * does depend on the key. memory_target is used to guide time/memory + * trade-offs; it's the target memory budget that we try to stay below if + * possible + */ +struct hss_working_key *allocate_working_key( + unsigned levels, + const param_set_t *lm_type, const param_set_t *lm_ots_type, + size_t memory_target, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS) { + info->error_code = hss_error_bad_param_set; + return 0; + } + + /* Assign the memory target to a *signed* variable; signed so that it */ + /* can take on negative values meaningfully (to account for cases where */ + /* we are "overbudget") */ + unsigned long mem_target; + if (memory_target > LONG_MAX) { + mem_target = LONG_MAX; + } else { + mem_target = memory_target; + } +#if 0 +signed long initial_mem_target = mem_target; /* DEBUG HACK */ +#endif + + struct hss_working_key *w = malloc( sizeof *w ); + if (!w) { + info->error_code = hss_error_out_of_memory; + return NULL; + } + mem_target -= sizeof(*w) + MALLOC_OVERHEAD; + unsigned i; + w->levels = levels; + w->status = hss_error_key_uninitialized; /* Not usable until we see a */ + /* private key */ + w->autoreserve = 0; + + /* Initialize all the allocated data structures to NULL */ + /* We do this up front so that if we hit an error in the middle, we can */ + /* just free everything */ + for (i=0; isigned_pk[i] = NULL; + } + for (i=0; itree[i] = NULL; + } + w->stack = NULL; + + /* Allocate all the memory for the level signatures */ 
+ size_t signature_len = 4; /* At the same time, compute the sig length */ + for (i=0; i < levels; i++) { + w->siglen[i] = lm_get_signature_len( lm_type[i], lm_ots_type[i] ); + signature_len += w->siglen[i]; + /* Size of this level's Merkle public key */ + size_t pklen = lm_get_public_key_len(lm_type[i]); + if (i != 0) signature_len += pklen; + if (w->siglen[i] == 0) { + hss_free_working_key(w); + info->error_code = hss_error_bad_param_set; + return 0; + } + /* We don't need to allocate a signature for the topmost */ + if (i == 0) continue; + + w->signed_pk_len[i] = w->siglen[i-1] + pklen; + + w->signed_pk[i] = malloc( w->signed_pk_len[i] ); + if (!w->signed_pk[i]) { + hss_free_working_key(w); + info->error_code = hss_error_out_of_memory; + return 0; + } + mem_target -= w->signed_pk_len[i] + MALLOC_OVERHEAD; + } + w->signature_len = signature_len; + + /* Also account for the overhead for the stack allocation (the memory */ + /* used by the stack will be accounted as a part of the tree level size) */ + mem_target -= MALLOC_OVERHEAD; + + /* + * Plot out how many subtree sizes we have at each level. We start by + * computing how much memory we'd use if we minimize each level + */ + unsigned subtree_size[MAX_HSS_LEVELS]; + unsigned subtree_levels[MAX_HSS_LEVELS]; + unsigned level_hash[MAX_HSS_LEVELS]; + unsigned level_height[MAX_HSS_LEVELS]; + unsigned hash_size[MAX_HSS_LEVELS]; + unsigned total_height = 0; + + /* Parse the parameter sets */ + for (i=0; ierror_code = hss_error_bad_param_set; + return 0; + } + + total_height += level_height[i]; /* Also track the number of */ + /* signatures we can generate with this parm set */ + } + + /* + * Select which subtree sizes are faster, and fit within the memory + * we've been given. 
For the nonbottom levels, we always use what's the + * smallest for that particular tree height; there's no point in wasting + * extra memory to make them faster (in that each one can be done during + * the time the bottom level BUILDING subtrees don't need updating). + */ + size_t stack_usage = 0; + for (i=0; i mem_target) { + /* This would use more memory than we'd like; accept it if */ + /* either we have no solution, or it uses less memory than what */ + /* we've seen */ + if (search_status != nothing_yet && mem > best_mem) continue; + + /* This solution is the best so far (however, it doesn't fit) */ + search_status = found_overbudget; + } else { + /* This is within our budget; accept it if we haven't seen a */ + /* previous solution within our budget, or this uses fewer */ + /* levels than the previous solution */ + if (search_status == found_plenty_memory) { + if (sub_levels > best_levels) { + /* We've already seen a faster solution */ + continue; + } + if (sub_levels == best_levels && mem > best_mem) { + /* We've already seen an equally fast solution that */ + /* uses less memory */ + continue; + } + } + + /* This solution is the best so far (and it fits) */ + search_status = found_plenty_memory; + } + /* This is the best option so far; record it */ + best_j = j; + best_mem = mem; + best_levels = sub_levels; + best_stack_used = stack_used; + } + + if (search_status == nothing_yet) { + /* This can't really happen */ + hss_free_working_key(w); + info->error_code = hss_error_internal; + return 0; + } +#if 0 +printf( "Allocation = %ld\n", initial_mem_target - mem_target + best_mem ); /* DEBUG HACK */ +#endif + + subtree_size[i] = best_j; + subtree_levels[i] = (level_height[i] + best_j - 1) / best_j; + stack_usage += best_stack_used; + + unsigned char *stack; + if (stack_usage == 0) { + stack = NULL; /* Hey! 
No stack required */ + /* Avoid the malloc, as malloc(0) is allowed to fail */ + } else { + stack = malloc(stack_usage); + if (!stack) { + hss_free_working_key(w); + info->error_code = hss_error_out_of_memory; + return 0; + } + } + w->stack = stack; + size_t stack_index = 0; + + /* + * Ok, we've figured out the sizes for everything; now do the actual + * allocations + */ + for (i = 0; ierror_code = hss_error_out_of_memory; + return 0; + } + unsigned h0 = level_height[i]; + tree->level = h0; + tree->h = level_hash[i]; + tree->hash_size = hash_size[i]; + tree->lm_type = lm_type[i]; + tree->lm_ots_type = lm_ots_type[i]; + /* We'll initialize current_index from the private key */ + tree->max_index = (1L << tree->level) - 1; + tree->sublevels = subtree_levels[i]; + tree->subtree_size = subtree_size[i]; + unsigned top_subtree_size = h0 - (subtree_levels[i]-1)*subtree_size[i]; + tree->top_subtree_size = top_subtree_size; + + unsigned k; + for (j=0; jsubtree[j][k] = NULL; + w->tree[i] = tree; + + unsigned subtree_level = 0; + unsigned levels_below = h0; + for (j=0; jerror_code = hss_error_out_of_memory; + return 0; + } + + s->level = subtree_level; + s->levels_below = levels_below; + tree->subtree[j][k] = s; + if (k == ACTIVE_TREE) { + /* Active trees don't need no stack */ + s->stack = NULL; + } else if (levels_below == 0) { + /* Bottom level subtrees don't need no stack */ + s->stack = NULL; + } else { + s->stack = &stack[stack_index]; + stack_index += hash_size[i] * levels_below; + } + } + + subtree_level += height; + } + } + +/* SANITY CHECK */ + if (stack_index != stack_usage) { + hss_free_working_key(w); + info->error_code = hss_error_internal; + return 0; + } +/* SANITY CHECK */ + + /* Compute the max number of signatures we can generate */ + if (total_height > 64) total_height = 64; /* (bounded by 2**64) */ + w->max_count = ((sequence_t)2 << (total_height-1)) - 1; /* height-1 so */ + /* we don't try to shift by 64, and hit undefined behavior */ + + /* We use the 
count 0xffff..ffff to signify 'we've used up all our */ + /* signatures'. Make sure that is above max_count, even for */ + /* parameter sets that can literally generate 2**64 signatures (by */ + /* letting them generate only 2**64-1) */ + if (total_height == 64) w->max_count--; + + return w; +} + +void hss_free_working_key(struct hss_working_key *w) { + int i; + if (!w) return; + for (i=0; itree[i]; + if (tree) { + unsigned j, k; + for (j=0; jsubtree[j][k]); // IGNORE free-check + hss_zeroize( tree, sizeof *tree ); /* We have seeds here */ + } + free(tree); // IGNORE free-check + } + for (i=0; isigned_pk[i]); // IGNORE free-check + } + free(w->stack); // IGNORE free-check + hss_zeroize( w, sizeof *w ); /* We have secret information here */ + free(w); // IGNORE free-check +} diff --git a/src/sig_stfl/lms/external/hss_aux.c b/src/sig_stfl/lms/external/hss_aux.c new file mode 100644 index 0000000000..5817b76c81 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_aux.c @@ -0,0 +1,355 @@ +/* + * This is the implementation of the aux data within the HSS tree + */ + +#include +#include "hss_aux.h" +#include "hss_internal.h" +#include "common_defs.h" +#include "lm_common.h" +#include "endian.h" +#include "hash.h" +#include "hss_zeroize.h" + +/* + * The structure of aux data + * + * The current format of the file is: + * [4 bytes of marker]: + * - bit 31 is set (to indicate that the aux data is nonempty; a 0 first byte + * indicates that, yes, we have no bananas); because we store the marker + * in bigendian format, this bit 31 is in the first byte. 
+ * - bit i is set if we have the hashes for intermediate level i + * For each set bit i (in ascending sequence): + * - 1<= len_this_level) { + /* This level fits; add it */ + max_length -= len_this_level; + /* We also set the MSBit to signify that we're saving something */ + aux_level |= 0x80000000UL | ((aux_level_t)1<>= 1) { + if (aux_level & 1) { + temp->data[h] = (void *)aux_data; + aux_data += (size_t)size_hash << h; + } else { + temp->data[h] = 0; /* No data at this level */ + } + } + + /* Now, check if the data is valid */ + if (w) { + /* Check to see if the data is valid */ + size_t expected_len = (aux_data - orig_aux_data) + size_hash; + if (expected_len > len_aux_data) { + /* Either the first 4 bytes were messed up, or the file was */ + /* truncated */ + return 0; + } + if (len_aux_data < 4 + size_hash) return 0; + + /* Now, MAC the entire aux file */ + union hash_context ctx; + unsigned char key[ MAX_HASH ]; + compute_seed_derive( key, w->tree[0]->h, w->working_key_seed, &ctx ); + unsigned char expected_mac[ MAX_HASH ]; + compute_hmac( expected_mac, w->tree[0]->h, size_hash, &ctx, key, + orig_aux_data, aux_data - orig_aux_data ); + hss_zeroize( key, size_hash ); + hss_zeroize( &ctx, sizeof ctx ); + if (0 != memcmp_consttime( expected_mac, aux_data, size_hash)) { + /* The MAC did not agree; ignore the aux data */ + return 0; + } + } + return temp; +} + +/* + * This returns the amount of aux data we would use, given the maximum bound + * on how much aux data we are allowed, and the parameter sets + */ +size_t hss_get_aux_data_len(size_t max_length, + unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type) { + size_t len = 0; + LMS_UNUSED(levels); + if (!hss_optimal_aux_level( max_length, lm_type, lm_ots_type, &len )) { + return 1; /* 1 byte marker to say 'we're not using it */ + } + + return len; +} + +/* + * Save the marker within the aux data + */ +void hss_store_aux_marker( unsigned char *aux_data, aux_level_t aux_level ) { + if 
(aux_level == 0) { + /* Aux data doesn't help; mark it as unused */ + aux_data[AUX_DATA_MARKER] = NO_AUX_DATA; + } else { + put_bigendian( &aux_data[AUX_DATA_MARKER], aux_level, 4 ); + } +} + +/* + * This is called while we are building the initial top level Merkle tree (to + * compute the root). This is called for each internal node, and allows the + * aux data a chance to save the intermediate value + */ +void hss_save_aux_data( struct expanded_aux_data *data, unsigned level, + unsigned size_hash, merkle_index_t q, + const unsigned char *cur_val ) { + if (!data) return; /* We're not recording anything */ + if (!data->data[level]) return; /* We're not recording anything for */ + /* this level */ + + /* We are recording it; save a copy in the aux data */ + memcpy( data->data[level] + size_hash * q, cur_val, size_hash ); +} + +/* + * This generates the derived value that we'll use as a key the authenticate + * the aux data. We pass the ctx (rather than using a local one) so we have + * one less thing to zeroize + * + * We use a derived key (rather than using the seed directly) because the + * outer hash within the HMAC don't use the diversification factors that every + * other hash within this packet does; hence for HMAC, we use a key that + * is independent of every other hash used + */ +static void compute_seed_derive( unsigned char *result, unsigned hash, + const unsigned char *seed, union hash_context *ctx) { + hss_init_hash_context( hash, ctx ); + unsigned char prefix[ DAUX_PREFIX_LEN ]; + memset( prefix, 0, DAUX_D ); + SET_D( prefix + DAUX_D, D_DAUX ); + hss_update_hash_context( hash, ctx, prefix, sizeof prefix ); + hss_update_hash_context( hash, ctx, seed, SEED_LEN ); + hss_finalize_hash_context( hash, ctx, result ); + + hss_zeroize( &ctx, sizeof ctx ); +} + +static void xor_key( unsigned char *key, unsigned xor_val, unsigned len_key) { + unsigned i; + for (i = 0; idata[i]) { + total_length += (size_t)size_hash << i; + if (!aux) { + aux = data->data[i] - 4; + 
} + } + } + if (aux) { + compute_hmac( aux+total_length, hash, size_hash, &ctx, aux_seed, + aux, total_length ); + } + + hss_zeroize( &ctx, sizeof ctx ); + hss_zeroize( aux_seed, size_hash ); +} + +/* + * This is called when we need to use aux data; it checks to see if we've + * stored the nodes within the aux data; if we have, it extracts them, + * and returns true + */ +bool hss_extract_aux_data(const struct expanded_aux_data *aux, unsigned level, + const struct hss_working_key *w, unsigned char *dest, + merkle_index_t node_offset, /* Offset of node on this level */ + merkle_index_t node_count) { /* # of nodes to restore */ + if (!aux) return false; /* No aux data */ + if (!aux->data[level]) return false; /* We don't have that specific */ + /* level saved */ + unsigned hash_size = w->tree[0]->hash_size; + + /* We do have the data; copy it to the destination */ + memcpy( dest, + aux->data[level] + node_offset*hash_size, + node_count * hash_size ); + + return true; +} diff --git a/src/sig_stfl/lms/external/hss_aux.h b/src/sig_stfl/lms/external/hss_aux.h new file mode 100644 index 0000000000..634df88684 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_aux.h @@ -0,0 +1,59 @@ +#if !defined( HSS_AUX_H_ ) +#define HSS_AUX_H_ + +/* + * This is the internal API to the subsystem that deals with aux data + * This should not be included by files outside this subsystem + */ + +#include "common_defs.h" +#include +#include + +struct hss_working_key; + +/* This is a bitmap that lists which aux levels we have */ +typedef uint_fast32_t aux_level_t; + +/* This is the expanded version of the aux data */ +struct expanded_aux_data { + unsigned char *data[ MAX_MERKLE_HEIGHT+1 ]; +}; + +/* + * These are some internal routines that handle aux data + */ +/* Internal function used to compute the optimal aux level */ +aux_level_t hss_optimal_aux_level( size_t max_length, + const param_set_t *lm_type, + const param_set_t *lm_ots_type, + size_t *actual_len ); + +/* Generate pointers into a 
saved aux data */ +/* If w is provided, we do sanity checking on the data within aux_data */ +struct expanded_aux_data *hss_expand_aux_data( const unsigned char *aux_data, + size_t len_aux_data, + struct expanded_aux_data *temp, unsigned size_hash, + struct hss_working_key *w ); + +/* + * Save the marker within the aux data + */ +void hss_store_aux_marker( unsigned char *aux_data, aux_level_t aux_level ); + +/* Save an intermediate node */ +void hss_save_aux_data( struct expanded_aux_data *data, unsigned level, + unsigned size_hash, merkle_index_t q, + const unsigned char *cur_val ); + +/* Do the final touches on the aux data */ +void hss_finalize_aux_data(struct expanded_aux_data *data, + unsigned size_hash, unsigned hash, + const unsigned char *seed); + +/* Get a set of intermediate nodes from the aux data */ +bool hss_extract_aux_data(const struct expanded_aux_data *aux, unsigned level, + const struct hss_working_key *w, unsigned char *dest, + merkle_index_t node_offset, merkle_index_t node_count); + +#endif /* HSS_AUX_H_ */ diff --git a/src/sig_stfl/lms/external/hss_common.c b/src/sig_stfl/lms/external/hss_common.c new file mode 100644 index 0000000000..d07261dd26 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_common.c @@ -0,0 +1,48 @@ +/* + * This is the code that is common between an HSS verifier, and a full HSS + * implementation that both signs and verifies + */ +#include +#include "common_defs.h" +#include "hss_common.h" +#include "lm_common.h" +#include "config.h" +/* + * Get the length of the public key, given this particular parameter set + */ +size_t hss_get_public_key_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type) { + LMS_UNUSED(lm_ots_type); + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS) return 0; + + size_t first_pubkey = lm_get_public_key_len(lm_type[0]); + if (first_pubkey == 0) return 0; + + return 4 + first_pubkey; +} + +/* + * Get the length of a signature, given this particular parameter set 
+ */ +size_t hss_get_signature_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type) { + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS) return 0; + + unsigned i; + size_t tot_len = 4; + for (i=0; i 0 */ + if (i > 0) { + size_t next_pub_len = lm_get_public_key_len(lm_type[i]); + if (next_pub_len == 0) return 0; + tot_len += next_pub_len; + } + } + return tot_len; +} diff --git a/src/sig_stfl/lms/external/hss_common.h b/src/sig_stfl/lms/external/hss_common.h new file mode 100644 index 0000000000..c455b9af5e --- /dev/null +++ b/src/sig_stfl/lms/external/hss_common.h @@ -0,0 +1,22 @@ +#if !defined( HSS_COMMON_H_ ) +#define HSS_COMMON_H_ + +#include +#include "common_defs.h" + +/* + * This returns the length of the public key for the given parameter set + */ +size_t hss_get_public_key_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type); +#define HSS_MAX_PUBLIC_KEY_LEN (4 + 8 + ((I_LEN+3) & ~3) + MAX_HASH) + +/* + * This returns the length of the signature for the given parameter set + */ +size_t hss_get_signature_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type); + +#endif /* HSS_COMMON_H_ */ diff --git a/src/sig_stfl/lms/external/hss_compute.c b/src/sig_stfl/lms/external/hss_compute.c new file mode 100644 index 0000000000..353ec939fb --- /dev/null +++ b/src/sig_stfl/lms/external/hss_compute.c @@ -0,0 +1,174 @@ +/* + * This includes some computation methods that are shared between different + * subsystems of the HSS signature package + */ + +#include +#include "hss_internal.h" +#include "hss.h" +#include "hash.h" +#include "hss_thread.h" +#include "lm_ots_common.h" +#include "lm_ots.h" +#include "endian.h" +#include "hss_derive.h" + +/* Count the number of 1 bits at the end (lsbits) of the integer */ +/* Do it in the obvious way; straightline code may be faster (no */ +/* unpredictable jumps, which are costly), but that would be less scrutable */ +/* (and this 
code is "fast enough") */ +static int trailing_1_bits(merkle_index_t n) { + int i; + for (i=0; n&1; n>>=1, i++) + ; + return i; +} + +/* + * Compute the value of an internal node within a Merkle tree + */ +static enum hss_error_code hss_compute_internal_node( unsigned char *dest, + merkle_index_t node_num, + const unsigned char *seed, + param_set_t lm_type, + param_set_t lm_ots_type, + unsigned h, + unsigned leaf_level, + const unsigned char *I) { + unsigned hash_size = hss_hash_length(h); + + /* We're store intermediate nodes here */ + unsigned char stack[ MAX_HASH * MAX_MERKLE_HEIGHT]; + + merkle_index_t tree_size = (merkle_index_t)1 << leaf_level; + merkle_index_t r = node_num; + int levels_to_bottom = 0; + if (r == 0) return hss_error_internal; /* So no to infinite loops */ + while (r < tree_size) { + r <<= 1; + levels_to_bottom++; + } + merkle_index_t q = r - tree_size; + + merkle_index_t i; + unsigned ots_len = lm_ots_get_public_key_len(lm_ots_type); + unsigned char pub_key[ LEAF_MAX_LEN ]; + memcpy( pub_key + LEAF_I, I, I_LEN ); + SET_D( pub_key + LEAF_D, D_LEAF ); + + struct seed_derive derive; + if (!hss_seed_derive_init( &derive, lm_type, lm_ots_type, + I, seed)) { + return hss_error_bad_param_set; + } + + for (i=0;; i++, r++, q++) { + /* Generate the next OTS public key */ + hss_seed_derive_set_q( &derive, q ); + if (!lm_ots_generate_public_key(lm_ots_type, I, + q, &derive, pub_key + LEAF_PK, ots_len)) { + return hss_error_bad_param_set; /* The only reason the above */ + /* could fail */ + } + + /* + * For the subtree which this leaf node forms the final piece, put the + * destination to where we'll want it, either on the stack, or if this + * is the final piece, to where the caller specified + */ + unsigned char *current_buf; + int stack_offset = trailing_1_bits( i ); + if (stack_offset == levels_to_bottom) { + current_buf = dest; + } else { + current_buf = &stack[stack_offset * hash_size ]; + } + + /* Hash it to form the leaf node */ + put_bigendian( 
pub_key + LEAF_R, r, 4); + union hash_context ctx; + hss_hash_ctx( current_buf, h, &ctx, pub_key, LEAF_LEN(hash_size) ); + + /* Work up the stack, combining right nodes with the left nodes */ + /* that we've already computed */ + int sp; + for (sp = 1; sp <= stack_offset; sp++) { + hss_combine_internal_nodes( current_buf, + &stack[(sp-1) * hash_size], current_buf, + h, I, hash_size, + r >> sp ); + } + + /* We're not at a left branch, or at the target node */ + + /* Because we've set current_buf to point to where we want to place */ + /* the result of this loop, we don't need to memcpy it */ + + /* Check if this was the last leaf (and so we've just computed the */ + /* target node) */ + if (stack_offset == levels_to_bottom) { + /* We're at the target node; the node we were asked to compute */ + /* We've already placed the value into dest, so we're all done */ + break; + } + } + + hss_seed_derive_done( &derive ); + + return hss_error_none; +} + +/* + * Combine adjacent left and right nodes within the Merkle tree + * together + */ +void hss_combine_internal_nodes( unsigned char *dest, + const unsigned char *left_node, const unsigned char *right_node, + int h, const unsigned char *I, unsigned hash_size, + merkle_index_t node_num) { + unsigned char hash_val[ INTR_MAX_LEN ]; + memcpy( hash_val + INTR_I, I, I_LEN ); + put_bigendian( hash_val + INTR_R, node_num, 4 ); + SET_D( hash_val + INTR_D, D_INTR ); + + memcpy( hash_val + INTR_PK, left_node, hash_size ); + memcpy( hash_val + INTR_PK + hash_size, right_node, hash_size ); + union hash_context ctx; + hss_hash_ctx( dest, h, &ctx, hash_val, INTR_LEN(hash_size) ); +} + +/* + * This computes an array of intermediate Merkle nodes given by data + * This may be run in a worker (non-main) thread + */ +void hss_gen_intermediate_tree(const void *data, + struct thread_collection *col) { + const struct intermed_tree_detail *d = data; + unsigned hash_len = hss_hash_length(d->h); + unsigned i; + + for (i=0; inode_count; i++) { + 
unsigned char result[ MAX_HASH ]; + enum hss_error_code status = hss_compute_internal_node( result, + d->node_num + i, + d->seed, + d->lm_type, + d->lm_ots_type, + d->h, + d->tree_height, + d->I); + + /* Report the results */ + hss_thread_before_write(col); + if (status == hss_error_none) { + /* Copy out the resulting hash */ + memcpy( d->dest + i*hash_len, result, hash_len ); + } else { + /* Something went wrong; report the bad news */ + *d->got_error = status; + hss_thread_after_write(col); /* No point in working more */ + return; + } + hss_thread_after_write(col); + } +} diff --git a/src/sig_stfl/lms/external/hss_derive.c b/src/sig_stfl/lms/external/hss_derive.c new file mode 100644 index 0000000000..fc8833594a --- /dev/null +++ b/src/sig_stfl/lms/external/hss_derive.c @@ -0,0 +1,325 @@ +/* + * This is the file that contains the routines that generate various 'random' + * values from the master seed. + * + * Values generated by this routine: + * - OTS private keys + * - Message randomizers (the random value we hash with the message when we + * sign it) + * - I values + * - SEED values (which are the secret to derive all the above for a specific + * LMS tree) + * + * We do things determanisticly, rather than picking things from random, so + * that if we reload from scratch, the values we use after the reload are + * consistent with what we used previously + * + * This provides several different possible derivation methods; they can be + * selected by setting SECRET_METHOD in config.h + */ +#include <string.h> +#include "hss_derive.h" +#include "hss_internal.h" +#include "hash.h" +#include "endian.h" +#include "config.h" + +#if SECRET_METHOD == 2 + /* We use a hash function based on the parameter set */ +#include "lm_common.h" /* To get the prototype for the parameter set -> */ + /* hash function mapping */ +#else +#if SEED_LEN == 32 +#define HASH HASH_SHA256 /* We always use SHA-256 to derive seeds */ +#else +#error We need to define a hash function for this seed length 
+#endif +#endif + +#if SECRET_METHOD == 0 || SECRET_METHOD == 2 +/* + * This is the method of deriving LM-OTS keys that conforms to the + * Appendix A method + * As you can see, it's fairly simple + */ + +/* This creates a seed derivation object */ +bool hss_seed_derive_init( struct seed_derive *derive, + param_set_t lm, param_set_t ots, + const unsigned char *I, const unsigned char *seed ) { + derive->I = I; + derive->master_seed = seed; + LMS_UNUSED(ots); + /* q, j will be set later */ +#if SECRET_METHOD == 2 + /* Grab the hash function to use */ + if (!lm_look_up_parameter_set(lm, &derive->hash, &derive->m, 0)) { + return false; + } + + /* Note: currently, this assumes that the hash length is always 256 */ + /* bits; error out if that isn't the case */ + if (derive->m != SEED_LEN) { + return false; + } +#endif + + return true; +} + +/* This sets the internal 'q' value for seed derivation object */ +void hss_seed_derive_set_q( struct seed_derive *derive, merkle_index_t q ) { + derive->q = q; +} + +/* This sets the internal 'j' value for seed derivation object */ +void hss_seed_derive_set_j( struct seed_derive *derive, unsigned j ) { + derive->j = j; +} + + +/* This derives the current seed value. 
If increment_j is set, it'll then */ +/* reset the object to the next j value */ +void hss_seed_derive( unsigned char *seed, struct seed_derive *derive, + bool increment_j ) { + unsigned char buffer[ PRG_MAX_LEN ]; + memcpy( buffer + PRG_I, derive->I, I_LEN ); + put_bigendian( buffer + PRG_Q, derive->q, 4 ); + put_bigendian( buffer + PRG_J, derive->j, 2 ); + buffer[PRG_FF] = 0xff; + memcpy( buffer + PRG_SEED, derive->master_seed, SEED_LEN ); + +#if SECRET_METHOD == 2 + int hash = derive->hash; /* Our the parameter set's hash function */ +#else + int hash = HASH; /* Use our standard one */ +#endif + + hss_hash( seed, hash, buffer, PRG_LEN(SEED_LEN) ); + + hss_zeroize( buffer, PRG_LEN(SEED_LEN) ); + + if (increment_j) derive->j += 1; +} + +/* This is called when we're done with a seed derivation object */ +void hss_seed_derive_done( struct seed_derive *derive ) { + /* No secrets here */ + LMS_UNUSED(derive); +} + +#elif SECRET_METHOD == 1 +/* + * This is a method of deriving LM-OTS keys that tries to be more + * side-channel resistant; in particular, we never include any + * specific secret value in more than 2**SECRET_MAX distinct + * hashes. + * We do this by deriving subseeds using a tree-based structure; + * each node in the tree has up to 2**SECRET_MAX children, and we use any + * seed within the node (including the root) in no other hash. 
+ * We actually have two levels of trees; one based on q (Merkle tree index), + * the other based on j (Winternitz digit); we could design a single level + * tree that could incorporate both, but it'd be more complex + * + * Much of the complexity that does exist is there to avoid recomputation + */ +#include "lm_common.h" +#include "lm_ots_common.h" +static unsigned my_log2(merkle_index_t n); + +/* This creates a seed derivation object */ +bool hss_seed_derive_init( struct seed_derive *derive, + param_set_t lm, param_set_t ots, + const unsigned char *I, const unsigned char *seed ) { + derive->I = I; + derive->master_seed = seed; + + /* These parameter sets will define the size of the trees we'll use */ + unsigned height, p; + if (!lm_look_up_parameter_set(lm, 0, 0, &height) || + !lm_ots_look_up_parameter_set(ots, 0, 0, 0, &p, 0)) { + return false; + } + + p += NUM_ARTIFICIAL_SEEDS; /* We use one artifical value for the */ + /* randomizer and two artificial values to generate seed, I */ + /* for child trees */ + + /* Compute the number of r-levels we have */ + derive->q_levels = (height + SECRET_MAX - 1)/SECRET_MAX; + + /* And which bit to set when converting 'q' to 'r' */ + derive->r_mask = (merkle_index_t)1 << height; + + /* Compute the number of j-levels we have */ + unsigned j_height = my_log2(p); + derive->j_levels = (j_height + SECRET_MAX - 1)/SECRET_MAX; + + /* And which bit to set when writing q values into the hash */ + derive->j_mask = 1 << j_height; + + /* We reset the current 'q' value to am impossible value; we do this so */ + /* that the initial 'q' value given to use by the application will */ + /* rebuild the entire path through the tree */ + derive->q = derive->r_mask; + + return true; +} + +/* This sets the internal 'q' value for seed derivation object */ +/* This also updates our internal q-path (the q_index/q_seed arrays) */ +/* to reflect the new 'q' value, while minimizing the number of hashes */ +/* done (by reusing as much of the previous 
path as possible) */ +void hss_seed_derive_set_q( struct seed_derive *derive, merkle_index_t q ) { + merkle_index_t change = q ^ derive->q; + derive->q = q; + unsigned bits_change = my_log2(change); + unsigned q_levels = derive->q_levels; + + /* levels_change will be the number of levels of the q-tree we'll */ + /* need to recompute */ + unsigned levels_change = (bits_change + SECRET_MAX - 1) / SECRET_MAX; + if (levels_change > q_levels) levels_change = q_levels; + + int i; + union hash_context ctx; + unsigned char buffer[ QTREE_MAX_LEN ]; + merkle_index_t r = q | derive->r_mask; + + for (i = levels_change; i > 0; i--) { + int j = q_levels - i; + int shift = (i-1) * SECRET_MAX; + + memcpy( buffer + QTREE_I, derive->I, I_LEN ); + put_bigendian( buffer + QTREE_Q, r >> shift, 4 ); + SET_D( buffer + QTREE_D, D_QTREE ); + if (j == 0) { + memcpy( buffer + QTREE_SEED, derive->master_seed, SEED_LEN ); + } else { + memcpy( buffer + QTREE_SEED, derive->q_seed[j-1], SEED_LEN ); + } + + hss_hash_ctx( derive->q_seed[j], HASH, &ctx, buffer, QTREE_LEN ); + } + + hss_zeroize( buffer, PRG_LEN(SEED_LEN) ); + hss_zeroize( &ctx, sizeof ctx ); +} + +/* Helper function to recompute the j_seed[i] value, based on the */ +/* j_value[i] already set */ +/* ctx, buffer are passed are areas this function can use; we reuse those */ +/* areas so we need to zeroize those buffers only once */ +static void set_j_seed( struct seed_derive *derive, int i, + union hash_context *ctx, unsigned char *buffer) { + + memcpy( buffer + PRG_I, derive->I, I_LEN ); + put_bigendian( buffer + PRG_Q, derive->q, 4 ); + put_bigendian( buffer + PRG_J, derive->j_value[i], 2 ); + buffer[PRG_FF] = 0xff; + if (i == 0) { + /* The root of this tree; it gets its seed from the bottom level */ + /* of the q-tree */ + memcpy( buffer + PRG_SEED, derive->q_seed[ derive->q_levels-1], + SEED_LEN ); + } else { + /* Non-root node; it gets its seed from its parent */ + memcpy( buffer + PRG_SEED, derive->j_seed[i-1], SEED_LEN ); + } + + 
hss_hash_ctx( derive->j_seed[i], HASH, ctx, buffer, PRG_LEN(SEED_LEN) ); +} + +/* This sets the internal 'j' value for seed derivation object */ +/* This computes the entire path to the 'j' value. Because this is used */ +/* immediately after resetting the q value, we don't try to reuse the */ +/* previous hashes (as there won't be anything there we could reuse) */ +/* Note that we don't try to take advantage of any preexisting hashes */ +/* in the j_seed array; we don't bother because this function is typically */ +/* used only immediately after a set_q call, and so there aren't any */ +/* hashes we could take advantage of */ +void hss_seed_derive_set_j( struct seed_derive *derive, unsigned j ) { + int i; + unsigned j_levels = derive->j_levels; + unsigned shift = SECRET_MAX * j_levels; + + unsigned j_mask = derive->j_mask; + j &= j_mask-1; /* Set the high-order bit; clear any bits above that */ + j |= j_mask; /* This ensures that when we do the hashes, that the */ + /* prefix for the hashes at two different levels of the */ + /* tree are distinct */ + + union hash_context ctx; + unsigned char buffer[ PRG_MAX_LEN ]; + + for (i = 0; i<j_levels; i++) { + shift -= SECRET_MAX; + derive->j_value[i] = (j >> shift); + set_j_seed( derive, i, &ctx, buffer ); + } + + hss_zeroize( &ctx, sizeof ctx ); + hss_zeroize( buffer, PRG_LEN(SEED_LEN) ); +} + +/* This derives the current seed value (actually, we've already computed */ +/* it); we just need to copy it to the buffer) */ +/* If increment_j is set, it'll then reset the object to the next j value */ +/* (which means incrementally computing that path) */ +void hss_seed_derive( unsigned char *seed, struct seed_derive *derive, + bool increment_j ) { + memcpy( seed, derive->j_seed[ derive->j_levels - 1], SEED_LEN ); + + if (increment_j) { + int i; + + /* Update the j_values, and figure out which hashes we'll need */ + /* to recompute */ + for (i = derive->j_levels-1;; i--) { + unsigned index = derive->j_value[i]; + index += 1; + derive->j_value[i] = index; + if (0 != (index & 
SECRET_MAX_MASK)) { + /* The increment didn't cause a carry to the next level; */ + /* we can stop propogating the increment here (and we */ + /* also know this is the top level that we need to */ + /* recompute the hashes */ + break; + } + if (i == 0) { + /* This is the top level; stop here */ + break; + } + } + + /* Recompute the hashes that need updating; we need to do it */ + /* top-down, as each hash depends on the previous one */ + union hash_context ctx; + unsigned char buffer[ PRG_MAX_LEN ]; + for (; i < derive->j_levels; i++) { + set_j_seed( derive, i, &ctx, buffer ); + } + hss_zeroize( &ctx, sizeof ctx ); + hss_zeroize( buffer, PRG_LEN(SEED_LEN) ); + } +} + +/* This is called when we're done with a seed derivation object */ +/* This makes sure any secret values are zeroized */ +void hss_seed_derive_done( struct seed_derive *derive ) { + /* These values are secret, and should never be leaked */ + hss_zeroize( derive->q_seed, sizeof derive->q_seed ); + hss_zeroize( derive->j_seed, sizeof derive->j_seed ); +} + +static unsigned my_log2(merkle_index_t n) { + unsigned lg; + for (lg = 0; n > 0; lg++) n >>= 1; + return lg; +} + +#else + +#error Unknown secret method + +#endif diff --git a/src/sig_stfl/lms/external/hss_derive.h b/src/sig_stfl/lms/external/hss_derive.h new file mode 100644 index 0000000000..ee47eb6cfc --- /dev/null +++ b/src/sig_stfl/lms/external/hss_derive.h @@ -0,0 +1,74 @@ +#if !defined( HSS_DERIVE_H_ ) +#define HSS_DERIVE_H_ + +#include "common_defs.h" + +#include "config.h" + +#if SECRET_MAX > 31 +#error The code is not designed for a SECRET_MAX that high +#endif +#define SECRET_MAX_MASK (((merkle_index_t)1 << SECRET_MAX) - 1) + +struct seed_derive { + const unsigned char *I; + const unsigned char *master_seed; + merkle_index_t q; + unsigned j; +#if SECRET_METHOD == 2 + unsigned hash; /* Hash function to use */ + unsigned m; /* Length of hash function */ +#endif + +#if SECRET_METHOD == 1 + unsigned q_levels, j_levels; + merkle_index_t r_mask; 
+ unsigned j_mask; +#define MAX_Q_HEIGHT ((MAX_MERKLE_HEIGHT + SECRET_MAX - 1) / SECRET_MAX) +#define MAX_J_HEIGHT (( 9 + SECRET_MAX - 1) / SECRET_MAX) + /* '9' is the number of bits a maximum 'p' can take up */ + + unsigned j_value[MAX_J_HEIGHT]; /* these are the values we insert */ + /* into the hash. The lower SECRET_MAX bits are which child of */ + /* the parent it is; the higher bits indicate the parents' */ + /* identities */ + + unsigned char q_seed[MAX_Q_HEIGHT][SEED_LEN]; + unsigned char j_seed[MAX_Q_HEIGHT][SEED_LEN]; +#endif +}; + +bool hss_seed_derive_init( struct seed_derive *derive, + param_set_t lm, param_set_t ots, + const unsigned char *I, const unsigned char *seed ); + +/* This sets the internal 'q' value */ +/* If we've already have a 'q' value set, it'll try to minimize the number */ +/* of hashes done */ +/* Once you've done that, you'll need to reset the 'h' */ +void hss_seed_derive_set_q( struct seed_derive *derive, merkle_index_t q ); + +/* This sets the internal 'j' value */ +void hss_seed_derive_set_j( struct seed_derive *derive, unsigned j ); + +#define NUM_ARTIFICIAL_SEEDS 3 /* 3 seeds are listed below */ + /* This is the j value used when we're deriving the seed value */ + /* for child Merkle trees */ +#define SEED_CHILD_SEED (~1) + /* This is the j value used when we're deriving the I value */ + /* used; either in the context of the parent tree, or of this tree */ +#define SEED_CHILD_I (SEED_CHILD_SEED + 1) + /* This is the j value used when we're asking for the randomizer C */ + /* for signing a message */ +#define SEED_RANDOMIZER_INDEX (~2) + +/* This generates the current seed. 
If increment_j is set, this will set */ +/* up for the next j value */ +void hss_seed_derive( unsigned char *seed, struct seed_derive *derive, + bool increment_j ); + +/* This needs to be called when we done with a seed_derive */ +/* That structure contains keying data, this makes sure those are cleaned */ +void hss_seed_derive_done( struct seed_derive *derive ); + +#endif /* HSS_DERIVE_H_ */ diff --git a/src/sig_stfl/lms/external/hss_generate.c b/src/sig_stfl/lms/external/hss_generate.c new file mode 100644 index 0000000000..b604ab3593 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_generate.c @@ -0,0 +1,932 @@ +/* + * This is the routine that generates the ephemeral ("working") key from the + * short private value. It builds all the various current, building and + * next subtrees for the various levels (to at least the extent required + * for the current count within the key). + * + * The code is made considerably more complex because we try to take + * advantage of parallelism. To do this, we explicitly list the parts + * of the subtrees we need to build (which is most of the computation), and + * have different worker threads build the various parts, + * + * However, it turns out that this is sometimes insufficient; sometimes, + * the work consists of one or two expensive nodes (perhaps the top level + * subtree), and a lot of comparatively cheap ones; in this case, we'd have + * most of our threads go through the cheap ones quickly, and have one or + * two threads working on the expensive one, and everyone will end up waiting + * for that. To mitigate that, we attempt to subdivide the most expensive + * requests; instead of having a single thread computing the expensive node, + * we may issue four or eight threads to compute the nodes two or three + * levels below (and have the main thread do the final computation when + * all the threads are completed). 
+ * + * This works out pretty good; however man does add complexity :-( + */ +#include <string.h> +#include <stdlib.h> +#include "hss.h" +#include "hss_internal.h" +#include "hss_aux.h" +#include "hash.h" +#include "hss_thread.h" +#include "hss_reserve.h" +#include "lm_ots_common.h" +#include "endian.h" + +#define DO_FLOATING_POINT 1 /* If clear, we avoid floating point operations */ + /* You can turn this off for two reasons: */ + /* - Your platform doesn't implement floating point */ + /* - Your platform is single threaded (we use floating point to figure */ + /* out how to split up tasks between threads; if the same thread */ + /* will do all the work, dividing it cleverly doesn't buy anything */ + /* (and that's a quite a bit of code that gets eliminated) */ + /* On the other hand, if you are threaded, you'd really want this if */ + /* at all possible; without this, one thread ends up doing the bulk of */ + /* the work, and so we end up going not that much faster than single */ + /* threaded mode */ + +/* + * This routine assumes that we have filled in the bottom node_count nodes of + * the subtree; it tries to compute as many internal nodes as possible + */ +static void fill_subtree(const struct merkle_level *tree, + struct subtree *subtree, + merkle_index_t node_count, + const unsigned char *I) { + if (node_count <= 1) return; /* If we can't compute any more nodes, */ + /* don't bother trying */ + unsigned h_subtree = (subtree->level == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + + /* Index into the node array where we're starting */ + merkle_index_t lower_index = ((merkle_index_t)1 << h_subtree) - 1; + + unsigned hash_size = tree->hash_size; + + /* The node identier (initially of the bottom left node of the */ + /* subtree */ + merkle_index_t node_id = (((merkle_index_t)1 << tree->level) + + subtree->left_leaf) + >> subtree->levels_below; + + /* Fill in as many levels of internal nodes as possible */ + int sublevel; + for (sublevel = h_subtree-1; sublevel >= 0; sublevel--) { + node_count >>= 1; + if (node_count == 0) break; /* Can't do any more */ + merkle_index_t prev_lower_index = lower_index; + lower_index >>= 1; + node_id >>= 1; + + merkle_index_t i; + for (i=0; i<node_count; i++) { + hss_combine_internal_nodes( + &subtree->nodes[ hash_size *(lower_index + i)], + &subtree->nodes[ hash_size *(prev_lower_index + 2*i)], + &subtree->nodes[ hash_size *(prev_lower_index + 2*i+1)], + tree->h, I, hash_size, + node_id + i); + } + } +} + +/* + * This routine takes the 2**num_level hashes, and computes up num_level's, + * returning the value of the top node. This is sort of like fill_tree, + * except that it returns only the top node, not the intermediate ones + * One warning: this does modify the passed value of hashes; our current + * caller doesn't care about that. 
+ */ +static void hash_subtree( unsigned char *dest, + unsigned char *hashes, + unsigned num_level, merkle_index_t node_index, + unsigned hash_size, + int h, const unsigned char *I) { + + /* Combine the nodes to form the tree, until we get to the two top nodes */ + /* This will overwrite the hashes array; that's OK, because we don't */ + /* need those anymore */ + for (; num_level > 1; num_level--) { + unsigned i; + merkle_index_t this_level_node_index = node_index << (num_level-1); + for (i = 0; i < ((unsigned)1<<(num_level-1)); i++) { + hss_combine_internal_nodes( + &hashes[ hash_size * i ], + &hashes[ hash_size * (2*i) ], + &hashes[ hash_size * (2*i + 1) ], + h, I, hash_size, + this_level_node_index + i); + } + } + + /* Combine the top two nodes to form our actual target */ + hss_combine_internal_nodes( + dest, + &hashes[ 0 ], + &hashes[ hash_size ], + h, I, hash_size, + node_index); +} + +#if DO_FLOATING_POINT +/* + * This structure is a note reminding us that we've decided to split this + * init_order into several requests, which can be run on independent threads + */ +struct sub_order { + unsigned num_hashes; /* The number of hashes this suborder is */ + /* split up into */ + unsigned level; /* Levels deep into the tree we go */ + merkle_index_t node_num_first_target; /* The node number of the left */ + /* most hash that we're standing in for */ + unsigned char h[1]; /* The hashes go here; we'll malloc */ + /* enough space to let them fit */ +}; +#endif + +/* + * This is an internal request to compute the bottom N nodes (starting from the + * left) of a subtree (and to contruct the internal nodes that based solely on + * those N leaf nodes) + */ +struct init_order { + const struct merkle_level *tree; + struct subtree *subtree; + merkle_index_t count_nodes; /* # of bottom level nodes we need to */ + /* generate */ + const unsigned char *prev_node; /* For nonbottom subtrees, sometimes one */ + /* of the nodes is the root of the */ + /* next level subtree that 
we compute in */ + /* its entirety. If so, this is a pointer */ + /* to where we will find the precomputed */ + /* value. This allows us to avoid */ + /* computing that specific node */ + merkle_index_t prev_index; /* This is the index of the */ + /* precomputed node, where 0 is the */ + /* leftmost bottom node of this subtree */ + char next_tree; /* If clear, we do this on the current */ + /* tree level (seed, I values); if set, */ + /* we do this on the next */ + char already_computed_lower; /* If set, we've already computed the */ + /* lower nodes (and all we need to do is */ + /* fill the upper); no need to ask the */ + /* threads do do anything */ + /* We may still need to build the */ + /* interiors of the subtrees, of course */ +#if DO_FLOATING_POINT + float cost; /* Approximate number of hash compression */ + /* operations per node */ + struct sub_order *sub; /* If non-NULL, this gives details on how */ + /* we want to subdivide the order between */ + /* different threads */ +#endif +}; + +#if DO_FLOATING_POINT + /* This comparison function sorts the most expensive orders first */ +static int compare_order_by_cost(const void *a, const void *b) { + const struct init_order *p = a; + const struct init_order *q = b; + + if (p->cost > q->cost) return -1; + if (p->cost < q->cost) return 1; + + return 0; +} +#else + /* This comparison function sorts the higher level subtrees first */ +static int compare_order_by_subtree_level(const void *a, const void *b) { + const struct init_order *p = a; + unsigned p_subtree = p->subtree->level; + const struct init_order *q = b; + unsigned q_subtree = q->subtree->level; + + if (p_subtree < q_subtree) return -1; + if (p_subtree > q_subtree) return 1; + + return 0; +} +#endif + +#if DO_FLOATING_POINT +static float estimate_total_cost(struct init_order *order, + unsigned count_order); + +/* + * This is a simple minded log function, returning an int. 
Yes, using the + * built-in log() function would be easier, however I don't want to pull in + * the -lm library just for this + */ +static unsigned my_log2(float f) { +#define MAX_LOG 10 + unsigned n; + for (n=1; f > 2 && n < MAX_LOG; n++) + f /= 2; + return n; +} +#endif + +/* + * This is the point of this entire file. + * + * It fills in an already allocated working key, based on the private key + */ +bool hss_generate_working_key( + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + const unsigned char *aux_data, size_t len_aux_data, /* Optional */ + struct hss_working_key *w, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + + if (!w) { + info->error_code = hss_error_got_null; + return false; + } + w->status = hss_error_key_uninitialized; /* In case we detect an */ + /* error midway */ + + if (!read_private_key && !context) { + info->error_code = hss_error_no_private_buffer; + return false; + } + + /* Read the private key */ + unsigned char private_key[ PRIVATE_KEY_LEN ]; + if (read_private_key) { + if (!read_private_key( private_key, PRIVATE_KEY_LEN, context)) { + info->error_code = hss_error_private_key_read_failed; + goto failed; + } + } else { + memcpy( private_key, context, PRIVATE_KEY_LEN ); + } + + /* + * Make sure that the private key and the allocated working key are + * compatible; that the working_key was initialized with the same + * parameter set + */ + { + if (w->levels > MAX_HSS_LEVELS) { + info->error_code = hss_error_internal; + goto failed; + } + unsigned char compressed[PRIVATE_KEY_PARAM_SET_LEN]; + param_set_t lm_type[MAX_HSS_LEVELS], lm_ots_type[MAX_HSS_LEVELS]; + unsigned i; + for (i=0; ilevels; i++) { + lm_type[i] = w->tree[i]->lm_type; + lm_ots_type[i] = w->tree[i]->lm_ots_type; + } + + if (!hss_compress_param_set( compressed, w->levels, + lm_type, lm_ots_type, + sizeof compressed )) { + /* We're passed an 
unsupported param set */ + info->error_code = hss_error_internal; + goto failed; + } + if (0 != memcmp( private_key + PRIVATE_KEY_PARAM_SET, compressed, + PRIVATE_KEY_PARAM_SET_LEN )) { + /* The working set was initiallized with a different parmset */ + info->error_code = hss_error_incompatible_param_set; + goto failed; + } + } + + sequence_t current_count = get_bigendian( + private_key + PRIVATE_KEY_INDEX, PRIVATE_KEY_INDEX_LEN ); + if (current_count > w->max_count) { + info->error_code = hss_error_private_key_expired; /* Hey! We */ + goto failed; /* can't generate any more signatures */ + } + hss_set_reserve_count(w, current_count); + + memcpy( w->private_key, private_key, PRIVATE_KEY_LEN ); + + /* Initialize all the levels of the tree */ + + /* Initialize the current count for each level (from the bottom-up) */ + sequence_t i; + sequence_t count = current_count; + for (i = w->levels; i >= 1 ; i--) { + struct merkle_level *tree = w->tree[i-1]; + unsigned index = count & tree->max_index; + count >>= tree->level; + tree->current_index = index; + } + + /* Initialize the I values */ + for (i = 0; i < w->levels; i++) { + struct merkle_level *tree = w->tree[i]; + + /* Initialize the I, I_next elements */ + if (i == 0) { + /* The root seed, I value is derived from the secret key */ + hss_generate_root_seed_I_value( tree->seed, tree->I, + private_key+PRIVATE_KEY_SEED ); + /* We don't use the I_next value */ + } else { + /* The seed, I is derived from the parent's values */ + + /* Where we are in the Merkle tree */ + struct merkle_level *parent = w->tree[i-1]; + merkle_index_t index = parent->current_index; + + hss_generate_child_seed_I_value( tree->seed, tree->I, + parent->seed, parent->I, + index, parent->lm_type, + parent->lm_ots_type ); + /* The next seed, I is derived from either the parent's I */ + /* or the parent's next value */ + if (index == tree->max_index) { + hss_generate_child_seed_I_value( tree->seed_next, tree->I_next, + parent->seed_next, parent->I_next, 
+ 0, parent->lm_type, + parent->lm_ots_type); + } else { + hss_generate_child_seed_I_value( tree->seed_next, tree->I_next, + parent->seed, parent->I, + index+1, parent->lm_type, + parent->lm_ots_type); + } + } + } + + /* Generate the expanded aux data structure (or NULL if we don't have a */ + /* viable aux structure */ + struct expanded_aux_data *expanded_aux, temp_aux; + expanded_aux = hss_expand_aux_data( aux_data, len_aux_data, &temp_aux, + w->tree[0]->hash_size, w ); + + /* + * Now, build all the subtrees within the tree + * + * We initialize the various data structures, and create a list of + * the nodes on the bottom levels of the subtrees that need to be + * initialized + */ + /* There are enough structures in this array to handle the maximum */ + /* number of orders we'll ever see */ + struct init_order order[MAX_HSS_LEVELS * MAX_SUBLEVELS * NUM_SUBTREE]; + struct init_order *p_order = order; + int count_order = 0; + + /* Step through the levels, and for each Merkle tree, compile a list of */ + /* the orders to initialize the bottoms of the subtrees that we'll need */ + for (i = w->levels; i >= 1 ; i--) { + struct merkle_level *tree = w->tree[i-1]; + unsigned hash_size = tree->hash_size; + /* The current count within this tree */ + merkle_index_t tree_count = tree->current_index; + /* The index of the leaf we're on */ + merkle_index_t leaf_index = tree_count; + + /* Generate the active subtrees */ + int j; + /*int bot_level_subtree = (int)tree->level;*/ /* The level of the bottom of */ + /* the subtree */ + unsigned char *active_prev_node = 0; + unsigned char *next_prev_node = 0; + for (j=tree->sublevels-1; j>=0; j--) { + /* The height of this subtree */ + int h_subtree = (j == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + + /* Initialize the active tree */ + struct subtree *active = tree->subtree[j][ACTIVE_TREE]; + + /* Total number of leaf nodes below this subtree */ + merkle_index_t size_subtree = (merkle_index_t)1 << + (h_subtree + active->levels_below); + /* Fill in the leaf index that's on the left side of this subtree */ + /* This is the index of the leaf that we did when we first */ + /* entered the active subtree */ + merkle_index_t left_leaf = leaf_index & ~(size_subtree - 1); + /* This is the number of leaves we've done in this subtree */ + merkle_index_t subtree_count = leaf_index - left_leaf; + /* If we're not in the bottom tree, it's possible that the */ + /* update process will miss the very first update before we */ + /* need to sign. To account for that, generate one more */ + /* node than what our current count would suggest */ + if (i != w->levels - 1) { + subtree_count++; + } + active->current_index = 0; + active->left_leaf = left_leaf; + merkle_index_t num_bottom_nodes = (merkle_index_t)1 << h_subtree; + + /* Check if we have aux data at this level */ + int already_computed_lower = 0; + if (i == 0) { + merkle_index_t lower_index = num_bottom_nodes-1; + merkle_index_t node_offset = active->left_leaf>>active->levels_below; + if (hss_extract_aux_data(expanded_aux, active->level+h_subtree, + w, &active->nodes[ hash_size * lower_index ], + node_offset, num_bottom_nodes)) { + /* We do have it precomputed in our aux data */ + already_computed_lower = 1; + } + } + /* No aux data at this level; schedule the bottom row to be computed */ + /* Schedule the creation of the entire active tree */ + p_order->tree = tree; + p_order->subtree = active; + p_order->count_nodes = (merkle_index_t)1 << h_subtree; /* All */ + /* the nodes in this subtree */ + p_order->next_tree = 0; + /* Mark the root we inherented from the subtree just below us */ + p_order->prev_node = already_computed_lower ? 
NULL : active_prev_node; + p_order->prev_index = (tree->current_index >> active->levels_below) & (num_bottom_nodes-1); + + p_order->already_computed_lower = already_computed_lower; + p_order++; count_order++; + + /* For the next subtree, here's where our root will be */ + active_prev_node = &active->nodes[0]; + + /* And initialize the building tree, assuming there is one, and */ + /* assuming that the active subtree isn't at the right edge of */ + /* the Merkle tree */ + if (j > 0 && (leaf_index + size_subtree <= tree->max_index )) { + struct subtree *building = tree->subtree[j][BUILDING_TREE]; + + /* The number of leaves that make up one bottom node */ + /* of this subtree */ + merkle_index_t size_below_tree = (merkle_index_t)1 << building->levels_below; + /* We need to initialize the building tree current index */ + /* to a value at least as large as subtree_count */ + /* We'd prefer not to have to specificallly initialize */ + /* the stack, and so we round up to the next place the */ + /* stack is empty */ + merkle_index_t building_count = + (subtree_count + size_below_tree - 1) & + ~(size_below_tree - 1); + /* # of bottom level nodes we've building right now */ + merkle_index_t num_nodes = building_count >> building->levels_below; + building->left_leaf = left_leaf + size_subtree; + building->current_index = building_count; + + /* Check if this is already in the aux data */ + already_computed_lower = 0; + if (i == 0) { + merkle_index_t lower_index = num_bottom_nodes-1; + merkle_index_t node_offset = building->left_leaf>>building->levels_below; + if (hss_extract_aux_data(expanded_aux, building->level+h_subtree, + w, &building->nodes[ hash_size * lower_index ], + node_offset, num_nodes)) { + /* We do have it precomputed in our aux data */ + already_computed_lower = 1; + } + } + + /* Schedule the creation of the subset of the building tree */ + p_order->tree = tree; + p_order->subtree = building; + /* # of nodes to construct */ + p_order->count_nodes = num_nodes; + 
p_order->next_tree = 0; + /* We generally can't use the prev_node optimization */ + p_order->prev_node = NULL; + p_order->prev_index = 0; + + p_order->already_computed_lower = already_computed_lower; + p_order++; count_order++; + } else if (j > 0) { + tree->subtree[j][BUILDING_TREE]->current_index = 0; + } + + /* And the NEXT_TREE (which is always left-aligned) */ + if ((i-1) > 0) { + struct subtree *next = tree->subtree[j][NEXT_TREE]; + next->left_leaf = 0; + merkle_index_t leaf_size = + (merkle_index_t)1 << next->levels_below; + + merkle_index_t next_index = tree_count; + /* If we're not in the bottom tree, it's possible that the */ + /* update process will miss the very first update before we */ + /* need to sign. To account for that, potetially generate */ + /* one more node than what our current count would suggest */ + if ((i-1) != w->levels - 1) { + next_index++; + } + + /* Make next_index the # of leaves we'll need to process to */ + /* forward this NEXT subtree to this state */ + next_index = (next_index + leaf_size - 1)/leaf_size; + + /* This is set if we have a previous subtree */ + merkle_index_t prev_subtree = (next->levels_below ? 1 : 0); + merkle_index_t num_nodes; + unsigned char *next_next_node = 0; + + /* If next_index == 1, then if we're on a nonbottom subtree */ + /* the previous subtree is still building (and so we */ + /* needn't do anything). 
The exception is if we're on the */ + /* bottom level, then there is no subtree, and so we still */ + /* need to build the initial left leaf */ + if (next_index <= prev_subtree) { + /* We're not started on this subtree yet */ + next->current_index = 0; + num_nodes = 0; + } else if (next_index < num_bottom_nodes) { + /* We're in the middle of building this tree */ + next->current_index = next_index << next->levels_below; + num_nodes = next_index; + } else { + /* We've completed building this tree */ + /* How we note "we've generated this entire subtree" */ + next->current_index = MAX_SUBINDEX; + num_nodes = num_bottom_nodes; + /* We've generated this entire tree; allow it to */ + /* be inhereited for the next one */ + next_next_node = &next->nodes[0]; + } + if (num_nodes > 0) { + /* Schedule the creation of these nodes */ + p_order->tree = tree; + p_order->subtree = next; + /* # of nodes to construct */ + p_order->count_nodes = num_nodes; + p_order->next_tree = 1; + p_order->prev_node = next_prev_node; + p_order->prev_index = 0; + + p_order->already_computed_lower = 0; + p_order++; count_order++; + } + next_prev_node = next_next_node; + } + +// bot_level_subtree -= h_subtree; + if (j == 0) break; //This is a single level tree + } + if (i == 0) break; //This is a single level tree + } + +#if DO_FLOATING_POINT + /* Fill in the cost estimates */ + for (i=0; i<(sequence_t)count_order; i++) { + p_order = &order[i]; + + /* + * While we're here, NULL out all the suborders; we'll fill them in + * later if necessary + */ + p_order->sub = 0; + if (p_order->already_computed_lower) { + /* If we pulled the data from the aux, no work required */ + p_order->cost = 0; + continue; + } + unsigned winternitz = 8; + unsigned p = 128; + (void)lm_ots_look_up_parameter_set(p_order->tree->lm_ots_type, 0, 0, + &winternitz, &p, 0); + + struct subtree *subtree = p_order->subtree; + unsigned levels_below = subtree->levels_below; + + /* + * Estimate the number of hashes that we'll need to 
compute to compute + * one node; this is the number of leaf nodes times the number of + * hashes used during a winternitz computation. This ignores a few + * other hashes, but gets the vast bulk of them + */ + p_order->cost = (float)((merkle_index_t)1<num_threads); + if (num_tracks == 0) num_tracks = 4; /* Divide by 0; just say no */ + float est_max_per_work_item = est_total / num_tracks; + + /* Scan through the items, and see which ones should be subdivided */ + for (i=0; i<(sequence_t)count_order; i++) { + p_order = &order[i]; + if (p_order->cost <= est_max_per_work_item) { + break; /* Break because once we hit this point, the rest of the */ + /* items will be cheaper */ + } + + /* Try to subdivide each item into subdiv pieces */ + unsigned subdiv = my_log2(p_order->cost / est_max_per_work_item); + struct subtree *subtree = p_order->subtree; + /* Make sure we don't try to subdivide lower than what the */ + /* Merkle tree structure allows */ + if (subdiv > subtree->levels_below) subdiv = subtree->levels_below; + if (subdiv == 0) continue; + merkle_index_t max_subdiv = (merkle_index_t)1 << subtree->levels_below; + if (subdiv > max_subdiv) subdiv = max_subdiv; + if (subdiv <= 1) continue; + + const struct merkle_level *tree = p_order->tree; + size_t hash_len = tree->hash_size; + merkle_index_t count_nodes = p_order->count_nodes; + size_t total_hash = (hash_len * count_nodes) << subdiv; + unsigned h_subtree = (subtree->level == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + struct sub_order *sub = malloc( sizeof *sub + total_hash ); + if (!sub) continue; /* On malloc failure, don't bother trying */ + /* to subdivide */ + + /* Fill in the details of this suborder */ + sub->level = subdiv; + sub->num_hashes = 1 << subdiv; + sub->node_num_first_target = + (subtree->left_leaf >> subtree->levels_below) + + ((merkle_index_t)1 << (h_subtree + subtree->level)); + p_order->sub = sub; + } +#endif + + /* Now, generate all the nodes we've listed in parallel */ + struct thread_collection *col = hss_thread_init(info->num_threads); + enum hss_error_code got_error = hss_error_none; + + /* We use this to decide the granularity of the requests we make */ +#if DO_FLOATING_POINT + unsigned core_target = 5 * hss_thread_num_tracks(info->num_threads); + float prev_cost = 0; +#endif + + for (i=0; i<(sequence_t)count_order; i++) { + p_order = &order[i]; + if (p_order->already_computed_lower) continue; /* If it's already */ + /* done, we needn't bother */ + /* If this work order is cheaper than what we've issued, allow */ + /* for a greater amount of consolidation */ +#if DO_FLOATING_POINT + if (prev_cost > 0) { + if (p_order->cost <= 2 * prev_cost) { + /* The cost per node has decreased by a factor of 2 (at */ + /* least); allow a single core to do more of the work */ + float ratio = prev_cost / p_order->cost; + if (ratio > 1000) { + core_target = 1; + } else { + core_target = core_target / ratio; + if (core_target == 0) core_target = 1; + } + prev_cost = p_order->cost; + } + } else { + prev_cost = p_order->cost; + } +#endif + + const struct merkle_level *tree = p_order->tree; + struct subtree *subtree = p_order->subtree; + unsigned h_subtree = (subtree->level == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + merkle_index_t lower_index = ((merkle_index_t)1 << h_subtree) - 1; + unsigned hash_size = tree->hash_size; +#if DO_FLOATING_POINT + unsigned max_per_request = p_order->count_nodes / core_target; + if (max_per_request == 0) max_per_request = 1; +#else + unsigned max_per_request = UINT_MAX; +#endif + + /* If we're skipping a value, make sure we compute up to there */ + merkle_index_t right_side = p_order->count_nodes; + if (p_order->prev_node && right_side > p_order->prev_index) { + right_side = p_order->prev_index; + } + + merkle_index_t n; + struct intermed_tree_detail detail; + + detail.seed = (p_order->next_tree ? tree->seed_next : tree->seed); + detail.lm_type = tree->lm_type; + detail.lm_ots_type = tree->lm_ots_type; + detail.h = tree->h; + detail.tree_height = tree->level; + detail.I = (p_order->next_tree ? tree->I_next : tree->I); + detail.got_error = &got_error; + +#if DO_FLOATING_POINT + /* Check if we're actually doing a suborder */ + struct sub_order *sub = p_order->sub; + if (sub) { + /* Issue all the orders separately */ + unsigned hash_len = tree->hash_size; + for (n = 0; n < p_order->count_nodes; n++ ) { + if (n == right_side) continue; /* Skip the omitted value */ + unsigned char *dest = &sub->h[ n * sub->num_hashes * hash_len ]; + merkle_index_t node_num = (sub->node_num_first_target+n) << sub->level; + unsigned k; + for (k=0; k < sub->num_hashes; k++) { + detail.dest = dest; + dest += hash_len; + detail.node_num = node_num; + node_num++; + detail.node_count = 1; + + hss_thread_issue_work(col, hss_gen_intermediate_tree, + &detail, sizeof detail ); + } + } + continue; + } +#endif + { + /* We're not doing a suborder; issue the request in as large of */ + /* a chunk as we're allowed */ + for (n = 0; n < p_order->count_nodes; ) { + merkle_index_t this_req = right_side - n; + if (this_req > max_per_request) this_req = max_per_request; + if (this_req == 0) { + /* We hit the value we're skipping; skip it, 
and go on to */ + /* the real right side */ + n++; + right_side = p_order->count_nodes; + continue; + } + + /* Issue a work order for the next this_req elements */ + detail.dest = &subtree->nodes[ hash_size * (lower_index + n)]; + detail.node_num = (subtree->left_leaf >> subtree->levels_below) + + n + ((merkle_index_t)1 << (h_subtree + subtree->level)); + detail.node_count = this_req; + + hss_thread_issue_work(col, hss_gen_intermediate_tree, + &detail, sizeof detail ); + + n += this_req; + } + } + } + + /* We've issued all the order; now wait until all the work is done */ + hss_thread_done(col); + if (got_error != hss_error_none) { + /* One of the worker threads detected an error */ +#if DO_FLOATING_POINT + /* Don't leak suborders on an intermediate error */ + for (i=0; i<(sequence_t)count_order; i++) { + free( order[i].sub ); // IGNORE free-check + } +#endif + info->error_code = got_error; + goto failed; + } + +#if DO_FLOATING_POINT + /* + * Now, if we did have suborders, recombine them into what was actually + * wanted + */ + for (i=0; i<(sequence_t)count_order; i++) { + p_order = &order[i]; + struct sub_order *sub = p_order->sub; + if (!sub) continue; /* This order wasn't subdivided */ + + const struct merkle_level *tree = p_order->tree; + const unsigned char *I = (p_order->next_tree ? tree->I_next : tree->I); + struct subtree *subtree = p_order->subtree; + unsigned hash_size = tree->hash_size; + unsigned h_subtree = (subtree->level == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + merkle_index_t lower_index = ((merkle_index_t)1 << h_subtree) - 1; + + merkle_index_t n; + for (n = 0; n < p_order->count_nodes; n++ ) { + if (p_order->prev_node && n == p_order->prev_index) continue; + + hash_subtree( &subtree->nodes[ hash_size * (lower_index + n)], + &sub->h[ hash_size * sub->num_hashes * n ], + sub->level, sub->node_num_first_target + n, + hash_size, tree->h, I); + } + + free( sub ); // IGNORE free-check + p_order->sub = 0; + } +#endif + + /* + * Now we have generated the lower level nodes of the subtrees; go back and + * fill in the higher level nodes. + * We do this in backwards order, so that we do the lower levels of the trees + * first (as lower levels are cheaper, they'll be listed later in the + * array; that's how we sorted, them, remember?). + * That means if any subtrees inherit the root values of lower trees, + * we compute those root values first + */ + for (i=count_order; i>0; i--) { + p_order = &order[i-1]; + const struct merkle_level *tree = p_order->tree; + const unsigned char *I = (p_order->next_tree ? tree->I_next : tree->I); + struct subtree *subtree = p_order->subtree; + + if (p_order->prev_node) { + /* This subtree did have a bottom node that was the root node */ + /* of a lower subtree; fill it in */ + unsigned hash_size = tree->hash_size; + unsigned h_subtree = (subtree->level == 0) ? tree->top_subtree_size : + tree->subtree_size; + merkle_index_t lower_index = ((merkle_index_t)1 << h_subtree) - 1; + + /* Where in the subtree we place the previous root */ + unsigned set_index = (lower_index + p_order->prev_index) * hash_size; + memcpy( &subtree->nodes[ set_index ], p_order->prev_node, hash_size ); + } + + /* Now, fill in all the internal nodes of the subtree */ + fill_subtree(tree, subtree, p_order->count_nodes, I); + } + + /* + * Hey; we've initialized all the subtrees (at least, as far as what + * they'd be expected to be given the current count); hurray! 
+ */ + + /* + * Now, create all the signed public keys + * Again, we could parallelize this; it's also fast enough not to be worth + * the complexity + */ + for (i = 1; i < w->levels; i++) { + if (!hss_create_signed_public_key( w->signed_pk[i], w->siglen[i-1], + w->tree[i], w->tree[i-1], w )) { + info->error_code = hss_error_internal; /* Really shouldn't */ + /* happen */ + goto failed; + } + } + hss_zeroize( private_key, sizeof private_key ); + + /* + * And, we make each level as not needing an update from below (as we've + * initialized them as already having the first update) + */ + for (i = 0; i < w->levels - 1; i++) { + w->tree[i]->update_count = UPDATE_DONE; + } + + w->status = hss_error_none; /* This working key has been officially */ + /* initialized, and now can be used */ + return true; + +failed: + hss_zeroize( private_key, sizeof private_key ); + return false; +} + +#if DO_FLOATING_POINT +/* + * This goes through the order, and estimates the total amount + * This assumes that the highest cost element is listed first + * + * It returns the estimated number of hash compression operations total + * + * We use floating point because the number of hash compression functions can + * vary a *lot*; floating point has great dynamic range. + */ +static float estimate_total_cost( struct init_order *order, + unsigned count_order ) { + if (count_order == 0) return 0; + float total_cost = 0; + + unsigned i; + + for (i=0; i +#include "common_defs.h" +#include "hss.h" +#include "config.h" + +/* + * This is the central internal include file for the functions that make up + * this subsystem. 
It should not be used by applications + */ + +#define PARAM_SET_COMPRESS_LEN 1 /* We assume that we can compress the */ + /* lm_type and the lm_ots type for a */ + /* single level into 1 byte */ + +#define PARM_SET_END 0xff /* We set this marker in the parameter set */ + /* when fewer than the maximum levels are used */ + + +/* + * The internal structure of a private key + */ +#define PRIVATE_KEY_INDEX 0 +#define PRIVATE_KEY_INDEX_LEN 8 /* 2**64 signatures should be enough for */ + /* everyone */ +#define PRIVATE_KEY_PARAM_SET (PRIVATE_KEY_INDEX + PRIVATE_KEY_INDEX_LEN) +#define PRIVATE_KEY_PARAM_SET_LEN (PARAM_SET_COMPRESS_LEN * MAX_HSS_LEVELS) +#define PRIVATE_KEY_SEED (PRIVATE_KEY_PARAM_SET + PRIVATE_KEY_PARAM_SET_LEN) +#if SECRET_METHOD == 2 +#define PRIVATE_KEY_SEED_LEN (SEED_LEN + I_LEN) +#else +#define PRIVATE_KEY_SEED_LEN SEED_LEN +#endif +#define PRIVATE_KEY_LEN (PRIVATE_KEY_SEED + PRIVATE_KEY_SEED_LEN) /* That's */ + /* 48 bytes */ + +struct merkle_level; +struct hss_working_key { + unsigned levels; + enum hss_error_code status; /* What is the status of this key */ + /* hss_error_none if everything looks ok */ + /* Otherwise, the error code we report if */ + /* we try to use this key to sign */ + sequence_t reserve_count; /* The value written to the private key */ + /* Will be higher than the 'current count' */ + /* if some signaures are 'reserved' */ + sequence_t max_count; /* The maximum count we can ever have */ + unsigned autoreserve; /* How many signatures to attempt to */ + /* reserve if the signing process hits */ + /* the end of the current reservation */ + + size_t signature_len; /* The length of the HSS signature */ + + unsigned char *stack; /* The stack memory used by the subtrees */ + + /* The private key (in its entirety) */ + unsigned char private_key[PRIVATE_KEY_LEN]; + /* The pointer to the seed (contained within the private key) */ + /* Warning: nonsyntaxic macro; need to be careful how we use this */ +#define working_key_seed private_key 
+ PRIVATE_KEY_SEED + + size_t siglen[MAX_HSS_LEVELS]; /* The lengths of the signatures */ + /* generated by the various levels */ + size_t signed_pk_len[MAX_HSS_LEVELS]; /* The lengths of the signed */ + /* public keys for the various levels */ + unsigned char *signed_pk[MAX_HSS_LEVELS]; /* The current signed public */ + /* keys for the nontop levels */ + /* Each array element is that level's */ + /* current root value, signed by the */ + /* previous level. Unused for the */ + /* topmost level */ + struct merkle_level *tree[MAX_HSS_LEVELS]; /* The structures that manage */ + /* each individual level */ +}; + +#define MIN_SUBTREE 2 /* All subtrees (other than the root subtree) have */ + /* at least 2 levels */ +#define MAX_SUBLEVELS ((MAX_MERKLE_HEIGHT + MIN_SUBTREE - 1) / MIN_SUBTREE) +#if MAX_SUBLEVELS > (1 << (MIN_MERKLE_HEIGHT-1)) - 2 +#error We need to rethink our parent tree update logic, as there is a +#error possibility we do not give the tree enough updates between signatures +/* One possible fix would be to increase the subtree size for extremely */ +/* tall trees */ +#endif + +struct merkle_level { + unsigned level; /* Total number of levels */ + unsigned h, hash_size; /* Hash function, width */ + param_set_t lm_type; + param_set_t lm_ots_type; /* OTS parameter */ + merkle_index_t current_index; /* The number of signatures this tree has */ + /* generated so far */ + merkle_index_t max_index; /* 1<levels) */ + unsigned level; /* The level that the root of this subtree */ + /* is within the larger Merkle tree */ + unsigned levels_below; /* The number of levels below this subtree */ + /* in the Merkle tree */ + unsigned char *stack; /* Pointer to the stack used when */ + /* generating nodes; will be a pointer */ + /* into the hss_working_key::stack array */ + /* Used to incrementally compute bottom */ + /* node values */ + unsigned char nodes[1]; /* The actual subtree node values */ + /* 2*(1< +#include +#include "common_defs.h" +#include "hss.h" +#include 
"hss_internal.h" +#include "hss_aux.h" +#include "endian.h" +#include "hash.h" +#include "hss_thread.h" +#include "lm_common.h" +#include "lm_ots_common.h" + +/* Count the number of 1 bits at the end (lsbits) of the integer */ +/* Do it in the obvious way; straightline code may be faster (no */ +/* unpredictable jumps, which are costly), but that would be less scrutable */ +static int trailing_1_bits(merkle_index_t n) { + int i; + for (i=0; n&1; n>>=1, i++) + ; + return i; +} + +/* + * This creates a private key (and the correspond public key, and optionally + * the aux data for that key) + * Parameters: + * generate_random - the function to be called to generate randomness. This + * is assumed to be a pointer to a cryptographically secure rng, + * otherwise all security is lost. This function is expected to fill + * output with 'length' uniformly distributed bits, and return 1 on + * success, 0 if something went wrong + * levels - the number of levels for the key pair (2-8) + * lm_type - an array of the LM registry entries for the various levels; + * entry 0 is the topmost + * lm_ots_type - an array of the LM-OTS registry entries for the various + * levels; again, entry 0 is the topmost + * update_private_key, context - the function that is called when the + * private key is generated; it is expected to store it to secure NVRAM + * If this is NULL, then the context pointer is reinterpretted to mean + * where in RAM the private key is expected to be placed + * public_key - where to store the public key + * len_public_key - length of the above buffer; see hss_get_public_key_len + * if you need a hint. + * aux_data - where to store the optional aux data. This is not required, but + * if provided, can be used to speed up the hss_generate_working_key + * process; + * len_aux_data - the length of the above buffer. 
This is not fixed length; + * the function will run different time/memory trade-offs based on the + * length provided + * + * This returns true on success, false on failure + */ +bool hss_generate_private_key( + bool (*generate_random)(void *output, size_t length), + unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + unsigned char *public_key, size_t len_public_key, + unsigned char *aux_data, size_t len_aux_data, + struct hss_extra_info *info) { + + struct hss_extra_info info_temp = { 0 }; + if (!info) info = &info_temp; + + if (!generate_random) { + /* We *really* need random numbers */ + info->error_code = hss_error_no_randomness; + return false; + } + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS) { + /* parameter out of range */ + info->error_code = hss_error_bad_param_set; + return false; + } + + unsigned h0; /* The height of the root tree */ + unsigned h; /* The hash function used */ + unsigned size_hash; /* The size of each hash that would appear in the */ + /* aux data */ + if (!lm_look_up_parameter_set(lm_type[0], &h, &size_hash, &h0)) { + info->error_code = hss_error_bad_param_set; + return false; + } + + /* Check the public_key_len */ + if (4 + 4 + 4 + I_LEN + size_hash > len_public_key) { + info->error_code = hss_error_buffer_overflow; + /* public key won't fit in the buffer we're given */ + return false; + } + + /* If you provide an aux_data buffer, we have to write something */ + /* into it (at least, enough to mark it as 'we're not really using */ + /* aux data) */ + if (aux_data && len_aux_data == 0) { + /* not enough aux data buffer to mark it as 'not really used' */ + info->error_code = hss_error_bad_aux; + return false; + } + + unsigned len_ots_pub = lm_ots_get_public_key_len(lm_ots_type[0]); + if (len_ots_pub == 0) { + info->error_code = hss_error_bad_param_set; + return false; + } + + 
unsigned char private_key[ PRIVATE_KEY_LEN ]; + + /* First step: format the private key */ + put_bigendian( private_key + PRIVATE_KEY_INDEX, 0, + PRIVATE_KEY_INDEX_LEN ); + if (!hss_compress_param_set( private_key + PRIVATE_KEY_PARAM_SET, + levels, lm_type, lm_ots_type, + PRIVATE_KEY_PARAM_SET_LEN )) { + info->error_code = hss_error_bad_param_set; + return false; + } + if (!(*generate_random)( private_key + PRIVATE_KEY_SEED, + PRIVATE_KEY_SEED_LEN )) { + info->error_code = hss_error_bad_randomness; + return false; + } + + /* Now make sure that the private key is written to NVRAM */ + if (update_private_key) { + if (!(*update_private_key)( private_key, PRIVATE_KEY_LEN, context)) { + /* initial write of private key didn't take */ + info->error_code = hss_error_private_key_write_failed; + hss_zeroize( private_key, sizeof private_key ); + return false; + } + } else { + if (context == 0) { + /* We weren't given anywhere to place the private key */ + info->error_code = hss_error_no_private_buffer; + hss_zeroize( private_key, sizeof private_key ); + return false; + } + memcpy( context, private_key, PRIVATE_KEY_LEN ); + } + + /* Figure out what would be the best trade-off for the aux level */ + struct expanded_aux_data *expanded_aux_data = 0, aux_data_storage; + if (aux_data != NULL) { + aux_level_t aux_level = hss_optimal_aux_level( len_aux_data, lm_type, + lm_ots_type, NULL ); + hss_store_aux_marker( aux_data, aux_level ); + + /* Set up the aux data pointers */ + expanded_aux_data = hss_expand_aux_data( aux_data, len_aux_data, + &aux_data_storage, size_hash, 0 ); + } + + unsigned char I[I_LEN]; + unsigned char seed[SEED_LEN]; + if (!hss_generate_root_seed_I_value( seed, I, private_key+PRIVATE_KEY_SEED)) { + info->error_code = hss_error_internal; + hss_zeroize( private_key, sizeof private_key ); + return false; + } + + /* Now, it's time to generate the public key, which means we need to */ + /* compute the entire top level Merkle tree */ + + /* First of all, figure out 
the appropriate level to compute up to */ + /* in parallel. We'll do the lower of the bottom-most level that */ + /* appears in the aux data, and 4*log2 of the number of core we have */ + unsigned num_cores = hss_thread_num_tracks(info->num_threads); + unsigned level; + unsigned char *dest = 0; /* The area we actually write to */ + void *temp_buffer = 0; /* The buffer we need to free when done */ + for (level = h0-1; level > 2; level--) { + /* If our bottom-most aux data is at this level, we want it */ + if (expanded_aux_data && expanded_aux_data->data[level]) { + /* Write directly into the aux area */ + dest = expanded_aux_data->data[level]; + break; + } + + /* If going to a higher levels would mean that we wouldn't */ + /* effectively use all the cores we have, use this level */ + if (((unsigned)1<num_threads); + + struct intermed_tree_detail details; + /* Set the values in the details structure that are constant */ + details.seed = seed; + details.lm_type = lm_type[0]; + details.lm_ots_type = lm_ots_type[0]; + details.h = h; + details.tree_height = h0; + details.I = I; + enum hss_error_code got_error = hss_error_none; /* This flag is set */ + /* on an error */ + details.got_error = &got_error; + + merkle_index_t j; + /* # of nodes at this level */ + merkle_index_t level_nodes = (merkle_index_t)1 << level; + /* the index of the node we're generating right now */ + merkle_index_t node_num = level_nodes; + /* + * We'd prefer not to issue a separate work item for every node; we + * might be doing millions of node (if we have a large aux data space) + * and we end up malloc'ing a large structure for every work order. 
+ * So, if we do have a large number of requires, aggregate them + */ + merkle_index_t increment = level_nodes / (10 * num_cores); +#define MAX_INCREMENT 20000 + if (increment > MAX_INCREMENT) increment = MAX_INCREMENT; + if (increment == 0) increment = 1; + for (j=0; j < level_nodes; ) { + unsigned this_increment; + if (level_nodes - j < increment) { + this_increment = level_nodes - j; + } else { + this_increment = increment; + } + + /* Set the particulars of this specific work item */ + details.dest = dest + j*size_hash; + details.node_num = node_num; + details.node_count = this_increment; + + /* Issue a separate work request for every node at this level */ + hss_thread_issue_work(col, hss_gen_intermediate_tree, + &details, sizeof details ); + + j += this_increment; + node_num += this_increment; + } + /* Now wait for all those work items to complete */ + hss_thread_done(col); + + hss_zeroize( seed, sizeof seed ); + + /* Check if something went wrong. It really shouldn't have, however if */ + /* something returns an error code, we really should try to handle it */ + if (got_error != hss_error_none) { + /* We failed; give up */ + info->error_code = got_error; + hss_zeroize( private_key, sizeof private_key ); + if (update_private_key) { + (void)(*update_private_key)(private_key, PRIVATE_KEY_LEN, context); + } else { + hss_zeroize( context, PRIVATE_KEY_LEN ); + } + free(temp_buffer); // IGNORE free-check + return false; + } + + /* Now, we complete the rest of the tree. 
This is actually fairly fast */ + /* (one hash per node) so we don't bother to parallelize it */ + + unsigned char stack[ MAX_HASH * (MAX_MERKLE_HEIGHT+1) ]; + unsigned char root_hash[ MAX_HASH ]; + + /* Generate the top levels of the tree, ending with the root node */ + merkle_index_t r, leaf_node; + for (r=level_nodes, leaf_node = 0; leaf_node < level_nodes; r++, leaf_node++) { + + /* Walk up the stack, combining the current node with what's on */ + /* the atack */ + merkle_index_t q = leaf_node; + + /* + * For the subtree which this leaf node forms the final piece, put the + * destination to where we'll want it, either on the stack, or if this + * is the final piece, to where the caller specified + */ + unsigned char *current_buf; + unsigned stack_offset = trailing_1_bits( leaf_node ); + if (stack_offset == level) { + current_buf = root_hash; + } else { + current_buf = &stack[stack_offset * size_hash ]; + } + memcpy( current_buf, dest + leaf_node * size_hash, size_hash ); + + unsigned sp; + unsigned cur_lev = level; + for (sp = 1;; sp++, cur_lev--, q >>= 1) { + /* Give the aux data routines a chance to save the */ + /* intermediate value. 
Note that we needn't check for the */ + /* bottommost level; if we're saving aux data at that level, */ + /* we've already placed it there */ + if (sp > 1) { + hss_save_aux_data( expanded_aux_data, cur_lev, + size_hash, q, current_buf ); + } + + if (sp > stack_offset) break; + + + hss_combine_internal_nodes( current_buf, + &stack[(sp-1) * size_hash], current_buf, + h, I, size_hash, + r >> sp ); + } + } + /* The top entry in the stack is the root value (aka the public key) */ + + /* Complete the computation of the aux data */ + hss_finalize_aux_data( expanded_aux_data, size_hash, h, + private_key+PRIVATE_KEY_SEED ); + + /* We have the root value; now format the public key */ + put_bigendian( public_key, levels, 4 ); + public_key += 4; len_public_key -= 4; + put_bigendian( public_key, lm_type[0], 4 ); + public_key += 4; len_public_key -= 4; + put_bigendian( public_key, lm_ots_type[0], 4 ); + public_key += 4; len_public_key -= 4; + memcpy( public_key, I, I_LEN ); + public_key += I_LEN; len_public_key -= I_LEN; + memcpy( public_key, root_hash, size_hash ); + public_key += size_hash; len_public_key -= size_hash; + + /* Hey, what do you know -- it all worked! */ + hss_zeroize( private_key, sizeof private_key ); /* Zeroize local copy of */ + /* the private key */ + free(temp_buffer); // IGNORE free-check + return true; +} + +/* + * The length of the private key + */ +size_t hss_get_private_key_len(unsigned levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type) { + /* A private key is a 'public object'? 
Yes, in the sense that we */ + /* export it outside this module */ + LMS_UNUSED(levels); + LMS_UNUSED(lm_type); + LMS_UNUSED(lm_ots_type); + return PRIVATE_KEY_LEN; +} diff --git a/src/sig_stfl/lms/external/hss_param.c b/src/sig_stfl/lms/external/hss_param.c new file mode 100644 index 0000000000..a1c20ab14c --- /dev/null +++ b/src/sig_stfl/lms/external/hss_param.c @@ -0,0 +1,153 @@ +#include +#include "hss.h" +#include "hss_internal.h" +#include "endian.h" +#include "hss_zeroize.h" + +/* + * Convert a parameter set into the compressed version we use within a private + * key. This is the private key that'll end up being updated constantly, and + * so we try to make it as small as possible + */ +bool hss_compress_param_set( unsigned char *compressed, + int levels, + const param_set_t *lm_type, + const param_set_t *lm_ots_type, + size_t len_compressed ) { + int i; + + for (i=0; i 0x0e || b > 0x0e) return false; + /* Make sure the parm sets are supported */ + switch (a) { + case LMS_SHA256_N32_H5: case LMS_SHA256_N32_H10: + case LMS_SHA256_N32_H15: case LMS_SHA256_N32_H20: + case LMS_SHA256_N32_H25: + break; + default: + return false; + } + switch (b) { + case LMOTS_SHA256_N32_W1: case LMOTS_SHA256_N32_W2: + case LMOTS_SHA256_N32_W4: case LMOTS_SHA256_N32_W8: + break; + default: + return false; + } + + *compressed++ = (a<<4) + b; + len_compressed--; + } + + while (len_compressed) { + *compressed++ = PARM_SET_END; + len_compressed--; + } + + return true; +} + +/* + * This returns the parameter set for a given private key. + * This is here to solve a chicken-and-egg problem: the hss_working_key + * must be initialized to the same parameter set as the private key, + * but (other than this function, or somehow remembering it) there's + * no way to retreive the parameter set. 
+ * + * read_private_key/context will read the private key (if read_private_key is + * NULL, context is assumed to point to the private key) + * + * On success, *levels will be set to the number of levels, and lm_type[] + * and lm_ots_type[] will be set to the lm/ots parameter sets + * + * On success, this returns true; on failure (can't read the private key, or + * the private key is invalid), returns false + */ +bool hss_get_parameter_set( unsigned *levels, + param_set_t lm_type[ MAX_HSS_LEVELS ], + param_set_t lm_ots_type[ MAX_HSS_LEVELS ], + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context) { + unsigned char private_key[ PRIVATE_KEY_LEN ]; + bool success = false; + + if (read_private_key) { + if (!read_private_key( private_key, PRIVATE_KEY_SEED, context )) { + goto failed; + } + } else { + if (!context) return false; + memcpy( private_key, context, PRIVATE_KEY_SEED ); + } + + /* Scan through the private key to recover the parameter sets */ + unsigned total_height = 0; + unsigned level; + for (level=0; level < MAX_HSS_LEVELS; level++) { + unsigned char c = private_key[PRIVATE_KEY_PARAM_SET + level]; + if (c == PARM_SET_END) break; + /* Decode this level's parameter set */ + param_set_t lm = (c >> 4); + param_set_t ots = (c & 0x0f); + /* Make sure both are supported */ + /* While we're here, add up the total Merkle height */ + switch (lm) { + case LMS_SHA256_N32_H5: total_height += 5; break; + case LMS_SHA256_N32_H10: total_height += 10; break; + case LMS_SHA256_N32_H15: total_height += 15; break; + case LMS_SHA256_N32_H20: total_height += 20; break; + case LMS_SHA256_N32_H25: total_height += 25; break; + default: goto failed; + } + switch (ots) { + case LMOTS_SHA256_N32_W1: + case LMOTS_SHA256_N32_W2: + case LMOTS_SHA256_N32_W4: + case LMOTS_SHA256_N32_W8: + break; + default: goto failed; + } + lm_type[level] = lm; + lm_ots_type[level] = ots; + } + + if (level < MIN_HSS_LEVELS || level > 
/*
 * Record 'count' as the current reservation point for this working key.
 * The reservation point is the signature index that has already been
 * committed to nonvolatile storage; signatures up to (but not including)
 * this index may be issued without rewriting the private key.
 * Called when a working key is loaded/initialized.
 */
void hss_set_reserve_count(struct hss_working_key *w, sequence_t count) {
    w->reserve_count = count;
}
/*
 * This is called when we generate a signature; it checks if we need
 * to write out a new private key (and advance the reservation); if it
 * decides it needs to write out a new private key, it also decides how
 * far it needs to advance it.
 *
 * w                  - the working key being used to sign
 * cur_count          - the index of the signature being generated
 * update_private_key - callback that persists the private key; if NULL,
 *                      'context' is assumed to point at the raw private key
 * info               - extra-info block; NOTE(review): dereferenced without
 *                      a NULL check, so internal callers must guarantee it
 *                      is non-NULL (hss_generate_signature substitutes a
 *                      temporary) -- confirm at call sites
 * trash_private_key  - set to true when the key is exhausted, telling the
 *                      caller to zeroize its in-memory copy after signing
 *
 * Returns true on success; on failure sets info->error_code and leaves the
 * in-memory reservation state unchanged.
 */
bool hss_advance_count(struct hss_working_key *w, sequence_t cur_count,
        bool (*update_private_key)(unsigned char *private_key,
                size_t len_private_key, void *context),
        void *context,
        struct hss_extra_info *info, bool *trash_private_key) {

    if (cur_count == w->max_count) {
        /* We hit the end of the root; this will be the last signature */
        /* this private key can do */
        w->status = hss_error_private_key_expired; /* Fail if they try to */
                                                   /* sign any more */
        info->last_signature = true;
        /* Make sure we zeroize the private key */
        *trash_private_key = true; /* We can't trash our copy of the */
            /* private key until after we've generated the signature */
            /* We can trash the copy in secure storage, though */
        if (update_private_key) {
            /* Overwrite the persisted key with the PARM_SET_END fill */
            /* pattern so it can never be used to sign again */
            unsigned char private_key[PRIVATE_KEY_LEN];
            memset( private_key, PARM_SET_END, PRIVATE_KEY_LEN );
            if (!update_private_key(private_key, PRIVATE_KEY_LEN, context)) {
                info->error_code = hss_error_private_key_write_failed;
                return false;
            }
        } else {
            /* Raw-key mode: 'context' is the private key itself */
            memset( context, PARM_SET_END, PRIVATE_KEY_LEN );
        }
        return true;
    }
    sequence_t new_count = cur_count + 1;

    if (new_count > w->reserve_count) {
        /* We need to advance the reservation */

        /* Check if we have enough space to do the entire autoreservation */
        if (w->max_count - new_count > w->autoreserve) {
            new_count += w->autoreserve;
        } else {
            /* If we don't have enough space, reserve what we can */
            new_count = w->max_count;
        }

        /* Write the advanced index into our in-memory copy first... */
        put_bigendian( w->private_key + PRIVATE_KEY_INDEX, new_count,
                       PRIVATE_KEY_INDEX_LEN );
        if (update_private_key) {
            /* ...then try to persist it */
            if (!update_private_key(w->private_key, PRIVATE_KEY_INDEX_LEN,
                                    context)) {
                /* Oops, we couldn't write the private key; undo the */
                /* reservation advance (and return an error) */
                info->error_code = hss_error_private_key_write_failed;
                put_bigendian( w->private_key + PRIVATE_KEY_INDEX,
                               w->reserve_count, PRIVATE_KEY_INDEX_LEN );
                return false;
            }
        } else {
            /* Raw-key mode: update the index field in place */
            put_bigendian( context, new_count, PRIVATE_KEY_INDEX_LEN );
        }
        /* Commit the new reservation only after the write succeeded */
        w->reserve_count = new_count;
    }

    return true;
}
+ * I have no idea why someone would reserve signatures if they have + * a raw private key (which is cheap to update), however there's no + * reason we shouldn't support it + */ + if (!update_private_key) { + if (0 != memcmp( context, w->private_key, PRIVATE_KEY_LEN)) { + info->error_code = hss_error_key_mismatch; + return false; /* Private key mismatch */ + } + } + + /* Figure out what the current count is */ + sequence_t current_count = 0; + unsigned i; + for (i = 0; ilevels; i++) { + struct merkle_level *tree = w->tree[i]; + /* -1 because the current_index counts the signatures to the */ + /* current next level */ + current_count = (current_count << tree->level) + + tree->current_index - 1; + } + current_count += 1; /* The bottom-most tree isn't advanced */ + + sequence_t new_reserve_count; /* This is what the new reservation */ + /* setting would be (if we accept the reservation) */ + if (current_count > w->max_count - sigs_to_reserve) { + /* Not that many sigantures left */ + /* Reserve as many as we can */ + new_reserve_count = w->max_count; + } else { + new_reserve_count = current_count + sigs_to_reserve; + } + + if (new_reserve_count <= w->reserve_count) { + /* We already have (at least) that many reserved; do nothing */ + return true; + } + + /* Attempt to update the count in the private key */ + put_bigendian( w->private_key + PRIVATE_KEY_INDEX, new_reserve_count, + PRIVATE_KEY_INDEX_LEN ); + /* Update the copy in NV storage */ + if (update_private_key) { + if (!update_private_key(w->private_key, PRIVATE_KEY_INDEX_LEN, + context)) { + /* Oops, couldn't update it */ + put_bigendian( w->private_key + PRIVATE_KEY_INDEX, + w->reserve_count, PRIVATE_KEY_INDEX_LEN ); + info->error_code = hss_error_private_key_write_failed; + return false; + } + } else { + memcpy( context, w->private_key, PRIVATE_KEY_INDEX_LEN ); + } + w->reserve_count = new_reserve_count; + + return true; +} diff --git a/src/sig_stfl/lms/external/hss_reserve.h 
b/src/sig_stfl/lms/external/hss_reserve.h new file mode 100644 index 0000000000..3b101c1130 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_reserve.h @@ -0,0 +1,21 @@ +#if !defined( HSS_RESERVE_H_ ) +#define HSS_RESERVE_H_ + +/* + * This is the internal include file for the reservation functions for this + * subsystem. It should not be used by applications + */ + +#include "common_defs.h" + +struct hss_working_key; + +void hss_set_reserve_count(struct hss_working_key *w, sequence_t count); + +bool hss_advance_count(struct hss_working_key *w, sequence_t new_count, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + struct hss_extra_info *info, bool *trash_private_key); + +#endif /* HSS_RESERVE_H_ */ diff --git a/src/sig_stfl/lms/external/hss_sign.c b/src/sig_stfl/lms/external/hss_sign.c new file mode 100644 index 0000000000..359e59df7b --- /dev/null +++ b/src/sig_stfl/lms/external/hss_sign.c @@ -0,0 +1,736 @@ +/* + * This is an implementation of the HSS signature scheme from LMS + * This is the part that actually generates the signature + */ +#include +#include +#include "common_defs.h" +#include "hss.h" +#include "hash.h" +#include "endian.h" +#include "hss_internal.h" +#include "hss_aux.h" +#include "hss_thread.h" +#include "hss_reserve.h" +#include "lm_ots.h" +#include "lm_ots_common.h" +#include "hss_derive.h" + +/* + * This adds one leaf to the building and next subtree. + */ +enum subtree_build_status { + subtree_got_error, /* Oops, something broke */ + subtree_more_to_do, /* Processed node, still more to do */ + subtree_did_last_node, /* Processed last node */ + subtree_all_done /* We're good */ +}; +static enum subtree_build_status subtree_add_next_node( + struct subtree *subtree, + struct merkle_level *tree, + int next_tree, + struct thread_collection *col) { + unsigned subtree_size = (subtree->level>0 ? 
tree->subtree_size : + tree->top_subtree_size); + unsigned log_leafs = subtree_size + subtree->levels_below; + merkle_index_t max_index = (merkle_index_t)1 << log_leafs; + /* Check if there is anything more to do */ + if (subtree->current_index == max_index) return subtree_all_done; + unsigned hash_size = tree->hash_size; + unsigned char cur_val[MAX_HASH]; + + /* Compute the leaf node */ + merkle_index_t i; + unsigned ots_len = lm_ots_get_public_key_len(tree->lm_ots_type); + unsigned char pub_key[ LEAF_MAX_LEN ]; + const unsigned char *I = (next_tree ? tree->I_next : tree->I); + memcpy( pub_key + LEAF_I, I, I_LEN ); + SET_D( pub_key + LEAF_D, D_LEAF ); + merkle_index_t r = subtree->left_leaf + subtree->current_index; + merkle_index_t q = r | ((merkle_index_t)1 << tree->level); + put_bigendian( pub_key + LEAF_R, q, 4); + + const unsigned char *seed = (next_tree ? tree->seed_next : tree->seed); + struct seed_derive derive; + if (!hss_seed_derive_init( &derive, tree->lm_type, tree->lm_ots_type, + I, seed )) return subtree_got_error; + hss_seed_derive_set_q(&derive, r); + if (!lm_ots_generate_public_key(tree->lm_ots_type, I, + r, &derive, pub_key + LEAF_PK, ots_len)) { + hss_seed_derive_done(&derive); + return subtree_got_error; + } + hss_seed_derive_done(&derive); + + /* Hash it to form the leaf node */ + union hash_context ctx; + hss_hash_ctx( cur_val, tree->h, &ctx, pub_key, LEAF_LEN(hash_size)); + + /* Where in the subtree we store the values */ + merkle_index_t subtree_index = subtree->current_index + + ((merkle_index_t)1 << log_leafs); + enum subtree_build_status status = subtree_more_to_do; + + /* Walk up the stack, and then up the tree */ + for (i=0;; i++) { + if (i >= subtree->levels_below) { + /* This node is within the subtree; save it */ + memcpy( &subtree->nodes[ (subtree_index-1) * hash_size ], cur_val, hash_size ); + } + if (subtree_index == 1) { /* Hit the root */ + status = subtree_did_last_node; + break; + } + if ((q & 1) == 0) break; /* Hit a left 
node */ + q >>= 1; + + /* This is a right node; combine it with the left node */ + unsigned char *left_node; + if (i >= subtree->levels_below) { + /* The left node is in the tree */ + left_node = &subtree->nodes[ (subtree_index-2) * hash_size ]; + } else { + /* The left node is on the stack */ + left_node = subtree->stack + (i * hash_size); + } + hss_combine_internal_nodes( cur_val, + left_node, cur_val, + tree->h, I, hash_size, + q); + subtree_index >>= 1; + } + + /* If we haven't got out of the stack, put the value there */ + if (i < subtree->levels_below) { + if (col) hss_thread_before_write(col); + memcpy( subtree->stack + (i * hash_size), cur_val, hash_size ); + if (col) hss_thread_after_write(col); + } + + /* Ok, we've done another node */ + subtree->current_index += 1; + + return status; +} + +/* + * This steps the next tree by one. We need to do this 2**tree->level times, + * and then the next tree will be ready + */ +static int hss_step_next_tree (struct merkle_level *tree, + const struct hss_working_key *w, + struct thread_collection *col) { + struct subtree *prev_subtree = 0; + struct subtree *subtree = 0; + int j; + + LMS_UNUSED(w); + /* Search for the subtree to update */ + for (j = tree->sublevels-1; j>=0; j--) { + subtree = tree->subtree[j][NEXT_TREE]; + if (subtree->current_index < MAX_SUBINDEX) break; + prev_subtree = subtree; + } + unsigned height_subtree = (j == 0) ? 
/*
 * Generate the next Merkle (LMS) signature for a given level.
 *
 * Writes, in order: the 4-byte leaf index, the OTS signature, the 4-byte
 * LM parameter set, and the authentication path, into 'signature'.
 *
 * If 'message' is NULL (internal interface), everything *except* the OTS
 * signature is generated and the OTS field is zero-filled; the caller is
 * expected to fill it in later.
 *
 * Side effect: on success, advances tree->current_index by one.
 * Returns 1 on success, 0 on failure (e.g. buffer too short).
 */
static int generate_merkle_signature(
    unsigned char *signature, unsigned signature_len,
    struct merkle_level *tree,
    const struct hss_working_key *w,
    const void *message, size_t message_len) {
    /* First off, write the index value */
    LMS_UNUSED(w);
    if (signature_len < 4) return 0;
    merkle_index_t current_index = tree->current_index;
    put_bigendian( signature, current_index, 4 );
    signature += 4; signature_len -= 4;

    /* Write the OTS signature */
    size_t ots_sig_size = lm_ots_get_signature_len( tree->lm_ots_type );
    if (ots_sig_size == 0 || ots_sig_size > signature_len) return 0;
    if (message == NULL) {
        /* Internal interface: if message = NULL, we're supposed to */
        /* generate everything *except* the OTS signature */
        memset( signature, 0, ots_sig_size );
    } else {
        /* Derive the per-leaf secret and produce the OTS signature */
        struct seed_derive derive;
        if (!hss_seed_derive_init( &derive,
                     tree->lm_type, tree->lm_ots_type,
                     tree->I, tree->seed )) return 0;
        hss_seed_derive_set_q(&derive, current_index);
        bool success = lm_ots_generate_signature( tree->lm_ots_type, tree->I,
                     current_index, &derive,
                     message, message_len, false,
                     signature, ots_sig_size);
        hss_seed_derive_done(&derive);
        if (!success) return 0;
    }
    signature += ots_sig_size; signature_len -= ots_sig_size;

    /* Write the LM parameter set */
    if (signature_len < 4) return 0;
    put_bigendian( signature, tree->lm_type, 4 );
    signature += 4; signature_len -= 4;

    /* Now, write the authentication path: for each sublevel (bottom-up), */
    /* emit the sibling of every node on the path to the subtree root */
    int i, j;
    merkle_index_t index = current_index;
    unsigned n = tree->hash_size;
    for (i = tree->sublevels-1; i>=0; i--) {
        int height = (i == 0) ? tree->top_subtree_size : tree->subtree_size;
        struct subtree *subtree = tree->subtree[i][ACTIVE_TREE];
        /* Position of the leaf within this subtree, in 1-based heap order */
        merkle_index_t subtree_index = (index &
                            (((merkle_index_t)1 << height) - 1)) +
                            ((merkle_index_t)1 << height);
        for (j = height-1; j>=0; j--) {
            if (signature_len < n) return 0;
            /* subtree_index^1 is the sibling; -1 converts heap order */
            /* to the 0-based node array */
            memcpy( signature, subtree->nodes + n * ((subtree_index^1) - 1), n );
            signature += n; signature_len -= n;
            subtree_index >>= 1;
        }
        index >>= height;
    }

    /* Mark that we've generated a signature */
    tree->current_index = current_index + 1;

    return 1;
}
+ if (!generate_merkle_signature( signed_key, len_signature, + parent, w, public_key, len_public_key)) { + return false; + } + + parent->update_count = UPDATE_NEXT; /* The parent has generated a */ + /* signature; it's now eligible for another */ + /* round of updates */ + + return true; +} + +struct gen_sig_detail { + unsigned char *signature; + size_t signature_len; + const unsigned char *message; + size_t message_len; + struct hss_working_key *w; + enum hss_error_code *got_error; +}; +/* This does the actual signature generation */ +/* It is (potentially) run within a thread */ +static void do_gen_sig( const void *detail, struct thread_collection *col) { + const struct gen_sig_detail *d = detail; + size_t signature_len = d->signature_len; + unsigned char *signature = d->signature; + struct hss_working_key *w = d->w; + unsigned levels = w->levels; + + /* The number of signed public keys */ + if (signature_len < 4) goto failed; + put_bigendian( signature, levels - 1, 4 ); + signature += 4; signature_len -= 4; + /* The signed public keys */ + unsigned i; + for (i=1; isigned_pk_len[i]; + if (signature_len < len_signed_pk) goto failed; + memcpy( signature, w->signed_pk[i], len_signed_pk ); + signature += len_signed_pk; signature_len -= len_signed_pk; + } + /* And finally the signature of the actual message */ + if (signature_len < w->siglen[levels-1]) goto failed; /* Oops, not enough room */ + + const unsigned char *message = d->message; + size_t message_len = d->message_len; + + if (!generate_merkle_signature(signature, signature_len, + w->tree[ levels-1 ], w, message, message_len)) { + goto failed; + } + + /* Success! 
*/ + return; + +failed: + /* Report failure */ + hss_thread_before_write(col); + *d->got_error = hss_error_internal; + hss_thread_after_write(col); +} + +struct step_next_detail { + struct hss_working_key *w; + struct merkle_level *tree; + enum hss_error_code *got_error; +}; +/* This steps the next tree */ +/* It is (potentially) run within a thread */ +static void do_step_next( const void *detail, struct thread_collection *col) { + const struct step_next_detail *d = detail; + struct hss_working_key *w = d->w; + struct merkle_level *tree = d->tree; + + if (!hss_step_next_tree( tree, w, col )) { + /* Report failure */ + hss_thread_before_write(col); + *d->got_error = hss_error_internal; + hss_thread_after_write(col); + } +} + +struct step_building_detail { + struct merkle_level *tree; + struct subtree *subtree; + enum hss_error_code *got_error; +}; +/* This steps the building tree */ +/* It is (potentially) run within a thread */ +static void do_step_building( const void *detail, + struct thread_collection *col) { + const struct step_building_detail *d = detail; + struct merkle_level *tree = d->tree; + struct subtree *subtree = d->subtree; + + switch (subtree_add_next_node( subtree, tree, 0, col )) { + case subtree_got_error: default: + /* Huh? 
Report failure */ + hss_thread_before_write(col); + *d->got_error = hss_error_internal; + hss_thread_after_write(col); + break; + case subtree_more_to_do: + case subtree_did_last_node: + case subtree_all_done: + break; + } +} + +struct update_parent_detail { + struct hss_working_key *w; + enum hss_error_code *got_error; +}; +/* + * This gives an update to the parent (non-bottom Merkle trees) + */ +static void do_update_parent( const void *detail, + struct thread_collection *col) { + const struct update_parent_detail *d = detail; + struct hss_working_key *w = d->w; + unsigned levels = w->levels; + unsigned current_level = levels - 2; /* We start with the first */ + /* non-bottom level */ + for (;;) { + struct merkle_level *tree = w->tree[current_level]; + switch (tree->update_count) { + case UPDATE_DONE: return; /* No more updates needed */ + case UPDATE_NEXT: /* Our job is to update the next tree */ + tree->update_count = UPDATE_PARENT; + if (current_level == 0) return; /* No next tree to update */ + if (!hss_step_next_tree( tree, w, col )) goto failed; + return; + case UPDATE_PARENT: /* Our job is to update our parent */ + tree->update_count = UPDATE_BUILDING + 0; + if (current_level == 0) return; /* No parent to update */ + current_level -= 1; + continue; + default: { + /* Which building tree we need to update */ + unsigned level_to_update = + (tree->update_count - UPDATE_BUILDING) + 1; + if (level_to_update >= tree->sublevels) { + /* We've completed all the updates we need to do (until */ + /* the next time we need to sign something) */ + tree->update_count = UPDATE_DONE; + return; + } + + /* Next time, update the next BUILDING subtree */ + tree->update_count += 1; + + struct subtree *subtree = + tree->subtree[level_to_update][BUILDING_TREE]; + + /* The number of leaves in this tree */ + merkle_index_t tree_leaves = (merkle_index_t)1 << tree->level; + + /* Check if we'd actually use the building tree */ + if (subtree->left_leaf >= tree_leaves) { + /* We'll never 
use it; don't bother updating it */ + return; + } + + /* We'll use the BUILDING_TREE, actually add a node */ + switch (subtree_add_next_node( subtree, tree, 0, col )) { + case subtree_got_error: default: goto failed; /* Huh? */ + case subtree_did_last_node: + case subtree_all_done: + case subtree_more_to_do: + /* We're done everything we need to do for this step */ + return; + } + } + } + } + +failed: + /* Huh? Report failure */ + hss_thread_before_write(col); + *d->got_error = hss_error_internal; + hss_thread_after_write(col); +} + +/* + * Code to actually generate the signature + */ +bool hss_generate_signature( + struct hss_working_key *w, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + const void *message, size_t message_len, + unsigned char *signature, size_t signature_buf_len, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + unsigned i; + bool trash_private_key = false; + + info->last_signature = false; + + if (!w) { + info->error_code = hss_error_got_null; + goto failed; + } + if (w->status != hss_error_none) { + info->error_code = w->status; + goto failed; + } + + /* If we're given a raw private key, make sure it's the one we're */ + /* thinking of */ + if (!update_private_key) { + if (0 != memcmp( context, w->private_key, PRIVATE_KEY_LEN)) { + info->error_code = hss_error_key_mismatch; + return false; /* Private key mismatch */ + } + } + + /* Check if the buffer we were given is too short */ + if (w->signature_len > signature_buf_len) { + /* The signature would overflow the buffer */ + info->error_code = hss_error_buffer_overflow; + goto failed; + } + + unsigned levels = w->levels; + /* + * Compile the current count + */ + sequence_t current_count = 0; + for (i=0; i < levels; i++) { + struct merkle_level *tree = w->tree[i]; + current_count <<= tree->level; + /* We subtract 1 because the nonbottom trees are already advanced */ + 
current_count += (sequence_t)tree->current_index - 1; + } + current_count += 1; /* Bottom most tree isn't already advanced */ + + /* Ok, try to advance the private key */ + if (!hss_advance_count(w, current_count, + update_private_key, context, info, + &trash_private_key)) { + /* hss_advance_count fills in the error reason */ + goto failed; + } + + /* Ok, now actually generate the signature */ + + /* We'll be doing several things in parallel */ + struct thread_collection *col = hss_thread_init(info->num_threads); + enum hss_error_code got_error = hss_error_none; + + /* Generate the signature */ + { + struct gen_sig_detail gen_detail; + gen_detail.signature = signature; + gen_detail.signature_len = w->signature_len; + gen_detail.message = message; + gen_detail.message_len = message_len; + gen_detail.w = w; + gen_detail.got_error = &got_error; + + hss_thread_issue_work(col, do_gen_sig, &gen_detail, sizeof gen_detail); + } + + /* Update the bottom level next tree */ + if (levels > 1) { + struct step_next_detail step_detail; + step_detail.w = w; + step_detail.tree = w->tree[levels-1]; + step_detail.got_error = &got_error; + + hss_thread_issue_work(col, do_step_next, &step_detail, sizeof step_detail); + } + + /* Issue orders to step each of the building subtrees in the bottom tree */ + int skipped_a_level = 0; /* Set if the below issued didn't issue an */ + /* order for at least one level */ + { + struct merkle_level *tree = w->tree[levels-1]; + merkle_index_t updates_before_end = tree->max_index - tree->current_index + 1; + int h_subtree = tree->subtree_size; + for (i=1; isublevels; i++) { + struct subtree *subtree = tree->subtree[i][BUILDING_TREE]; + /* Check if there is a building tree */ + if (updates_before_end < (merkle_index_t)1 << + (subtree->levels_below + h_subtree)) { + /* No; we're at the last subtree within this tree */ + skipped_a_level = 1; + continue; + } + struct step_building_detail step_detail; + step_detail.tree = tree; + step_detail.subtree = 
subtree; + step_detail.got_error = &got_error; + + hss_thread_issue_work(col, do_step_building, &step_detail, sizeof step_detail); + + } + /* If there's only one sublevel, act as if we always skipped a sublevel */ + if (tree->sublevels == 1) skipped_a_level = 1; + } + + /* + * And, if we're allowed to give the parent a chance to update, and + * there's a parent with some updating that needs to be done, schedule + * that to be done + */ + if (skipped_a_level && + levels > 1 && w->tree[levels-2]->update_count != UPDATE_DONE) { + struct update_parent_detail detail; + detail.w = w; + detail.got_error = &got_error; + hss_thread_issue_work(col, do_update_parent, &detail, sizeof detail); + } + + /* Wait for all of them to finish */ + hss_thread_done(col); + + /* Check if any of them reported a failure */ + if (got_error != hss_error_none) { + info->error_code = got_error; + goto failed; + } + + current_count += 1; /* The new count is one more than what is */ + /* implied by the initial state of the Merkle trees */ + + /* + * Now, we scan to see if we exhausted a Merkle tree, and need to update it + * At the same time, we check to see if we need to advance the subtrees + */ + sequence_t cur_count = current_count; + unsigned merkle_levels_below = 0; + int switch_merkle = w->levels; + struct merkle_level *tree; + for (i = w->levels; i>=1; i--, merkle_levels_below += tree->level) { + tree = w->tree[i-1]; + + if (0 == (cur_count & (((sequence_t)1 << (merkle_levels_below + tree->level))-1))) { + /* We exhausted this tree */ + if ((i-1) == 0) { + /* We've run out of signatures; we've already caught this */ + /* above; just make *sure* we've marked the key as */ + /* unusable, and give up */ + w->status = hss_error_private_key_expired; + break; + } + + /* Remember we'll need to switch to the NEXT_TREE */ + switch_merkle = i-1; + continue; + } + + /* Check if we need to advance any of the subtrees */ + unsigned subtree_levels_below = 0; + unsigned j; + for (j = tree->sublevels-1; 
j>0; j--) { + subtree_levels_below += tree->subtree_size; + if (0 != (cur_count & (((sequence_t)1 << (merkle_levels_below + subtree_levels_below))-1))) { + /* We're in the middle of this subtree */ + goto done_advancing; + } + + /* Switch to the building subtree */ + struct subtree *next = tree->subtree[j][BUILDING_TREE]; + struct subtree *prev = tree->subtree[j][ACTIVE_TREE]; + unsigned char *stack = next->stack; /* Stack stays with */ + /* building tree */ + tree->subtree[j][ACTIVE_TREE] = next; + /* We need to reset the parameters on the new building subtree */ + prev->current_index = 0; + prev->left_leaf += (merkle_index_t)2 << subtree_levels_below; + tree->subtree[j][BUILDING_TREE] = prev; + next->stack = NULL; + prev->stack = stack; + } + } +done_advancing: + /* Check if we used up any Merkle trees; if we have, switch to the */ + /* NEXT_TREE (which we've built in our spare time) */ + for (i = switch_merkle; i < w->levels; i++) { + struct merkle_level *tree_l = w->tree[i]; + struct merkle_level *parent = w->tree[i-1]; + unsigned j; + + /* Rearrange the subtrees */ + for (j=0; jsublevels; j++) { + /* Make the NEXT_TREE active; replace it with the current active */ + struct subtree *active = tree_l->subtree[j][NEXT_TREE]; + struct subtree *next = tree_l->subtree[j][ACTIVE_TREE]; + unsigned char *stack = active->stack; /* Stack stays with */ + /* next tree */ + + active->left_leaf = 0; + next->current_index = 0; + next->left_leaf = 0; + tree_l->subtree[j][ACTIVE_TREE] = active; + tree_l->subtree[j][NEXT_TREE] = next; + active->stack = NULL; + next->stack = stack; + if (j > 0) { + /* Also reset the building tree */ + struct subtree *building = tree->subtree[j][BUILDING_TREE]; + building->current_index = 0; + merkle_index_t size_subtree = (merkle_index_t)1 << + (tree->subtree_size + building->levels_below); + building->left_leaf = size_subtree; + } + } + + /* Copy in the value of seed, I we'll use for the new tree */ + memcpy( tree_l->seed, tree->seed_next, 
SEED_LEN ); + memcpy( tree_l->I, tree->I_next, I_LEN ); + + /* Compute the new next I, which is derived from either the parent's */ + /* I or the parent's I_next value */ + merkle_index_t index = parent->current_index; + if (index == parent->max_index) { + hss_generate_child_seed_I_value(tree->seed_next, tree->I_next, + parent->seed_next, parent->I_next, 0, + parent->lm_type, + parent->lm_ots_type); + } else { + hss_generate_child_seed_I_value( tree->seed_next, tree->I_next, + parent->seed, parent->I, index+1, + parent->lm_type, + parent->lm_ots_type); + } + + tree_l->current_index = 0; /* We're starting this from scratch */ + + /* Generate the signature of the new level */ + if (!hss_create_signed_public_key( w->signed_pk[i], w->siglen[i-1], + tree_l, parent, w )) { + info->error_code = hss_error_internal; + goto failed; + } + } + + /* And we've set things up for the next signature... */ + + if (trash_private_key) { + memset( w->private_key, PARM_SET_END, PRIVATE_KEY_LEN ); + } + + return true; + +failed: + + if (trash_private_key) { + memset( w->private_key, PARM_SET_END, PRIVATE_KEY_LEN ); + } + + /* On failure, make sure that we don't return anything that might be */ + /* misconstrued as a real signature */ + memset( signature, 0, signature_buf_len ); + return false; +} + +/* + * Get the signature length + */ +size_t hss_get_signature_len_from_working_key(struct hss_working_key *w) { + if (!w || w->status != hss_error_none) return 0; + + int levels = w->levels; + if (levels > MAX_HSS_LEVELS) return 0; + param_set_t lm[MAX_HSS_LEVELS], ots[MAX_HSS_LEVELS]; + int i; + for (i=0; itree[i]->lm_type; + ots[i] = w->tree[i]->lm_ots_type; + } + + return hss_get_signature_len(levels, lm, ots); +} diff --git a/src/sig_stfl/lms/external/hss_sign_inc.c b/src/sig_stfl/lms/external/hss_sign_inc.c new file mode 100644 index 0000000000..e455b5cd2b --- /dev/null +++ b/src/sig_stfl/lms/external/hss_sign_inc.c @@ -0,0 +1,218 @@ +/* + * This is the code that implements the 
hierarchical part of the LMS hash + * based signatures; in this case, incremental signing + */ +#include +#include "hss.h" +#include "common_defs.h" +#include "hss_verify_inc.h" +#include "lm_verify.h" +#include "lm_common.h" +#include "lm_ots.h" +#include "lm_ots_verify.h" +#include "hash.h" +#include "endian.h" +#include "hss_internal.h" +#include "hss_sign_inc.h" +#include "hss_derive.h" + +/* + * Start the process of creating an HSS signature incrementally. Parameters: + * ctx - The state we'll use to track the incremental signature + * working_key - the in-memory version of the in-memory private key + * update_private_key - function to call to update the master private key + * context - context pointer for above + * siganture - the buffer to hold the signature + * signature_len - the length of the buffer + * this_is_the_last_signature - if non-NULL, this will be set if this + * signature is the last for this private key + */ +bool hss_sign_init( + struct hss_sign_inc *ctx, + struct hss_working_key *w, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + unsigned char *signature, size_t signature_len, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 };; + if (!info) info = &temp_info; + + if (!ctx) { + info->error_code = hss_error_got_null; + return false; + } + ctx->status = hss_error_ctx_uninitialized; /* Until we hear otherwise, */ + /* we got a failure */ + + if (!w) { + info->error_code = hss_error_got_null; + return false; + } + if (w->status != hss_error_none) { + info->error_code = w->status; + return false; + } + + struct merkle_level *bottom = w->tree[ w->levels - 1 ]; + + unsigned char I[I_LEN]; + memcpy( I, bottom->I, I_LEN ); + + /* Compute the value of C we'll use */ + merkle_index_t q = bottom->current_index; + ctx->q = q; + int h = bottom->h; + ctx->h = h; + + struct seed_derive derive; + if (!hss_seed_derive_init( &derive, bottom->lm_type, 
bottom->lm_ots_type, + bottom->I, bottom->seed )) return false; + hss_seed_derive_set_q(&derive, q); + lm_ots_generate_randomizer( ctx->c, bottom->hash_size, &derive ); + hss_seed_derive_done(&derive); + + /* + * Ask the signature generation process to do everything *except* + * the bottom level OTS signature + */ + bool success = hss_generate_signature( w, + update_private_key, context, + NULL, 0, /* <--- we don't have the message yet */ + signature, signature_len, info ); + if (!success) { + /* On failure, hss_generate_signature fills in the failure reason */ + ctx->status = info->error_code; + hss_zeroize( &ctx->c, sizeof ctx->c ); /* People don't get to */ + /* learn what randomizer we would have used */ + return false; + } + + /* Now, initialize the context */ + hss_init_hash_context( h, &ctx->hash_ctx ); + { + unsigned char prefix[ MESG_PREFIX_MAXLEN ]; + memcpy( prefix + MESG_I, I, I_LEN ); + unsigned q_bin[4]; put_bigendian( q_bin, q, 4 ); + memcpy( prefix + MESG_Q, q_bin, 4 ); /* q */ + SET_D( prefix + MESG_D, D_MESG ); + int n = bottom->hash_size; + memcpy( prefix + MESG_C, ctx->c, n ); /* C */ + hss_update_hash_context(h, &ctx->hash_ctx, prefix, MESG_PREFIX_LEN(n) ); + } + + /* It succeeded so far... 
*/ + ctx->status = hss_error_none; + return true; +} + +/* This adds another piece of the message to validate */ +bool hss_sign_update( + struct hss_sign_inc *ctx, + const void *message_segment, + size_t len_message_segment) { + if (!ctx || ctx->status != hss_error_none) return false; + + hss_update_hash_context(ctx->h, &ctx->hash_ctx, + message_segment, len_message_segment ); + + return true; +} + +/* We've added all the pieces of the messages, now do the validation */ +bool hss_sign_finalize( + struct hss_sign_inc *ctx, + const struct hss_working_key *working_key, + unsigned char *signature, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + + if (!ctx) { + info->error_code = hss_error_got_null; + return false; + } + if (ctx->status != hss_error_none) { + info->error_code = ctx->status; + return false; + } + + /* Success or fail, we can't use the context any more */ + ctx->status = hss_error_ctx_already_used; + + int L = working_key->levels; + + /* Step through the signature, looking for the place to put the OTS */ + /* signature, and (while we're at it) recovering the I and seed values */ + const unsigned char *I = working_key->tree[0]->I; + const unsigned char *seed = working_key->tree[0]->seed; + /* Note: we alternate buffers during generation in case */ + /* hss_generate_child_seed_I_value doesn't allow new values to */ + /* overwrite old ones */ + unsigned char I_buff[2][I_LEN]; + unsigned char seed_buff[2][SEED_LEN]; + + /* Q: should we double check the various fixed fields of the signatures */ + /* (e.g. the number of signed keys, the parameter sets? 
*/ + + signature += 4; + + int i; + for (i=0; i working_key->tree[i]->max_index) { + hss_zeroize( seed_buff, sizeof seed_buff ); + return 0; + } + if (!hss_generate_child_seed_I_value( seed_buff[i&1], I_buff[i&1], + seed, I, q, + working_key->tree[i]->lm_type, + working_key->tree[i]->lm_ots_type )) { + hss_zeroize( seed_buff, sizeof seed_buff ); + info->error_code = hss_error_internal; + return false; + } + + seed = seed_buff[i&1]; + I = I_buff[i&1]; + + /* Step to the end of this signed key */ + signature += lm_get_signature_len( working_key->tree[i]->lm_type, + working_key->tree[i]->lm_ots_type); + signature += lm_get_public_key_len(working_key->tree[i+1]->lm_type); + } + + /* Now, signature points to where the bottom LMS signature should go */ + /* It starts with the q value */ + put_bigendian( signature, ctx->q, 4 ); + signature += 4; + /* And then the LM-OTS signature */ + + /* Copy in the C value into the signature */ + memcpy( signature+4, ctx->c, 32 ); + + /* Generate the final hash */ + unsigned char hash[ MAX_HASH ]; + hss_finalize_hash_context( ctx->h, &ctx->hash_ctx, hash ); + + /* And the final OTS signature based on that hash */ + param_set_t lm_type = working_key->tree[i]->lm_type; + param_set_t ots_type = working_key->tree[i]->lm_ots_type; + struct seed_derive derive; + bool success = hss_seed_derive_init( &derive, lm_type, ots_type, + I, seed ); + if (success) { + hss_seed_derive_set_q( &derive, ctx->q ); + success = lm_ots_generate_signature( + ots_type, I, ctx->q, &derive, hash, 0, true, + signature, lm_ots_get_signature_len( ots_type )); + + hss_seed_derive_done( &derive ); + } + if (!success) { + info->error_code = hss_error_internal; + } + + hss_zeroize( seed_buff, sizeof seed_buff ); + return success; +} diff --git a/src/sig_stfl/lms/external/hss_sign_inc.h b/src/sig_stfl/lms/external/hss_sign_inc.h new file mode 100644 index 0000000000..426d271abd --- /dev/null +++ b/src/sig_stfl/lms/external/hss_sign_inc.h @@ -0,0 +1,81 @@ +#if !defined( 
HSS_SIGN_INC_H_ ) +#define HSS_SIGN_INC_H_ +#include +#include +#include "hash.h" +#include "common_defs.h" + +/* + * These are the functions to sign a message incrementally. + * That is, we assume that we don't have the entire message at + * once, instead, we have it in pieces (for example, the signature + * is of a multigigabyte file) + * + * Usage: + * struct hss_sign_inc ctx; + * bool success = hss_sign_init( &ctx, working_key, + * update_private_key, private_key_context, + * signature, signature_buffer_len, + * &lsat_signature ); + * hss_sign_update( &ctx, message_part_1, len_1 ); + * hss_sign_update( &ctx, message_part_2, len_2 ); + * hss_sign_update( &ctx, message_part_3, len_3 ); + * success = hss_sign_finalize( &ctx, working_key, signature ); + * if (success) printf( "We generated the signature\n" ); + * + * This is in its own include file because we need to import some + * 'not-generally-for-general-consumption' include files to make + * it work (as they're in the hss_sign_inc structure) + */ + +/* + * This is the context structure that holds the intermedate results of an + * in-process signature + * It's a application-visible structure for ease of use: the application can + * allocate it as an automatic, and if the application aborts in the middle of + * signing, it doesn't cause a memory leak + */ +struct hss_sign_inc { + enum hss_error_code status; /* Either hss_error_none if we're in */ + /* process, or the reason why we'd fail */ + + int h; /* The hash function */ + merkle_index_t q; /* The index of the bottom level signature */ + union hash_context hash_ctx; /* For the running hash we use */ + + unsigned char c[MAX_HASH]; /* The C value we used */ +}; + +struct hss_extra_info; + +/* Starts off the process of incrementally signing a message */ +/* If it detects a failure, this returns false */ +/* Handing the return code is optional; if this fails, the finalization */ +/* step will fail too */ +bool hss_sign_init( + struct hss_sign_inc *ctx, + struct 
hss_working_key *working_key, + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context, + unsigned char *signature, size_t signature_len, + struct hss_extra_info *info); + +/* This adds another piece of the message to sign */ +/* Again, the result code is optional */ +bool hss_sign_update( + struct hss_sign_inc *ctx, + const void *message_segment, + size_t len_message_segment); + +/* This finalizes the signature generation */ +/* This returns true if the signature was generated properly */ +/* We ask the caller to pass in the working key again, we need to review */ +/* the private key (we don't want to place it in the context) */ +bool hss_sign_finalize( + struct hss_sign_inc *ctx, + const struct hss_working_key *working_key, + unsigned char *signature, + struct hss_extra_info *info); + +#endif /* HSS_SIGN_INC_H_ */ diff --git a/src/sig_stfl/lms/external/hss_thread.h b/src/sig_stfl/lms/external/hss_thread.h new file mode 100644 index 0000000000..fbf572ad4b --- /dev/null +++ b/src/sig_stfl/lms/external/hss_thread.h @@ -0,0 +1,135 @@ +#if !defined( HSS_THREAD_H_ ) +#define HSS_THREAD_H_ +/* + * This is our internal abstraction of multithreading; this allows the + * "application" (in this case, the HSS code) to issue multiple requests that + * can potentially run on different threads, in a way that doesn't depend on + * the actual threading capability of the OS. 
If we don't actually have + * multiple threads avaiable (either because the OS doesn't provide us with + * multiple threads, or we hit an internal error trying to generate new + * threads), this will just have the main thread do all the work (and hence + * the application doesn't have to worry its pretty little head about error + * handling, or whether we actually implement threads in the first place) + * + * This is designed to handle this sort of task: we have a series of + * computational problems to do; each can be done independently of the others, + * and each problem results in a fairly short answer. All the children do is + * computation; there's no I/O or any other interaction with the OS at all. + * + * The general paradigm is: + * - The main thread generates a thread collection (via the hss_thread_init + * call) + * - The main thread then issues a series of tasks (via the + * hss_thread_issue_work call). This may spawn off other threads (which + * will then call the function passed); alternatively, the main thread may + * call the function. + * - The main thread then waits for all the tasks to be done (via the + * hss_thread_done call) + * The function(s) passed to the hss_thread_issue_work call will be completed + * by the time hss_thread_done returns + */ +#include + +/* This is our abstract object that stands for a set of threads */ +struct thread_collection; + +/* + * This is called to initialize a set of threads, and returns the identifier. + * Note that this cannot fail; if it returns 0, it's not a failure; instead, + * it's a valid return (which essentially means we're running in nonthreaded + * mode) + * The integer passed is a recommendation on the number of threads + */ +struct thread_collection *hss_thread_init(int); + +/* + * This issues another work item to our collection of threads. 
At some point + * (between when hss_thread_issue_work is called and when hss_thread_done + * returns), we'll have function called, with a pointer to a copy of the detail + * structure. function may be called by this thread, or it may be called by a + * different one. + * + * The passed detail structure will not be referenced after this returns, and + * hence it is safe if the caller modifies (or frees) it afterwards. If the + * function isn't completed by the time hss_thread_issue_work returns, we'll + * squirrel away a copy of detail (which is why we ask the caller to + * pass size_detail_structure; so we know how much to copy) + * + * We suggest that the application issue the work orders in largest-to-smallest + * order. The ordering doesn't matter for correctness (the API makes no + * guarrantees about when the requests will be completed), however we suggest + * this for expected performance reasons. hss_thread_done will not return + * until all threads are done; what we want to avoid is scenarios where all but + * one of the threads are done, and that last thread is working on an expensive + * function; that would slow things down, and the entire point of this thread + * library is to speed things up. Assigning work items to threads optimally is + * an NP-hard problem, however the simple heuristic of packing 'largest first' + * works fairly well in practice (and is easy to implement). 
The thread library + * does try to make a best effort attempt to preserve the issue order (assuming + * no intermediate malloc or thread spawn issues; in those cases, the library + * prioritizes correctness over efficiency) + */ +void hss_thread_issue_work(struct thread_collection *col, + void (*function)(const void *detail, + struct thread_collection *col), + const void *detail, size_t size_detail_structure); + +/* + * This waits for all the work items we have issued (via hss_thread_issue_work) + * to be completed (that is, 'function' has returned, and cleans up the + * collection + * + * col must not be used after this; if it was malloc'ed, this will free it + */ +void hss_thread_done(struct thread_collection *col); + +/* + * This should be called before a thread writes to common data + * + * We do this because we sometimes have different threads write data to + * adjacent memory locations; if the compiler has the CPU do a + * read/modify/write to the entire word (or however the CPU has memory + * organized), this could cause a race condition. 
Forcing those writes to be + * serialized avoids the issue; such a race condition would actually be fairly + * unlikely, but would be a *really* difficult bug to track down if it did + * occur, so it makes sense to go the extra mile to avoid the possibility + * + * Doing this locking also means that the working thread can safely do things + * such as incrementing a global [1] counter to report its results, should + * that be appropriate + * + * We don't bother doing this if we're writing into a malloc'ed region, *if* + * we're the only thread that will be writing into that specific region; we + * assume that the malloc infrastructure will separate distinct malloc'ed + * regions enough to avoid such race conditions + * + * [1] actually, automatic to the main thread; there are no literal globals + * in this package, apart from the verbose debugging flag + */ +void hss_thread_before_write(struct thread_collection *collect); + +/* + * This should be called after a thread writes to common data; it releases + * the lock + */ +void hss_thread_after_write(struct thread_collection *collect); + +/* + * This gives the application guidance for how many worker threads we have + * available, that is, how many work items we can expect to run at once + * + * This is used to decide the level of granularity we need; we we have only 2 + * cores, there's no point is splitting the job up to 50 separate requests; + * however if there are 100 cores, we want (if possible) to do at least 100 + * + * The issue with having not enough requests is that we will have idle threads + * (which could potentially do useful work, if we are able to divide the work + * further). 
The issue with having too many requests is that the requests use + * up some memory, and we'd prefer not to use up too much memory (we don't + * fail on malloc failure, however we do drop back to a single threaded model) + * + * The value passed is the value we'll pass to hss_thread_init + */ +unsigned hss_thread_num_tracks(int num_threads); + +#endif /* HSS_THREAD_H_ */ diff --git a/src/sig_stfl/lms/external/hss_thread_pthread.c b/src/sig_stfl/lms/external/hss_thread_pthread.c new file mode 100644 index 0000000000..b5f64d3764 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_thread_pthread.c @@ -0,0 +1,298 @@ +#include "hss_thread.h" + +#include +#include + +/* + * This is an implementation of our threaded abstraction using the + * POSIX pthread API + * + * C11 has a similar (but not precisely identical) API to the one that POSIX + * defines (at least for what we do; all we need is thread create/join and + * mutex's, which *any* thread library should provide). I'd code up the + * support for that API as well (using the same base logic, with typedef's and + * helper inlines to isolate the differences), however I don't have a C11 + * implementation handy to test it + */ + +#define MAX_THREAD 16 /* Number try to create more than 16 threads, no */ + /* matter what the application tries to tell us */ +#define DEFAULT_THREAD 16 /* The number of threads to run if the */ + /* application doesn't tell us otherwise (e.g. 
*/ + /* passes in 0) */ + +#define MIN_DETAIL 16 /* So the alignment kludge we do doesn't waste space */ + +/* The information we track about a thread we may have launched */ +struct thread_state { + pthread_t thread_id; + enum { never_was, alive, dead } state; +}; + +struct work_item { + struct work_item *link; /* They're in a linked list */ + + void (*function)(const void *detail, /* Function to call */ + struct thread_collection *col); + + /* These two items are used to pass the thread state to the thread */ + /* if this is the first work item for the thread to process */ + struct thread_collection *col; /* The parent thread_collection */ + struct thread_state *state; /* The pointer into the thread collection */ + /* state for the state of this thread */ + + /* The detail structure that we pass to the function */ + /* We'll malloc enough space to hold the entire structure */ + union { /* union here so that the detail array is */ + void *align1; /* correctly aligned for various datatypes */ + long long align2; + void (*align3)(void); + unsigned char detail[MIN_DETAIL]; + } x; +}; + +struct thread_collection { + pthread_mutex_t lock; /* Must be locked before this structure is */ + /* accessed if there might be a thread */ + pthread_mutex_t write_lock; /* Must be locked before common user data is */ + /* written */ + + unsigned num_thread; + unsigned current_ptr; /* There two are here to avoid O(N) table */ + unsigned num_alive; /* scanning in the most common scenarios */ + + /* Information about the worker threads we may have created */ + struct thread_state threads[MAX_THREAD]; + + /* + * Queue (FIFO) of work items submitted, and which can't be processed + * immedately. 
We do a FIFO, rather than a stack, so that we perform + * the requests in the order they were issued (which isn't something + * the interface guarantees; however it doesn't interfere with the + * request ordering we ask applications to make) + */ + struct work_item *top_work_queue; + struct work_item *end_work_queue; +}; + +/* + * Allocate a thread control structure + */ +struct thread_collection *hss_thread_init(int num_thread) { + if (num_thread == 0) num_thread = DEFAULT_THREAD; + if (num_thread <= 1) return 0; /* Not an error: an indication to run */ + /* single threaded */ + if (num_thread > MAX_THREAD) num_thread = MAX_THREAD; + + struct thread_collection *col = malloc( sizeof *col ); + if (!col) return 0; /* On malloc failure, run single threaded */ + + col->num_thread = num_thread; + + if (0 != pthread_mutex_init( &col->lock, 0 )) { + free(col); // IGNORE free-check + return 0; + } + + if (0 != pthread_mutex_init( &col->write_lock, 0 )) { + pthread_mutex_destroy( &col->lock ); + free(col); // IGNORE free-check + return 0; + } + + col->current_ptr = 0; + col->num_alive = 0; + int i; + for (i=0; ithreads[i].state = never_was; + } + col->top_work_queue = 0; + col->end_work_queue = 0; + + return col; +} + +/* + * This is the base routine that a worker thread runs + */ +static void *worker_thread( void *arg ) { + struct work_item *w = arg; /* The initial work item */ + struct thread_collection *col = w->col; + struct thread_state *state = w->state; + + for (;;) { + /* Perform the work item in front of us */ + (w->function)(w->x.detail, col); + + /* Ok, we did that */ + free(w); // IGNORE free-check + + /* Check if there's anything else to do */ + pthread_mutex_lock( &col->lock ); + + w = col->top_work_queue; + if (w) { + /* More work; pull it off the queue */ + col->top_work_queue = w->link; + if (w == col->end_work_queue) col->end_work_queue = 0; + + /* And go handle it */ + pthread_mutex_unlock( &col->lock ); + continue; + } + + /* No more work for us to do; 
post our obituary */ + state->state = dead; + col->num_alive -= 1; + pthread_mutex_unlock( &col->lock ); + + /* And that's all folks */ + return 0; + } +} + +/* + * This adds function/details to the list of things that need to be done + * It either creates a thread to do it, or (if we're maxed out) add it to + * our honey-do list (or, as last resort, just does it itself) + */ +void hss_thread_issue_work(struct thread_collection *col, + void (*function)(const void *detail, + struct thread_collection *col), + const void *detail, size_t size_detail_structure) { + + /* If we're running in single-threaded mode */ + if (!col) { + function( detail, col ); + return; + } + + /* Allocate a work structure to hold this request */ + size_t extra_space; + if (size_detail_structure < MIN_DETAIL) extra_space = 0; + else extra_space = size_detail_structure - MIN_DETAIL; + struct work_item *w = malloc(sizeof *w + extra_space); + + if (!w) { + /* Can't allocate the work structure; fall back to single-threaded */ + function( detail, col ); + return; + } + w->col = col; + w->function = function; + memcpy( w->x.detail, detail, size_detail_structure ); + + unsigned num_thread = col->num_thread; + + pthread_mutex_lock( &col->lock ); + + /* Check if we can spawn a new thread */ + if (col->num_alive < num_thread) { + /* There's supposed to be room for another */ + /* Look for the empty slot */ + unsigned i, j; + j = col->current_ptr; /* Do round-robin (so we don't bang on */ + /* slot 0 whenever we try to start a thread) */ + for (i=0; ithreads[j]; + switch (p->state) { + case alive: continue; /* This one's busy */ + case dead: + { + /* This one just died; grab its status (not that we care, */ + /* however that'll tell the thread library it can clean up) */ + pthread_t thread_id = p->thread_id; + void *status; /* Ignored, but we need to place thread */ + /* status somewhere */ + pthread_mutex_unlock( &col->lock ); + pthread_join( thread_id, &status ); + pthread_mutex_lock( &col->lock ); + 
p->state = never_was; + } + /* FALL THROUGH */ + case never_was: + /* Now, we can spawn a new thread */ + w->state = p; + if (0 != pthread_create( &p->thread_id, + NULL, worker_thread, w )) { + /* Hmmm, couldn't spawn it; fall back */ + default: /* On error condition */ + pthread_mutex_unlock( &col->lock ); + free(w); // IGNORE free-check + function( detail, col ); + return; + } + + /* We've kicked off the thread */ + p->state = alive; + col->num_alive += 1; + /* For the next request, start scanning at the next */ + /* thread object */ + col->current_ptr = (j+1) % num_thread; + pthread_mutex_unlock( &col->lock ); + return; + } + } + col->num_alive = num_thread; /* Hmmmm, everything was alive??? */ + } + + /* We can't create any more threads; enqueue this (and someone will get */ + /* to it) */ + w->link = 0; + if (col->end_work_queue) { + col->end_work_queue->link = w; + } + col->end_work_queue = w; + if (!col->top_work_queue) col->top_work_queue = w; + + pthread_mutex_unlock( &col->lock ); +} + +/* + * This will wait for all the work items we'e issued to complete + */ +void hss_thread_done(struct thread_collection *col) { + if (!col) return; + + unsigned i; + pthread_mutex_lock( &col->lock ); + for (i=0; inum_thread; i++) { + /* + * Wait for each thread that we have spawned. 
+ * We're the only one that will spawn them, and so we don't have to + * worry about any new ones appearing while we scan through the list + */ + if (col->threads[i].state != never_was) { + void *status; + pthread_t thread_id = col->threads[i].thread_id; + pthread_mutex_unlock( &col->lock ); + pthread_join( thread_id, &status ); + pthread_mutex_lock( &col->lock ); + } + } + pthread_mutex_unlock( &col->lock ); + + /* Ok, all the threads have finished; tear things down */ + + pthread_mutex_destroy( &col->lock ); + pthread_mutex_destroy( &col->write_lock ); + free(col); // IGNORE free-check +} + +void hss_thread_before_write(struct thread_collection *col) { + if (!col) return; + pthread_mutex_lock( &col->write_lock ); +} + +void hss_thread_after_write(struct thread_collection *col) { + if (!col) return; + pthread_mutex_unlock( &col->write_lock ); +} + + +unsigned hss_thread_num_tracks(int num_thread) { + if (num_thread == 0) num_thread = DEFAULT_THREAD; + if (num_thread <= 1) return 1; + if (num_thread >= MAX_THREAD) return MAX_THREAD; + return num_thread; +} diff --git a/src/sig_stfl/lms/external/hss_thread_single.c b/src/sig_stfl/lms/external/hss_thread_single.c new file mode 100644 index 0000000000..d844385293 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_thread_single.c @@ -0,0 +1,63 @@ +#include "hss_thread.h" +#include "config.h" + +/* + * This is a trivial implementation of our threading abstraction. + * It's used if we don't have any threading support + */ + +/* + * This requests that an object that tracks the threads be created. We have + * no threads, hence we don't need such an object + */ +struct thread_collection *hss_thread_init(int num_thread) { + LMS_UNUSED(num_thread); + return 0; +} + +/* + * This asks that function be called sometime between now, and when + * hss_thread_done is called. 
We just go ahead, and do it now + */ +void hss_thread_issue_work(struct thread_collection *collect, + void (*function)(const void *detail, + struct thread_collection *col), + const void *detail, size_t size_detail_structure) { + LMS_UNUSED(size_detail_structure); + /* If we were asked to make sure something is done, just do it */ + function( detail, collect ); +} + +/* + * This asks for all the work requests we've issued to completed, and that + * the collection object be freed. We did all the work when it was + * requested, and we never allocated a collection object in the first place + */ +void hss_thread_done(struct thread_collection *collect) { + LMS_UNUSED(collect); +} + +/* + * A thread calls this when it will write into a common area (so that no + * other thread will access it at the same time). No threads means that + * there is no need to lock + */ +void hss_thread_before_write(struct thread_collection *collect) { + LMS_UNUSED(collect); +} + +/* + * This releases the above lock + */ +void hss_thread_after_write(struct thread_collection *collect) { + LMS_UNUSED(collect); +} + +/* + * This tells the application that we really have only one thread + * (the main one) + */ +unsigned hss_thread_num_tracks(int num_thread) { + LMS_UNUSED(num_thread); + return 1; +} diff --git a/src/sig_stfl/lms/external/hss_verify.c b/src/sig_stfl/lms/external/hss_verify.c new file mode 100644 index 0000000000..089bdbd1ef --- /dev/null +++ b/src/sig_stfl/lms/external/hss_verify.c @@ -0,0 +1,196 @@ +/* + * This is the code that implements the hierarchical part of the LMS hash + * based signatures + */ +#include +#include "common_defs.h" +#include "hss_verify.h" +#include "lm_verify.h" +#include "lm_common.h" +#include "lm_ots_verify.h" +#include "hash.h" +#include "endian.h" +#include "hss_thread.h" +#include "hss_internal.h" +#include "hss.h" + +/* The HSS public key consists of: */ +/* Number of levels (1-8) (4 bytes) */ +/* The top level LM public key */ + +/* The HSS signature 
consists of: */ +/* A word giving the number of levels - 1 == L-1 */ +/* L-1 iterations of (i = 1..L-1): */ +/* - LMS Signature of public key i (signed by the pub key of level i-1) */ +/* - LMS Public key (of level i) */ +/* - LMS Signature of the message, signed by the bottomost pub key */ + +/* This is the routine that runs on a thread to validate an LMS signature */ +void validate_internal_sig(const void *data, + struct thread_collection *col) { + const struct verify_detail *d = data; + + bool success = lm_validate_signature(d->public_key, + d->message, d->message_len, false, + d->signature, d->signature_len); + + if (!success) { + /* Drat, it failed; call the failure in */ + hss_thread_before_write(col); + *d->got_error = hss_error_bad_signature; + hss_thread_after_write(col); + } +} + +/* + * Validate an HSS signature, using a public key. Parameters: + * public_key - pointer to the public key + * message - the mmessage that was supposedly signed + * message_len - the size of the message + * siganture - the signature we're checking + * signature_len - the length of the signature + * + * This returns true if everything checks out and the signature verifies + * false on error (whether the error is because the signature didn't verify, + * or we hit some sort of error on the way) + */ +bool hss_validate_signature( + const unsigned char *public_key, + const void *message, size_t message_len, + const unsigned char *signature, size_t signature_len, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + unsigned i; + + /* Get the number of levels the signature claims */ + if (signature_len < 4) { + info->error_code = hss_error_bad_signature; + return false; + } + uint_fast32_t levels = get_bigendian( signature, 4 ) + 1; + /* +1 because what's in the signature is levels-1 */ + signature += 4; signature_len -= 4; + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS || + levels != get_bigendian( public_key, 4 )) { 
+ info->error_code = hss_error_bad_signature; + return false; + } + + /* Compare that to what the public key says */ + uint_fast32_t pub_levels = get_bigendian( public_key, 4 ); + if (levels != pub_levels) { + /* Signature and public key don't agree */ + info->error_code = hss_error_bad_signature; + return false; + } + /* We'll use the LMS public key embedded in the HSS public key as the */ + /* key to use to validate the top level signature */ + public_key += 4; + + struct thread_collection *col = hss_thread_init(info->num_threads); + enum hss_error_code got_error = hss_error_none; + struct verify_detail detail; + detail.got_error = &got_error; + + /* Parse through the signature, kicking off the tasks to validate */ + /* individual LMS signatures within it as we go */ + for (i=0; i + * where: + * - Signature A is the LMS signature of Public Key B + * - Public Key B is the message we're verifying (and will be + * interpreted as a public key in the next iteration) + * public_key points to Public Key A, which is the public key that + * we use to verify Signature A + */ + + /* Get the length of Signature A */ + param_set_t lm_type = get_bigendian( public_key, 4 ); + param_set_t lm_ots_type = get_bigendian( public_key+4, 4 ); + unsigned l_siglen = lm_get_signature_len(lm_type, lm_ots_type); + if (l_siglen == 0 || l_siglen > signature_len) { + info->error_code = hss_error_bad_signature; + goto failed; + } + + /* Retain a pointer to Signature A, and advance the current */ + /* pointer to Public Key B */ + const unsigned char *l_sig = signature; + signature += l_siglen; signature_len -= l_siglen; + + /* The next thing is the next level public key (Public Key B) */ + /* which we need to validate) */ + if (signature_len < 4) { + info->error_code = hss_error_bad_signature; + goto failed; + } + /* + * Get how long Public Key B would be, assuming it is a valid + * public key. 
If it's not a valid public key (that is, if + * someone other than the valid signer modified it), then + * Signature A will not validate, and so we'll catch that + */ + lm_type = get_bigendian( signature, 4 ); + unsigned l_pubkeylen = lm_get_public_key_len(lm_type); + if (l_pubkeylen == 0 || l_pubkeylen > signature_len) { + info->error_code = hss_error_bad_signature; + goto failed; + } + + /* Retain a pointer to Public Key B, and advance the current */ + /* pointer past it (to the data the next iteration cares about) */ + const unsigned char *l_pubkey = signature; + signature += l_pubkeylen; signature_len -= l_pubkeylen; + + /* Now, schedule the validation of Signature A */ + detail.public_key = public_key; /* Public key A */ + detail.message = l_pubkey; /* Public key B, that is, */ + /* the message to validate */ + detail.message_len = l_pubkeylen; + detail.signature = l_sig; /* Signature A */ + detail.signature_len = l_siglen; + hss_thread_issue_work( col, validate_internal_sig, + &detail, sizeof detail ); + + /* We validated this level's public key (or, at least, scheduled */ + /* it, if it turns out not to validate, we'll catch it below) */ + /* Use the current Public Key B as the next level's Public Key A */ + public_key = l_pubkey; + } + + /* + * We're at the bottom level; now, the current position in the signature + * looks like (or, rather, is *supposed to look like*) this: + * + * where: + * - Signature A is the bottom signature, which signs the actual + * message + * public_key points to the bottom level public key, which is used to + * validate the signature + * + * Just go ahead and schedule the validation + */ + detail.public_key = public_key; /* Public key to use */ + detail.message = message; /* The user's message that needs */ + detail.message_len = message_len; /* validation */ + detail.signature = signature; /* Bottom level LMS signature */ + detail.signature_len = signature_len; + hss_thread_issue_work( col, validate_internal_sig, + &detail, 
sizeof detail ); + + /* Wait for all the threads to complete */ + hss_thread_done(col); + + /* It succeeded if none of the threads reported an error */ + if (got_error == hss_error_none) return true; + info->error_code = got_error; + return false; + +failed: /* If we get an intermediate failure */ + hss_thread_done(col); + return false; +} diff --git a/src/sig_stfl/lms/external/hss_verify.h b/src/sig_stfl/lms/external/hss_verify.h new file mode 100644 index 0000000000..7a29deb275 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_verify.h @@ -0,0 +1,23 @@ +#if !defined( HSS_VERIFY_H_ ) +#define HSS_VERIFY_H_ + +#include + +struct hss_extra_info; +/* + * This is the function to validate a signature; return true if it validates, + * false if it doesn't + * + * public_key is the pointer to the public key + * + * message, message_len is the message to validate + * + * signature, signature_len is the signature to validate + */ +bool hss_validate_signature( + const unsigned char *public_key, + const void *message, size_t message_len, + const unsigned char *signature, size_t signature_len, + struct hss_extra_info *info); + +#endif /* HSS_VERIFY_H_ */ diff --git a/src/sig_stfl/lms/external/hss_verify_inc.c b/src/sig_stfl/lms/external/hss_verify_inc.c new file mode 100644 index 0000000000..451082f8de --- /dev/null +++ b/src/sig_stfl/lms/external/hss_verify_inc.c @@ -0,0 +1,203 @@ +/* + * This is the code that implements the hierarchical part of the LMS hash + * based signatures; in this case, incremental verification + */ +#include +#include "common_defs.h" +#include "hss_verify_inc.h" +#include "lm_verify.h" +#include "lm_common.h" +#include "lm_ots_verify.h" +#include "hash.h" +#include "endian.h" +#include "hss_thread.h" +#include "hss_internal.h" +#include "lm_ots_common.h" +#include "hss.h" + +/* + * Start the process of validating an HSS signature incrementally. 
Parameters: + * ctx - The state we'll use to track the incremental validation + * public_key - pointer to the public key + * signature - the signature we're checking + * signature_len - the length of the signature + */ +bool hss_validate_signature_init( + struct hss_validate_inc *ctx, + const unsigned char *public_key, + const unsigned char *signature, size_t signature_len, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + unsigned i; + if (!ctx) { + info->error_code = hss_error_got_null; + return false; + } + ctx->status = hss_error_ctx_uninitialized; /* Until we hear otherwise, */ + /* we got a failure */ + + const unsigned char *orig_signature = signature; +; + /* Get the number of levels the signature claims */ + if (signature_len < 4) { + ctx->status = info->error_code = hss_error_bad_signature; + return false; + } + uint_fast32_t levels = get_bigendian( signature, 4 ) + 1; + /* +1 because what's in the signature is levels-1 */ + signature += 4; signature_len -= 4; + if (levels < MIN_HSS_LEVELS || levels > MAX_HSS_LEVELS || + levels != get_bigendian( public_key, 4 )) { + ctx->status = info->error_code = hss_error_bad_signature; + return false; + } + uint_fast32_t pub_levels = get_bigendian( public_key, 4 ); + if (levels != pub_levels) { + /* Signature and public key don't agree */ + ctx->status = info->error_code = hss_error_bad_signature; + return false; + } + public_key += 4; + + /* Validate the upper levels of the signature */ + struct thread_collection *col = NULL; + if (levels > 1) { + col = hss_thread_init(info->num_threads); + enum hss_error_code got_error = hss_error_none; + struct verify_detail detail; + detail.got_error = &got_error; + + /* Scan through the signature, kicking off the tasks to validate it */ + /* as we go. 
Note that we don't validate the bottom level yet */ + for (i=0; i signature_len) goto failed; + const unsigned char *l_sig = signature; + signature += l_siglen; signature_len -= l_siglen; + + /* The next thing is the next level public key (which we need */ + /* to validate) */ + if (signature_len < 4) goto failed; + lm_type = get_bigendian( signature, 4 ); + unsigned l_pubkeylen = lm_get_public_key_len(lm_type); + if (l_pubkeylen == 0 || l_pubkeylen > signature_len) goto failed; + const unsigned char *l_pubkey = signature; + signature += l_pubkeylen; signature_len -= l_pubkeylen; + + /* Validate the signature of this level's public key */ + detail.public_key = public_key; + detail.message = l_pubkey; + detail.message_len = l_pubkeylen; + detail.signature = l_sig; + detail.signature_len = l_siglen; + hss_thread_issue_work( col, validate_internal_sig, + &detail, sizeof detail ); + + /* We validated this level's public key (or, at least, */ + /* scheduled it, if it turns out not to validate, we'll catch */ + /* it below), use it to validate the next level */ + public_key = l_pubkey; + } + + /* Wait for all the threads to complete */ + hss_thread_done(col); + col = NULL; + + if (got_error != hss_error_none) { + ctx->status = info->error_code = got_error; + return false; + } + } + + ctx->signature_offset = signature - orig_signature; + ctx->signature_len = signature_len; + + /* We have the public key in front of us; stash a copy */ + /* Right now, we have a fixed length public key */ + /* If that changes, we'll need to investigate the parmaeter set */ + memcpy( ctx->final_public_key, public_key, 8 + I_LEN + MAX_HASH ); + + /* Now, initialize the context */ + param_set_t ots_type = get_bigendian( public_key+4, 4 ); + + unsigned h, n; + if (!lm_ots_look_up_parameter_set(ots_type, &h, &n, NULL, NULL, NULL)) { + /* Because we're checking in parallel, this may be caused by */ + /* a bad signature */ + ctx->status = info->error_code = hss_error_bad_signature; + return false; 
+ } + ctx->h = h; + hss_init_hash_context( h, &ctx->hash_ctx ); + { + unsigned char prefix[ MESG_PREFIX_MAXLEN ]; + memcpy( prefix + MESG_I, ctx->final_public_key+8, I_LEN ); + memcpy( prefix + MESG_Q, signature, 4 ); /* q */ + SET_D( prefix + MESG_D, D_MESG ); + memcpy( prefix + MESG_C, signature+8, n ); /* C */ + hss_update_hash_context(h, &ctx->hash_ctx, prefix, MESG_PREFIX_LEN(n) ); + } + + /* It succeeded so far... */ + ctx->status = hss_error_none; + return true; + +failed: /* If we get an intermediate failure */ + if (col) hss_thread_done(col); + ctx->status = info->error_code = hss_error_bad_signature; + return false; +} + +/* This adds another piece of the message to validate */ +bool hss_validate_signature_update( + struct hss_validate_inc *ctx, + const void *message_segment, + size_t len_message_segment) { + if (!ctx || ctx->status != hss_error_none) return false; + + hss_update_hash_context(ctx->h, &ctx->hash_ctx, + message_segment, len_message_segment ); + + return true; +} + +/* We've added all the pieces of the messages, now do the validation */ +bool hss_validate_signature_finalize( + struct hss_validate_inc *ctx, + const unsigned char *signature, + struct hss_extra_info *info) { + struct hss_extra_info temp_info = { 0 }; + if (!info) info = &temp_info; + + if (!ctx) { + info->error_code = hss_error_got_null; + return false; + } + if (ctx->status != hss_error_none) { + info->error_code = ctx->status; + return false; + } + + /* Success or fail, we can't use the context any more */ + ctx->status = hss_error_ctx_already_used; + + /* Generate the final hash */ + unsigned char hash[ MAX_HASH ]; + unsigned h = ctx->h; + hss_finalize_hash_context( h, &ctx->hash_ctx, hash ); + + /* It passes iff the final signature validates */ + if (lm_validate_signature( + ctx->final_public_key, + hash, sizeof hash, true, + signature + ctx->signature_offset, ctx->signature_len)) { + return true; + } + + info->error_code = hss_error_bad_signature; + return false; +} diff 
--git a/src/sig_stfl/lms/external/hss_verify_inc.h b/src/sig_stfl/lms/external/hss_verify_inc.h new file mode 100644 index 0000000000..147308b23c --- /dev/null +++ b/src/sig_stfl/lms/external/hss_verify_inc.h @@ -0,0 +1,82 @@ +#if !defined( HSS_VERIFY_INC_H_ ) +#define HSS_VERIFY_INC_H_ +#include +#include +#include "hash.h" +#include "common_defs.h" +#include "hss.h" + +/* + * These are the functions to validate a signature incrementally. + * That is, we assume that we don't have the entire message at + * once, instead, we have it in pieces (for example, the signature + * is of a multigigabyte file) + * + * Usage: + * struct hss_validate_inc ctx; + * bool success = hss_validate_init( &ctx, public_key, signature ); + * hss_validate_update( &ctx, message_part_1, len_1 ); + * hss_validate_update( &ctx, message_part_2, len_2 ); + * hss_validate_update( &ctx, message_part_3, len_3 ); + * success = hss_validate_finalize( &ctx, signature ); + * if (success) printf( "The signature validated\n" ); + * + * This is in its own include file because we need to import some + * 'not-generally-for-general-consumption' include files to make + * it work (as they're in the hss_validate_inc structure) + */ + +/* + * This is the context structure that holds the intermedate results of an + * in-process validation + * It's a application-visible structure for ease of use: the application can + * allocate it as an automatic, and if the application aborts in the middle of + * the validation, it doesn't cause a memory leak + */ +struct hss_validate_inc { + enum hss_error_code status; /* Either hss_error_none if we're in */ + /* process, or the reason why we'd fail */ + size_t signature_offset; /* Offset of the final signature within the */ + /* HSS signature */ + size_t signature_len; /* Length of the final signature */ + + unsigned h; /* Hash function used */ + + /* The final public key. 
We need this at finalization time, */ + /* however they might not be in the signature (L=1 case) */ + unsigned char final_public_key[8 + I_LEN + MAX_HASH]; + + union hash_context hash_ctx; /* For the running hash we use */ +}; + +struct hss_extra_info; + +/* Starts off the process of incrementally validating a signature */ +/* If it detects a failure, this returns false */ +/* Handing the return code is optional; if this fails, the finalization */ +/* step will fail too */ +bool hss_validate_signature_init( + struct hss_validate_inc *ctx, + const unsigned char *public_key, + const unsigned char *signature, size_t signature_len, + struct hss_extra_info *info); + +/* This adds another piece of the message to validate */ +/* Again, the result code is optional */ +bool hss_validate_signature_update( + struct hss_validate_inc *ctx, + const void *message_segment, + size_t len_message_segment); + +/* This finalizes the signature validation */ +/* This returns true if the signature validates (and we didn't detect any */ +/* intermediate failures) */ +/* We ask the caller to pass in the signature again, because we'd prefer */ +/* not having to place the final LMS signature in the ctx structure; that'd */ +/* make it larger than we'd like */ +bool hss_validate_signature_finalize( + struct hss_validate_inc *ctx, + const unsigned char *signature, + struct hss_extra_info *info); + +#endif /* HSS_VERIFY_INC_H_ */ diff --git a/src/sig_stfl/lms/external/hss_zeroize.c b/src/sig_stfl/lms/external/hss_zeroize.c new file mode 100644 index 0000000000..f2bd334903 --- /dev/null +++ b/src/sig_stfl/lms/external/hss_zeroize.c @@ -0,0 +1,49 @@ +#include "hss_zeroize.h" +#include + +/* + * This is a function to zeroize a section of memory + * + * We do this because when we release a section of memory (either because it's + * a local variable going out of scope, or we free it), it's possible that + * the memory will retain its contents after another allocation (possibly + * done by someone 
outside this module). So, to avoid this potential security + * issue, we scrub the memory (at least, the parts that have data that would + * make it possible to forge if it leaked) before releasing it. + * + * Now, there's a bunch of things we don't mind being exposed (e.g. internal + * node values of Merkle trees), so we don't use this everywhere; only where + * it is needed + * + * We use this, rather than having routines simply call memset, to avoid + * potential problems with overenthusiastic optimizers. Generally, we zeroize + * an area immediately before it goes out of scope or we free it, however an + * optimizer might conclude "they're about to release the memory, there's no + * need to write to it first" + * + * For similar reasons, this function is in its own source file (so that a + * compiler optimizer who doesn't examine more than one source at a time can't + * eliminate it). If we are worried about optimizers who can be even more + * enthusiastic, there are other things we can try; however we're not going to + * worry about that right now + */ +void hss_zeroize( void *area, size_t len ) { +#if defined( __STDC_LIB_EXT1__ ) + /* + * C11 defines a version of memset that does precisely what we want, and is + * guaranteed not to be molested by the optimizer + * Note that the first 'len' is supposed to be the length of the buffer + * we're cleaning and the second 'len' is the area to clear. 
Since we + * expect the caller to ask us to clear the entire area (and hence gives + * us only one length), we use the same for both + */ + memset_s( area, len, 0, len ); +#else + /* + * Fallback code for pre-C11 versions + */ + volatile unsigned char *p = area; + + while (len--) *p++ = 0; +#endif +} diff --git a/src/sig_stfl/lms/external/hss_zeroize.h b/src/sig_stfl/lms/external/hss_zeroize.h new file mode 100644 index 0000000000..702d91137b --- /dev/null +++ b/src/sig_stfl/lms/external/hss_zeroize.h @@ -0,0 +1,10 @@ +#if !defined( HSS_ZEROIZE_H_ ) +#define HSS_ZEROIZE_H_ + +#include + +/* Zeroize an area, that is, scrub it from holding any potentially secret */ +/* information */ +void hss_zeroize( void *area, size_t len ); + +#endif /* HSS_ZEROIZE_H_ */ diff --git a/src/sig_stfl/lms/external/lm_common.c b/src/sig_stfl/lms/external/lm_common.c new file mode 100644 index 0000000000..e3eb56f0f0 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_common.c @@ -0,0 +1,79 @@ +/* + * This is the code that implements the tree part of the LMS hash + * based signatures + */ +#include +#include "lm_common.h" +#include "hash.h" +#include "common_defs.h" +#include "lm_ots_common.h" + +/* + * Internal utility to convert encoded parameter sets into what they represent + */ +bool lm_look_up_parameter_set(param_set_t parameter_set, + unsigned *h, unsigned *n, unsigned *height) { + unsigned v_h, v_n, v_height; + switch (parameter_set) { + case LMS_SHA256_N32_H5: + v_h = HASH_SHA256; v_n = 32; v_height = 5; break; + case LMS_SHA256_N32_H10: + v_h = HASH_SHA256; v_n = 32; v_height = 10; break; + case LMS_SHA256_N32_H15: + v_h = HASH_SHA256; v_n = 32; v_height = 15; break; + case LMS_SHA256_N32_H20: + v_h = HASH_SHA256; v_n = 32; v_height = 20; break; + case LMS_SHA256_N32_H25: + v_h = HASH_SHA256; v_n = 32; v_height = 25; break; + default: return false; + } + + if (h) *h = v_h; + if (n) *n = v_n; + if (height) *height = v_height; + + return true; +} + +/* The LM public key consists of: 
*/ +#define LM_PUB_PARM_SET 0 /* The parameter set (4 bytes) */ +#define LM_PUB_OTS_PARM_SET 4 /* The OTS parameter set (4 bytes) */ +#define LM_PUB_I 8 /* Our nonce (I) value (16 bytes) */ +/* The root value comes here */ + +/* + * XDR requires us to pad the I value out to a multiple of 4 + * This computes how long the field will be after padding + * That is, it rounds len_I up to the next multiple of 4 + */ +#define padded_length(len_I) (((len_I) + 3) & ~3) + +/* The public key just consists of the parameter sets, plus I, plus root hash */ +size_t lm_get_public_key_len(param_set_t lm_type) { + unsigned n; + if (!lm_look_up_parameter_set( lm_type, 0, &n, 0)) + return 0; + + return LM_PUB_I + padded_length(I_LEN) + n; +} + +/* + * The amount of space we use for signature + */ +size_t lm_get_signature_len(param_set_t lm_type, + param_set_t lm_ots_type) { + unsigned n, height; + if (!lm_look_up_parameter_set( lm_type, 0, &n, &height )) + return 0; + + int ots_sig_len = lm_ots_get_signature_len(lm_ots_type); + if (ots_sig_len == 0) + return 0; + + /* + * The LM signature consists of the type code, the diversification factor, + * the LM-OTS signature (which includes the OTS type code), and the + * authentication path (which is an array of height hashes) + */ + return 4 + 4 + ots_sig_len + n*height; +} diff --git a/src/sig_stfl/lms/external/lm_common.h b/src/sig_stfl/lms/external/lm_common.h new file mode 100644 index 0000000000..027eda2214 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_common.h @@ -0,0 +1,20 @@ +#if !defined(LM_COMMON_H_) +#define LM_COMMON_H_ + +#include +#include "common_defs.h" + +size_t lm_get_public_key_len(param_set_t lm_type); +size_t lm_get_signature_len(param_set_t lm_type, + param_set_t lm_ots_type); + +bool lm_look_up_parameter_set(param_set_t parameter_set, + unsigned *h, unsigned *n, unsigned *height); + +/* The format of an LM public key; it consists of: */ +#define LM_PUB_PARM_SET 0 /* The parameter set (4 bytes) */ +#define 
LM_PUB_OTS_PARM_SET 4 /* The OTS parameter set (4 bytes) */ +#define LM_PUB_I 8 /* Our nonce (I) value (32 or 64 bytes) */ +/* The root value comes here */ + +#endif /* LM_COMMON_H_ */ diff --git a/src/sig_stfl/lms/external/lm_ots.h b/src/sig_stfl/lms/external/lm_ots.h new file mode 100644 index 0000000000..4fcf690342 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_ots.h @@ -0,0 +1,64 @@ +#if !defined( LM_OTS_H_ ) +#define LM_OTS_H_ + +#include "common_defs.h" +#include + +/* + * These are routines that implement the OTS signature scheme. These routines + * never actually form a "private key"; instead, the signer passes the 'seed' + * (and public data) to form the public key and to do the actual signature. + * We do this because the LM routines are actually better suited for doing + * seed management. + */ +struct seed_derive; + +/* + * Compute the public key. Note that it doesn't compute a 'private key'; + * the signature algorithm gets that data when we pass the parameters again + * Parameters: + * lm_ots_type - The parameter set + * I - The I public identifier to use + * q - The diversification string, passed as a 4 byte integer + * seed - The structure used to generate seeds + * public_key - Where to place the public key + * public_key_len - The length of the above buffer + * This returns true on success + */ +bool lm_ots_generate_public_key( + param_set_t lm_ots_type, + const unsigned char *I, /* Public key identifier */ + merkle_index_t q, /* Diversification string, 4 bytes value */ + struct seed_derive *seed, + unsigned char *public_key, size_t public_key_len); + +/* + * Sign a message. 
Warning: the caller is expected to make sure that it signs + * only one message with a given seed/I/q set + * Parameters: + * lm_ots_type - The parameter set + * I - The I public identifier to use + * q - The diversification string, passed as a 4 byte integer + * seed - The structure used to generate seeds + * message - Message to sign + * message_len - Length of the message + * prehashed - Set if the message hashing has already taken place + * signature - Where to place the signature + * signature_len - The length of the above buffer + * This returns true on success + */ +bool lm_ots_generate_signature( + param_set_t lm_ots_type, + const unsigned char *I, + merkle_index_t q, + struct seed_derive *seed, + const void *message, size_t message_len, bool prehashed, + unsigned char *signature, size_t signature_len); + +/* The include file for the verification routine */ +#include "lm_ots_verify.h" + +/* The include file for the common access routines */ +#include "lm_ots_common.h" + +#endif /* LM_OTS_H_ */ diff --git a/src/sig_stfl/lms/external/lm_ots_common.c b/src/sig_stfl/lms/external/lm_ots_common.c new file mode 100644 index 0000000000..45672e18b2 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_ots_common.c @@ -0,0 +1,99 @@ +/* + * This is the code that implements the one-time-signature part of the LMS hash + * based signatures + */ +#include "lm_ots_common.h" +#include "common_defs.h" +#include "hash.h" + +/* + * Convert the external name of a parameter set into the set of values we care + * about + */ +bool lm_ots_look_up_parameter_set(param_set_t parameter_set, + unsigned *h, unsigned *n, unsigned *w, unsigned *p, unsigned *ls) { + unsigned v_h, v_n, v_w, v_p, v_ls; + switch (parameter_set) { + case LMOTS_SHA256_N32_W1: + v_h = HASH_SHA256; v_n = 32; v_w = 1; v_p = 265; v_ls = 7; break; + case LMOTS_SHA256_N32_W2: + v_h = HASH_SHA256; v_n = 32; v_w = 2; v_p = 133; v_ls = 6; break; + case LMOTS_SHA256_N32_W4: + v_h = HASH_SHA256; v_n = 32; v_w = 4; v_p = 67; 
v_ls = 4; break; + case LMOTS_SHA256_N32_W8: + v_h = HASH_SHA256; v_n = 32; v_w = 8; v_p = 34; v_ls = 0; break; + default: return false; + } + + if (h) *h = v_h; + if (n) *n = v_n; + if (w) *w = v_w; + if (p) *p = v_p; + if (ls) *ls = v_ls; + + return true; +} + +/* The public key just consists of the bare hash */ +size_t lm_ots_get_public_key_len(param_set_t lm_ots_type) { + unsigned n; + if (!lm_ots_look_up_parameter_set( lm_ots_type, 0, &n, 0, 0, 0 )) + return 0; + + return n; +} + +/* Return the length of a signature */ +size_t lm_ots_get_signature_len(param_set_t lm_ots_type) { + unsigned n, p; + + if (!lm_ots_look_up_parameter_set( lm_ots_type, 0, &n, 0, &p, 0 )) + return 0; + + return 4 + n + p*n; +} + +/* Return the number of hashes we need to compute to generate a public key */ +unsigned lm_ots_hashes_per_public_key(param_set_t lm_ots_type) { + unsigned wint, num_dig; + if (!lm_ots_look_up_parameter_set(lm_ots_type, + NULL, NULL, &wint, &num_dig, NULL)) { + return 0; + } + + /* Total number of hash invocations: + * For each digit, we expand the seed (1), and then perform (2**wint-1) + * haashes to obtain the end of the chain + * Then, we hash all the ends of the chains together + * If we were to return the number of hash compression operations, + * the final 1 would be a bit larger + */ + return num_dig * (1 << wint) + 1; +} + +/* Todo: some of these values depend only on w; why do we need to recompute */ +/* them each time??? 
*/ +unsigned lm_ots_coef(const unsigned char *Q, unsigned i, unsigned w) { + unsigned index = (i * w) / 8; /* Which byte holds the coefficient */ + /* we want */ + unsigned digits_per_byte = 8/w; + unsigned shift = w * (~i & (digits_per_byte-1)); /* Where in the byte */ + /* the coefficient is */ + unsigned mask = (1<> shift) & mask; +} + +/* This returns the Winternitz checksum to append to the hash */ +unsigned lm_ots_compute_checksum(const unsigned char *Q, unsigned Q_len, + unsigned w, unsigned ls) { + unsigned sum = 0; + unsigned i; + unsigned u = 8 * Q_len / w; + unsigned max_digit = (1< +#include "common_defs.h" + +bool lm_ots_look_up_parameter_set(param_set_t parameter_set, + unsigned *h, unsigned *n, unsigned *w, unsigned *p, unsigned *ls); +size_t lm_ots_get_public_key_len(param_set_t lm_ots_type); +size_t lm_ots_get_signature_len(param_set_t lm_ots_type); +unsigned lm_ots_hashes_per_public_key(param_set_t lm_ots_type); +unsigned lm_ots_compute_checksum(const unsigned char *Q, unsigned Q_len, + unsigned w, unsigned ls); +unsigned lm_ots_coef(const unsigned char *Q, unsigned i, unsigned w); + +#endif /* LM_OTS_COMMON_H_ */ diff --git a/src/sig_stfl/lms/external/lm_ots_sign.c b/src/sig_stfl/lms/external/lm_ots_sign.c new file mode 100644 index 0000000000..ee8f56b0a2 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_ots_sign.c @@ -0,0 +1,168 @@ +/* + * This is the code that implements the one-time-signature part of the LMS hash + * based signatures + */ +#include +#include "common_defs.h" +#include "lm_ots.h" +#include "lm_ots_common.h" +#include "hash.h" +#include "endian.h" +#include "hss_zeroize.h" +#include "hss_derive.h" +#include "hss_internal.h" + +bool lm_ots_generate_public_key( + param_set_t lm_ots_type, + const unsigned char *I, /* Public key identifier */ + merkle_index_t q, /* Diversification string, 4 bytes value */ + struct seed_derive *seed, + unsigned char *public_key, size_t public_key_len) { + + /* Look up the parameter set */ + unsigned h, 
n, w, p, ls; + LMS_UNUSED(public_key_len); + if (!lm_ots_look_up_parameter_set( lm_ots_type, &h, &n, &w, &p, &ls )) + return false; + + /* Start the hash that computes the final value */ + union hash_context public_ctx; + hss_init_hash_context(h, &public_ctx); + { + unsigned char prehash_prefix[ PBLC_PREFIX_LEN ]; + memcpy( prehash_prefix + PBLC_I, I, I_LEN ); + put_bigendian( prehash_prefix + PBLC_Q, q, 4 ); + SET_D( prehash_prefix + PBLC_D, D_PBLC ); + hss_update_hash_context(h, &public_ctx, prehash_prefix, + PBLC_PREFIX_LEN ); + } + + /* Now generate the public key */ + /* This is where we spend the majority of the time during key gen and */ + /* signing operations; it would make sense to attempt to try to take */ + /* advantage of parallel (SIMD) hardware; even if we use it nowhere */ + /* else, we'd get a significant speed up */ + unsigned i, j; + + unsigned char buf[ ITER_MAX_LEN ]; + memcpy( buf + ITER_I, I, I_LEN ); + put_bigendian( buf + ITER_Q, q, 4 ); + union hash_context ctx; + + hss_seed_derive_set_j( seed, 0 ); + + for (i=0; i +#include "lm_ots_verify.h" +#include "lm_ots_common.h" +#include "hash.h" +#include "endian.h" +#include "common_defs.h" + +/* + * This validate a OTS signature for a message. It doesn't actually use the + * public key explicitly; instead, it just produces the root key, based on the + * message; the caller is assumed to compare it to the expected value + * Parameters: + * - computed_public_key - where to place the reconstructed root. 
It is + * assumed that the caller has allocated enough space + * - I: the nonce value ("I") to use + * - q: diversification string + * - message - the message to verify + * - message_len - the length of the message + * - message_prehashed - true if the message has already undergone the initial + * (D_MESG) hash + * - signature - the signature + * - signature_len - the length of the signature + * - parameter_set - what we expect the parameter set to be + * + * This returns true on successfully recomputing a root value; whether it is + * the right one is something the caller would need to verify + */ +bool lm_ots_validate_signature_compute( + unsigned char *computed_public_key, + const unsigned char *I, merkle_index_t q, + const void *message, size_t message_len, bool message_prehashed, + const unsigned char *signature, size_t signature_len, + param_set_t expected_parameter_set) { + if (signature_len < 4) return false; /* Ha, ha, very funny... */ + + /* We don't trust the parameter set that's in the signature; verify it */ + param_set_t parameter_set = get_bigendian( signature, 4 ); + if (parameter_set != expected_parameter_set) { + return false; + } + + unsigned h, n, w, p, ls; + if (!lm_ots_look_up_parameter_set( parameter_set, &h, &n, &w, &p, &ls )) + return false; + + if (signature_len != 4 + n * (p+1)) return false; + + const unsigned char *C = signature + 4; + const unsigned char *y = C + n; + + unsigned char Q[MAX_HASH + 2]; + if (message_prehashed) { + memcpy( Q, message, n ); + } else { + union hash_context ctx; + /* Compute the initial hash */ + hss_init_hash_context(h, &ctx); + /* Hash the message prefix */ + { + unsigned char prefix[ MESG_PREFIX_MAXLEN ]; + memcpy( prefix + MESG_I, I, I_LEN ); + put_bigendian( prefix + MESG_Q, q, 4 ); + SET_D( prefix + MESG_D, D_MESG ); + memcpy( prefix + MESG_C, C, n ); + hss_update_hash_context(h, &ctx, prefix, MESG_PREFIX_LEN(n) ); + } + /* Then, the message */ + hss_update_hash_context(h, &ctx, message, message_len ); 
+ + hss_finalize_hash_context( h, &ctx, Q ); + } + + /* Append the checksum to the randomized hash */ + put_bigendian( &Q[n], lm_ots_compute_checksum(Q, n, w, ls), 2 ); + + /* And, start building the parts for the final hash */ + union hash_context final_ctx; + hss_init_hash_context(h, &final_ctx); + { + unsigned char prehash_prefix[ PBLC_PREFIX_LEN ]; + memcpy( prehash_prefix + PBLC_I, I, I_LEN ); + put_bigendian( prehash_prefix + PBLC_Q, q, 4 ); + SET_D( prehash_prefix + PBLC_D, D_PBLC ); + hss_update_hash_context(h, &final_ctx, prehash_prefix, + PBLC_PREFIX_LEN ); + } + + unsigned i; + unsigned char tmp[ITER_MAX_LEN]; + + /* Preset the parts of tmp that don't change */ + memcpy( tmp + ITER_I, I, I_LEN ); + put_bigendian( tmp + ITER_Q, q, 4 ); + + unsigned max_digit = (1< +#include "common_defs.h" + +/* + * This validates an OTS signature, but instead of producing a SUCCESS/FAILURE + * return, it generates the root value (which the caller is expected to check). + * It can return false (failure), for things such as unrecognized parameter + * set It also makes sure that the parameter set of the signature is that + * value (as we need to make sure that the attacker didn't substitute a + * weaker one) + */ +bool lm_ots_validate_signature_compute( + unsigned char *computed_public_key, + const unsigned char *I, + merkle_index_t q, /* Diversification string, 4 bytes value */ + const void *message, size_t message_len, bool prehashed, + const unsigned char *signature, size_t signature_len, + param_set_t expected_parameter_set); + +#endif /* LM_OTS_VERIFY_H_ */ diff --git a/src/sig_stfl/lms/external/lm_verify.c b/src/sig_stfl/lms/external/lm_verify.c new file mode 100644 index 0000000000..46b3627885 --- /dev/null +++ b/src/sig_stfl/lms/external/lm_verify.c @@ -0,0 +1,107 @@ +/* + * This is the code that implements the tree part of the LMS hash + * based signatures + */ +#include +#include "lm_verify.h" +#include "lm_common.h" +#include "lm_ots_common.h" +#include 
"lm_ots_verify.h" +#include "hash.h" +#include "endian.h" +#include "common_defs.h" + +/* + * XDR requires us to pad the I value out to a multiple of 4 + * This computes how long the field will be after padding + * That is, it rounds len_I up to the next multiple of 4 + */ +#define padded_length(len_I) (((len_I) + 3) & ~3) + +/* + * This validate an LM signature for a message. It does take an XDR-encoded + * signature, and verify against it. + * Parameters: + * - public_key - the XDR-encoded public ley + * - message - the message to verify + * - message_len - the length of the message + * - signature - the signature + * - signature_len - the length of the signature + * + * This returns true if the signature verifies + */ +bool lm_validate_signature( + const unsigned char *public_key, + const void *message, size_t message_len, bool prehashed, + const unsigned char *signature, size_t signature_len) { + union hash_context ctx; + + param_set_t lm_type = get_bigendian( public_key + LM_PUB_PARM_SET, 4 ); + param_set_t ots_type = get_bigendian( public_key + LM_PUB_OTS_PARM_SET, 4 ); + + unsigned h, n, height; + if (!lm_look_up_parameter_set(lm_type, &h, &n, &height)) return false; + + unsigned char computed_public_key[MAX_HASH]; + + const unsigned char *I = public_key + LM_PUB_I; + + if (signature_len < 8) return false; + merkle_index_t count = get_bigendian( signature, 4 ); + signature += 4; signature_len -= 4; /* 4 bytes, rather then 8 */ + /* the OTS type is expected to be a part of the OTS signature, */ + /* which lm_ots_validate_signature_compute will expect */ + + /* Compute the OTS root */ + size_t ots_publen = lm_ots_get_public_key_len(ots_type); + size_t ots_siglen = lm_ots_get_signature_len(ots_type); + if (ots_publen == 0 || ots_siglen == 0) return false; + if (signature_len < ots_siglen) return false; + + unsigned char ots_sig[LEAF_MAX_LEN]; + if (!lm_ots_validate_signature_compute(ots_sig + LEAF_PK, I, count, + message, message_len, prehashed, + signature, 
ots_siglen, ots_type)) return false; + signature += ots_siglen; signature_len -= ots_siglen; + + /* Get the parameter set declared in the signature; make sure it matches */ + /* what we expect */ + if (signature_len < 4) return false; + param_set_t parameter_set = get_bigendian( signature, 4 ); + if (parameter_set != lm_type) return false; + signature += 4; signature_len -= 4; + + merkle_index_t count_nodes = (merkle_index_t)1 << height; + + if (signature_len != n * height) return false; /* We expect the auth */ + /* path to be there as the last element */ + if (count >= count_nodes) return false; /* Index out of range */ + merkle_index_t node_num = count + count_nodes; + + memcpy( ots_sig + LEAF_I, I, I_LEN ); + put_bigendian( ots_sig + LEAF_R, node_num, 4 ); + SET_D( ots_sig + LEAF_D, D_LEAF ); + hss_hash_ctx( computed_public_key, h, &ctx, ots_sig, LEAF_LEN(n) ); + + unsigned char prehash[ INTR_MAX_LEN ]; + memcpy( prehash + INTR_I, I, I_LEN ); + SET_D( prehash + INTR_D, D_INTR ); + while (node_num > 1) { + if (node_num % 2) { + memcpy( prehash + INTR_PK + 0, signature, n ); + memcpy( prehash + INTR_PK + n, computed_public_key, n ); + } else { + memcpy( prehash + INTR_PK + 0, computed_public_key, n ); + memcpy( prehash + INTR_PK + n, signature, n ); + } + signature += n; + node_num /= 2; + put_bigendian( prehash + INTR_R, node_num, 4 ); + hss_hash_ctx( computed_public_key, h, &ctx, prehash, INTR_LEN(n) ); + } + + /* Now, check to see if the root we computed matches the root we should have */ + unsigned offset = LM_PUB_I + padded_length(I_LEN); + + return 0 == memcmp( computed_public_key, public_key + offset, n ); +} diff --git a/src/sig_stfl/lms/external/lm_verify.h b/src/sig_stfl/lms/external/lm_verify.h new file mode 100644 index 0000000000..7f48767fcb --- /dev/null +++ b/src/sig_stfl/lms/external/lm_verify.h @@ -0,0 +1,12 @@ +#if !defined(LM_VERIFY_H_) +#define LM_VERIFY_H_ + +#include +#include + +bool lm_validate_signature( + const unsigned char *public_key, 
+ const void *message, size_t message_len, bool prehashed, + const unsigned char *signature, size_t signature_len); + +#endif /* LM_VERIFY_H_ */ diff --git a/src/sig_stfl/lms/external/sha256.c b/src/sig_stfl/lms/external/sha256.c new file mode 100644 index 0000000000..fb18892a31 --- /dev/null +++ b/src/sig_stfl/lms/external/sha256.c @@ -0,0 +1,183 @@ +/* + * SHA-256 + * Implementation derived from LibTomCrypt (Tom St Denis) + * + * LibTomCrypt is a library that provides various cryptographic + * algorithms in a highly modular and flexible manner. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, tomstdenis@gmail.com, http://libtomcrypt.org + */ + +#include +#include "sha256.h" +#include "endian.h" + +#if !USE_OPENSSL && !defined(EXT_SHA256_H) + +/* If we don't have OpenSSL, here's a SHA256 implementation */ +#define SHA256_FINALCOUNT_SIZE 8 +#define SHA256_K_SIZE 64 +static const unsigned long K[SHA256_K_SIZE] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, + 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL, + 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, + 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL, + 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, + 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, + 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, + 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL, + 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, + 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Various logical functions */ + +/* Rotate x right by rot bits */ +static 
unsigned long RORc(unsigned long x, int rot) { + rot &= 31; if (rot == 0) return x; + unsigned long right = ((x&0xFFFFFFFFUL)>>rot ); + unsigned long left = ((x&0xFFFFFFFFUL)<<(32-rot) ); + return (right|left) & 0xFFFFFFFFUL; +} +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) (((x | y) & z) | (x & y)) +#define S(x, n) RORc((x),(n)) +#define R(x, n) (((x)&0xFFFFFFFFUL)>>(n)) +#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) +#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) +#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) +#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + +static void sha256_compress (SHA256_CTX * ctx, const void *buf) +{ + unsigned long S0, S1, S2, S3, S4, S5, S6, S7, W[SHA256_K_SIZE], t0, t1, t; + int i; + const unsigned char *p; + + /* copy state into S */ + S0 = ctx->h[0]; + S1 = ctx->h[1]; + S2 = ctx->h[2]; + S3 = ctx->h[3]; + S4 = ctx->h[4]; + S5 = ctx->h[5]; + S6 = ctx->h[6]; + S7 = ctx->h[7]; + + /* + * We've been asked to perform the hash computation on this 512-bit string. 
+ * SHA256 interprets that as an array of 16 bigendian 32 bit numbers; copy + * it, and convert it into 16 unsigned long's of the CPU's native format + */ + p = buf; + for (i=0; i<16; i++) { + W[i] = get_bigendian( p, 4 ); + p += 4; + } + + /* fill W[16..63] */ + for (i = 16; i < SHA256_K_SIZE; i++) { + W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; + } + + /* Compress */ +#define RND(a,b,c,d,e,f,g,h,i) \ + t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ + t1 = Sigma0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + + for (i = 0; i < SHA256_K_SIZE; ++i) { + RND(S0,S1,S2,S3,S4,S5,S6,S7,i); + t = S7; S7 = S6; S6 = S5; S5 = S4; + S4 = S3; S3 = S2; S2 = S1; S1 = S0; S0 = t; + } +#undef RND + + /* feedback */ + ctx->h[0] += S0; + ctx->h[1] += S1; + ctx->h[2] += S2; + ctx->h[3] += S3; + ctx->h[4] += S4; + ctx->h[5] += S5; + ctx->h[6] += S6; + ctx->h[7] += S7; +} + +void SHA256_Init (SHA256_CTX *ctx) +{ + ctx->Nl = 0; + ctx->Nh = 0; + ctx->num = 0; + ctx->h[0] = 0x6A09E667UL; + ctx->h[1] = 0xBB67AE85UL; + ctx->h[2] = 0x3C6EF372UL; + ctx->h[3] = 0xA54FF53AUL; + ctx->h[4] = 0x510E527FUL; + ctx->h[5] = 0x9B05688CUL; + ctx->h[6] = 0x1F83D9ABUL; + ctx->h[7] = 0x5BE0CD19UL; +} + +void SHA256_Update (SHA256_CTX *ctx, const void *src, unsigned int count) +{ + unsigned new_count = (ctx->Nl + (count << 3)) & 0xffffffff; + if (new_count < ctx->Nl) { + ctx->Nh += 1; + } + ctx->Nl = new_count; + + while (count) { + unsigned int this_step = 64 - ctx->num; + if (this_step > count) this_step = count; + memcpy( ctx->data + ctx->num, src, this_step); + + if (this_step + ctx->num < 64) { + ctx->num += this_step; + break; + } + + src = (const unsigned char *)src + this_step; + count -= this_step; + ctx->num = 0; + + sha256_compress( ctx, ctx->data ); + } +} + +/* + * Add padding and return the message digest. 
+ */ +void SHA256_Final (unsigned char *digest, SHA256_CTX *ctx) +{ + unsigned int i; + unsigned char finalcount[SHA256_FINALCOUNT_SIZE]; + + put_bigendian( &finalcount[0], ctx->Nh, 4 ); + put_bigendian( &finalcount[4], ctx->Nl, 4 ); + + SHA256_Update(ctx, "\200", 1); + + if (ctx->num > 56) { + SHA256_Update(ctx, "\0\0\0\0\0\0\0\0", 8); + } + memset( ctx->data + ctx->num, 0, 56 - ctx->num ); + ctx->num = 56; + SHA256_Update(ctx, finalcount, SHA256_FINALCOUNT_SIZE); /* Should cause a sha256_compress() */ + + /* + * The final state is an array of unsigned long's; place them as a series + * of bigendian 4-byte words onto the output + */ + for (i=0; i<8; i++) { + put_bigendian( digest + 4*i, ctx->h[i], 4 ); + } +} +#endif diff --git a/src/sig_stfl/lms/external/sha256.h b/src/sig_stfl/lms/external/sha256.h new file mode 100644 index 0000000000..a5de21c014 --- /dev/null +++ b/src/sig_stfl/lms/external/sha256.h @@ -0,0 +1,43 @@ +#if !defined(SHA256_H_) +#define SHA256_H_ + +#if defined( EXT_SHA256_H ) +#include EXT_SHA256_H +#else + +#define USE_OPENSSL 0 /* We use the OpenSSL implementation for SHA-256 */ + /* (which is quite a bit faster than our portable */ + /* C version) */ + +#if USE_OPENSSL + +#include + +#else + +/* SHA256 context. */ +typedef struct { + unsigned long int h[8]; /* state; this is in the CPU native format */ + unsigned long Nl, Nh; /* number of bits processed so far */ + unsigned num; /* number of bytes within the below */ + /* buffer */ + unsigned char data[64]; /* input buffer. This is in byte vector format */ +} SHA256_CTX; + +void SHA256_Init(SHA256_CTX *); /* context */ + +void SHA256_Update(SHA256_CTX *, /* context */ + const void *, /* input block */ + unsigned int);/* length of input block */ + +void SHA256_Final(unsigned char *, + SHA256_CTX *); +#endif + +#endif /* EXT_SHA256_H */ + +#if !defined( SHA256_LEN ) +#define SHA256_LEN 32 /* The length of a SHA256 hash output */ +#endif + +#endif /* ifdef(SHA256_H_) */