diff --git a/linux/lib/xz/xz_dec_stream.c b/linux/lib/xz/xz_dec_stream.c
index 876a444..33927e8 100644
--- a/linux/lib/xz/xz_dec_stream.c
+++ b/linux/lib/xz/xz_dec_stream.c
@@ -15,6 +15,12 @@
 #	define IS_CRC64(check_type) false
 #endif
 
+#ifdef XZ_USE_SHA256
+#	define IS_SHA256(check_type) ((check_type) == XZ_CHECK_SHA256)
+#else
+#	define IS_SHA256(check_type) false
+#endif
+
 /* Hash used to validate the Index field */
 struct xz_dec_hash {
 	vli_type unpadded;
@@ -145,9 +151,23 @@ struct xz_dec {
 	struct xz_dec_bcj *bcj;
 	bool bcj_active;
 #endif
+
+#ifdef XZ_USE_SHA256
+	/*
+	 * SHA-256 value in Block
+	 *
+	 * struct xz_sha256 is over a hundred bytes and it's only accessed
+	 * from a few places. Putting the SHA-256 state near the end of
+	 * struct xz_dec (somewhere after the "index" member) reduces code
+	 * size at least on x86 and RISC-V because the first bytes of the
+	 * struct can be accessed with smaller instructions; the members
+	 * that are accessed from many places should be at the top.
+	 */
+	struct xz_sha256 sha256;
+#endif
 };
 
-#ifdef XZ_DEC_ANY_CHECK
+#if defined(XZ_DEC_ANY_CHECK) || defined(XZ_USE_SHA256)
 /* Sizes of the Check field with different Check IDs */
 static const uint8_t check_sizes[16] = {
 	0,
@@ -260,6 +280,11 @@ static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
 		s->crc = xz_crc64(b->out + s->out_start,
 				b->out_pos - s->out_start, s->crc);
 #endif
+#ifdef XZ_USE_SHA256
+	else if (s->check_type == XZ_CHECK_SHA256)
+		xz_sha256_update(b->out + s->out_start,
+				b->out_pos - s->out_start, &s->sha256);
+#endif
 
 	if (ret == XZ_STREAM_END) {
 		if (s->block_header.compressed != VLI_UNKNOWN
@@ -275,7 +300,7 @@ static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
 		s->block.hash.unpadded += s->block_header.size
 				+ s->block.compressed;
 
-#ifdef XZ_DEC_ANY_CHECK
+#if defined(XZ_DEC_ANY_CHECK) || defined(XZ_USE_SHA256)
 		s->block.hash.unpadded += check_sizes[s->check_type];
 #else
 		if (s->check_type == XZ_CHECK_CRC32)
@@ -428,13 +453,14 @@ static enum xz_ret dec_stream_header(struct xz_dec *s)
 
 	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
 
+	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)
+			&& !IS_SHA256(s->check_type)) {
 #ifdef XZ_DEC_ANY_CHECK
-	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
 		return XZ_UNSUPPORTED_CHECK;
 #else
-	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
 		return XZ_OPTIONS_ERROR;
 #endif
+	}
 
 	return XZ_OK;
 }
@@ -639,6 +665,11 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
 		if (ret != XZ_OK)
 			return ret;
 
+#ifdef XZ_USE_SHA256
+		if (s->check_type == XZ_CHECK_SHA256)
+			xz_sha256_reset(&s->sha256);
+#endif
+
 		s->sequence = SEQ_BLOCK_UNCOMPRESS;
 		fallthrough;
 
@@ -685,6 +716,19 @@ static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
 			if (ret != XZ_STREAM_END)
 				return ret;
 		}
+#ifdef XZ_USE_SHA256
+		else if (s->check_type == XZ_CHECK_SHA256) {
+			s->temp.size = 32;
+			if (!fill_temp(s, b))
+				return XZ_OK;
+
+			if (!xz_sha256_validate(s->temp.buf,
+						&s->sha256))
+				return XZ_DATA_ERROR;
+
+			s->pos = 0;
+		}
+#endif
 #ifdef XZ_DEC_ANY_CHECK
 		else if (!check_skip(s, b)) {
 			return XZ_OK;
diff --git a/linux/lib/xz/xz_private.h b/linux/lib/xz/xz_private.h
index 949de97..7387401 100644
--- a/linux/lib/xz/xz_private.h
+++ b/linux/lib/xz/xz_private.h
@@ -111,6 +111,30 @@
 #	endif
 #endif
 
+struct xz_sha256 {
+	/* Buffered input data */
+	uint8_t data[64];
+
+	/* Internal state and the final hash value */
+	uint32_t state[8];
+
+	/* Size of the input data */
+	uint64_t size;
+};
+
+/* Reset the SHA-256 state to prepare for a new calculation. */
+XZ_EXTERN void xz_sha256_reset(struct xz_sha256 *s);
+
+/* Update the SHA-256 state with new data. */
+XZ_EXTERN void xz_sha256_update(const uint8_t *buf, size_t size,
+				struct xz_sha256 *s);
+
+/*
+ * Finish the SHA-256 calculation. Compare the result with the first 32 bytes
+ * from buf. Return true if the values are equal and false if they aren't.
+ */
+XZ_EXTERN bool xz_sha256_validate(const uint8_t *buf, struct xz_sha256 *s);
+
 /*
  * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
  * before calling xz_dec_lzma2_run().
diff --git a/linux/lib/xz/xz_sha256.c b/linux/lib/xz/xz_sha256.c
new file mode 100644
index 0000000..078cad2
--- /dev/null
+++ b/linux/lib/xz/xz_sha256.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: 0BSD
+
+/*
+ * SHA-256
+ *
+ * This is based on the XZ Utils version, which is based on public domain
+ * code from Crypto++ Library 5.5.1 released in 2007: https://www.cryptopp.com/
+ *
+ * Authors: Wei Dai
+ *          Lasse Collin
+ */
+
+#include "xz_private.h"
+
+static inline uint32_t
+rotr_32(uint32_t num, unsigned amount)
+{
+	return (num >> amount) | (num << (32 - amount));
+}
+
+#define blk0(i) (W[i] = get_be32(&data[4 * i]))
+#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
+		+ s0(W[(i - 15) & 15]))
+
+#define Ch(x, y, z) (z ^ (x & (y ^ z)))
+#define Maj(x, y, z) ((x & (y ^ z)) + (y & z))
+
+#define a(i) T[(0 - i) & 7]
+#define b(i) T[(1 - i) & 7]
+#define c(i) T[(2 - i) & 7]
+#define d(i) T[(3 - i) & 7]
+#define e(i) T[(4 - i) & 7]
+#define f(i) T[(5 - i) & 7]
+#define g(i) T[(6 - i) & 7]
+#define h(i) T[(7 - i) & 7]
+
+#define R(i, j, blk) \
+	h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
+	d(i) += h(i); \
+	h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
+#define R0(i) R(i, 0, blk0(i))
+#define R2(i) R(i, j, blk2(i))
+
+#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
+#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
+#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
+#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))
+
+static const uint32_t SHA256_K[64] = {
+	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+static void
+transform(uint32_t state[8], const uint8_t data[64])
+{
+	uint32_t W[16];
+	uint32_t T[8];
+	unsigned int j;
+
+	/* Copy state[] to working vars. */
+	memcpy(T, state, sizeof(T));
+
+	/* The first 16 operations unrolled */
+	R0( 0); R0( 1); R0( 2); R0( 3);
+	R0( 4); R0( 5); R0( 6); R0( 7);
+	R0( 8); R0( 9); R0(10); R0(11);
+	R0(12); R0(13); R0(14); R0(15);
+
+	/* The remaining 48 operations partially unrolled */
+	for (j = 16; j < 64; j += 16) {
+		R2( 0); R2( 1); R2( 2); R2( 3);
+		R2( 4); R2( 5); R2( 6); R2( 7);
+		R2( 8); R2( 9); R2(10); R2(11);
+		R2(12); R2(13); R2(14); R2(15);
+	}
+
+	/* Add the working vars back into state[]. */
+	state[0] += a(0);
+	state[1] += b(0);
+	state[2] += c(0);
+	state[3] += d(0);
+	state[4] += e(0);
+	state[5] += f(0);
+	state[6] += g(0);
+	state[7] += h(0);
+}
+
+XZ_EXTERN void xz_sha256_reset(struct xz_sha256 *s)
+{
+	static const uint32_t initial_state[8] = {
+		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+	};
+
+	memcpy(s->state, initial_state, sizeof(initial_state));
+	s->size = 0;
+}
+
+XZ_EXTERN void xz_sha256_update(const uint8_t *buf, size_t size,
+				struct xz_sha256 *s)
+{
+	size_t copy_start;
+	size_t copy_size;
+
+	/*
+	 * Copy the input data into a properly aligned temporary buffer.
+	 * This way we can be called with arbitrarily sized buffers
+	 * (no need to be a multiple of 64 bytes).
+	 *
+	 * Full 64-byte chunks could be processed directly from buf with
+	 * unaligned access. It seemed to make very little difference in
+	 * speed on x86-64 though. Thus it was omitted.
+	 */
+	while (size > 0) {
+		copy_start = s->size & 0x3F;
+		copy_size = 64 - copy_start;
+		if (copy_size > size)
+			copy_size = size;
+
+		memcpy(s->data + copy_start, buf, copy_size);
+
+		buf += copy_size;
+		size -= copy_size;
+		s->size += copy_size;
+
+		if ((s->size & 0x3F) == 0)
+			transform(s->state, s->data);
+	}
+}
+
+XZ_EXTERN bool xz_sha256_validate(const uint8_t *buf, struct xz_sha256 *s)
+{
+	/*
+	 * Add padding as described in RFC 3174 (it describes SHA-1 but
+	 * the same padding style is used for SHA-256 too).
+	 */
+	size_t i = s->size & 0x3F;
+	s->data[i++] = 0x80;
+
+	while (i != 64 - 8) {
+		if (i == 64) {
+			transform(s->state, s->data);
+			i = 0;
+		}
+
+		s->data[i++] = 0x00;
+	}
+
+	/* Convert the message size from bytes to bits. */
+	s->size *= 8;
+
+	/*
+	 * Store the message size in big endian byte order and
+	 * calculate the final hash value.
+	 */
+	for (i = 0; i < 8; ++i)
+		s->data[64 - 8 + i] = (uint8_t)(s->size >> ((7 - i) * 8));
+
+	transform(s->state, s->data);
+
+	/* Compare if the hash value matches the first 32 bytes in buf. */
+	for (i = 0; i < 8; ++i)
+		if (get_unaligned_be32(buf + 4 * i) != s->state[i])
+			return false;
+
+	return true;
+}
diff --git a/userspace/Makefile b/userspace/Makefile
index 37a62ba..ca24017 100644
--- a/userspace/Makefile
+++ b/userspace/Makefile
@@ -11,11 +11,13 @@
 CC = gcc -std=gnu11
 BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_ARM64 \
 	       -DXZ_DEC_RISCV -DXZ_DEC_POWERPC -DXZ_DEC_IA64 -DXZ_DEC_SPARC
-CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK -DXZ_DEC_CONCATENATED
+CPPFLAGS = -DXZ_USE_CRC64 -DXZ_USE_SHA256 -DXZ_DEC_ANY_CHECK \
+	   -DXZ_DEC_CONCATENATED
 CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra -Wdeclaration-after-statement
 RM = rm -f
 VPATH = ../linux/include/linux ../linux/lib/xz
-COMMON_SRCS = xz_crc32.c xz_crc64.c xz_dec_stream.c xz_dec_lzma2.c xz_dec_bcj.c
+COMMON_SRCS = xz_crc32.c xz_crc64.c xz_sha256.c xz_dec_stream.c \
+	      xz_dec_lzma2.c xz_dec_bcj.c
 COMMON_OBJS = $(COMMON_SRCS:.c=.o)
 XZMINIDEC_OBJS = xzminidec.o
 BYTETEST_OBJS = bytetest.o
diff --git a/userspace/boottest.c b/userspace/boottest.c
index 068f81c..5816dbf 100644
--- a/userspace/boottest.c
+++ b/userspace/boottest.c
@@ -24,8 +24,12 @@ static void error(/*const*/ char *msg)
  */
 #undef XZ_DEC_ANY_CHECK
 
-/* Disable the CRC64 support even if it was enabled in the Makefile. */
+/*
+ * Disable the CRC64 and SHA-256 support even if they were enabled
+ * in the Makefile.
+ */
 #undef XZ_USE_CRC64
+#undef XZ_USE_SHA256
 
 #include "../linux/lib/decompress_unxz.c"
 
diff --git a/userspace/xz_config.h b/userspace/xz_config.h
index a455cae..d7d4031 100644
--- a/userspace/xz_config.h
+++ b/userspace/xz_config.h
@@ -124,12 +124,15 @@ static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
 #endif
 
 /*
- * Use get_unaligned_le32() also for aligned access for simplicity. On
- * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
- * could save a few bytes in code size.
+ * To keep things simpler, use the generic unaligned methods also for
+ * aligned access. The only place where performance could matter is
+ * SHA-256, but files using SHA-256 aren't common.
  */
 #ifndef get_le32
 #	define get_le32 get_unaligned_le32
 #endif
+#ifndef get_be32
+#	define get_be32 get_unaligned_be32
+#endif
 
 #endif
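
Usage note (not part of the patch): the sketch below illustrates how the three new helpers declared in xz_private.h compose, mirroring the pattern the patch adds to dec_main() and dec_block(): reset when a new Block starts, update with each chunk of decompressed output, and validate against the 32-byte Check field at the end of the Block. It assumes the file is compiled inside linux/lib/xz with XZ_USE_SHA256 defined so xz_private.h is available; the function name check_one_block and its parameters are hypothetical, not identifiers from the patch.

	#include "xz_private.h"

	/*
	 * Hash "size" bytes of decompressed data and compare the result
	 * against the 32-byte Check field in "stored_check". Returns true
	 * when the SHA-256 value matches.
	 */
	static bool check_one_block(const uint8_t *data, size_t size,
				    const uint8_t *stored_check)
	{
		struct xz_sha256 sha256;

		xz_sha256_reset(&sha256);

		/* May be called repeatedly with consecutive chunks. */
		xz_sha256_update(data, size, &sha256);

		/* Applies the final padding and compares the digests. */
		return xz_sha256_validate(stored_check, &sha256);
	}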