diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 7ab7cb96dcd904..f1518fd3be9973 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -30,26 +30,25 @@ ENTRY(__asm_copy_from_user)
 	 * t0 - end of uncopied dst
 	 */
 	add	t0, a0, a2
-	bgtu	a0, t0, 5f
 
 	/*
 	 * Use byte copy only if too small.
+	 * SZREG holds 4 for RV32 and 8 for RV64
+	 * a3 - 2*SZREG is the minimum size for word_copy:
+	 *      SZREG for aligning dst + SZREG for word_copy
 	 */
-	li	a3, 8*SZREG /* size must be larger than size in word_copy */
-	neg	t1, a0
-	andi	t1, t1, SZREG-1
-	add	a3, a3, t1
+	li	a3, 2*SZREG
 	bltu	a2, a3, .Lbyte_copy_tail
 
 	/*
-	 * Copy first bytes until dst is align to word boundary.
+	 * Copy first bytes until dst is aligned to word boundary.
	 * a0 - start of dst
 	 * t1 - start of aligned dst
 	 */
 	addi	t1, a0, SZREG-1
 	andi	t1, t1, ~(SZREG-1)
 	/* dst is already aligned, skip */
-	beq	a0, t1, .Lskip_first_bytes
+	beq	a0, t1, .Lskip_align_dst
 1:
 	/* a5 - one byte for copying data */
 	fixup lb      a5, 0(a1), 10f
@@ -58,7 +57,7 @@ ENTRY(__asm_copy_from_user)
 	addi	a0, a0, 1	/* dst */
 	bltu	a0, t1, 1b	/* t1 - start of aligned dst */
 
-.Lskip_first_bytes:
+.Lskip_align_dst:
 	/*
 	 * Now dst is aligned.
 	 * Use shift-copy if src is misaligned.
@@ -69,16 +68,46 @@ ENTRY(__asm_copy_from_user)
 	andi	a3, a1, SZREG-1
 	bnez	a3, .Lshift_copy
 
+.Lcheck_size_bulk:
+	/*
+	 * Evaluate the size to choose word_copy or word_copy_unrolled.
+	 * The word_copy_unrolled path requires more than 8*SZREG remaining.
+	 */
+	li	a3, 8*SZREG
+	add	a4, a0, a3
+	bltu	a4, t0, .Lword_copy_unrolled
+
 .Lword_copy:
-        /*
-	 * Both src and dst are aligned, unrolled word copy
+	/*
+	 * Both src and dst are aligned:
+	 * word copy, SZREG bytes per iteration
 	 *
 	 * a0 - start of aligned dst
 	 * a1 - start of aligned src
-	 * a3 - a1 & mask:(SZREG-1)
 	 * t0 - end of aligned dst
 	 */
-	addi	t0, t0, -(8*SZREG-1) /* not to over run */
+	bgeu	a0, t0, .Lbyte_copy_tail /* check if end of copy */
+	addi	t0, t0, -(SZREG) /* not to over run */
+1:
+	fixup REG_L   a5, 0(a1), 10f
+	addi	a1, a1, SZREG
+	fixup REG_S   a5, 0(a0), 10f
+	addi	a0, a0, SZREG
+	bltu	a0, t0, 1b
+
+	addi	t0, t0, SZREG /* revert to original value */
+	j	.Lbyte_copy_tail
+
+.Lword_copy_unrolled:
+	/*
+	 * Both src and dst are aligned:
+	 * unrolled word copy, 8*SZREG bytes per iteration
+	 *
+	 * a0 - start of aligned dst
+	 * a1 - start of aligned src
+	 * t0 - end of aligned dst
+	 */
+	addi	t0, t0, -(8*SZREG) /* not to over run */
 2:
 	fixup REG_L   a4,        0(a1), 10f
 	fixup REG_L   a5,    SZREG(a1), 10f
@@ -100,8 +129,13 @@ ENTRY(__asm_copy_from_user)
 	addi	a1, a1, 8*SZREG
 	bltu	a0, t0, 2b
 
-	addi	t0, t0, 8*SZREG-1 /* revert to original value */
-	j	.Lbyte_copy_tail
+	addi	t0, t0, 8*SZREG /* revert to original value */
+
+	/*
+	 * The remaining size might be large enough for word_copy to
+	 * reduce the slow byte copy
+	 */
+	j	.Lcheck_size_bulk
 
 .Lshift_copy:
 
@@ -110,7 +144,7 @@ ENTRY(__asm_copy_from_user)
 	 * For misaligned copy we still perform aligned word copy, but
 	 * we need to use the value fetched from the previous iteration and
 	 * do some shifts.
-	 * This is safe because reading less than a word size.
+	 * This is safe because the read is less than a word size.
 	 *
 	 * a0 - start of aligned dst
 	 * a1 - start of src
@@ -132,7 +166,7 @@ ENTRY(__asm_copy_from_user)
 	li	a5, SZREG*8
 	sub	t4, a5, t3
 
-	/* Load the first word to combine with seceond word */
+	/* Load the first word to combine with second word */
 	fixup REG_L   a5, 0(a1), 10f
 
 3:
@@ -164,7 +198,7 @@ ENTRY(__asm_copy_from_user)
 	 * a1 - start of remaining src
 	 * t0 - end of remaining dst
 	 */
-	bgeu	a0, t0, 5f
+	bgeu	a0, t0, .Lend_copy_user /* check if end of copy */
 4:
 	fixup lb      a5, 0(a1), 10f
 	addi	a1, a1, 1	/* src */
@@ -172,7 +206,7 @@ ENTRY(__asm_copy_from_user)
 	addi	a0, a0, 1	/* dst */
 	bltu	a0, t0, 4b	/* t0 - end of dst */
 
-5:
+.Lend_copy_user:
 	/* Disable access to user memory */
 	csrc CSR_STATUS, t6
 	li	a0, 0
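
For reference, the control flow introduced by this patch roughly corresponds to the C sketch below. It is only an illustration under simplifying assumptions: the hypothetical helper sketch_copy() is not the kernel code, it omits the uaccess fixup/fault handling, and it falls back to a plain byte copy instead of the shift-copy path when src and dst cannot both be word-aligned.

/*
 * Illustrative sketch of the copy strategy after this patch:
 * byte copy for tiny sizes, align dst, unrolled word copy while more
 * than 8*SZREG remains, simple word copy for the rest, then byte tail.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define SZREG sizeof(unsigned long)

static void sketch_copy(unsigned char *dst, const unsigned char *src, size_t n)
{
	unsigned char *end = dst + n;

	/* 2*SZREG minimum: SZREG for aligning dst + SZREG for word copy */
	if (n >= 2 * SZREG &&
	    (((uintptr_t)dst ^ (uintptr_t)src) & (SZREG - 1)) == 0) {
		/* copy bytes until dst is aligned to a word boundary */
		while ((uintptr_t)dst & (SZREG - 1))
			*dst++ = *src++;

		/* unrolled word copy, 8*SZREG bytes per iteration */
		while ((size_t)(end - dst) > 8 * SZREG) {
			memcpy(dst, src, 8 * SZREG);
			dst += 8 * SZREG;
			src += 8 * SZREG;
		}

		/* simple word copy, SZREG bytes per iteration */
		while ((size_t)(end - dst) >= SZREG) {
			memcpy(dst, src, SZREG);
			dst += SZREG;
			src += SZREG;
		}
	}

	/* byte copy whatever is left */
	while (dst < end)
		*dst++ = *src++;
}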