Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rv64: fully support vector extension version 1.0 #194

Merged
merged 3 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/cpu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ typedef struct Decode {
#ifdef CONFIG_RVV
// for vector
int v_width;
int v_nf;
int v_lsumop;
uint32_t vm;
uint32_t src_vmode;
rtlreg_t tmp_reg[4];
Expand Down
54 changes: 53 additions & 1 deletion include/rtl/fp.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@
#define __RTL_FP_H__

// Operand-width selectors carried in the low bits of an fpcall command word
// (see FPCALL_CMD / FPCALL_W). Values are spelled out because they are packed
// into the command encoding and must stay stable.
enum {
  // Plain widths: both sources already have the operation's width.
  FPCALL_W16            = 0,
  FPCALL_W32            = 1,
  FPCALL_W64            = 2,
  FPCALL_W128           = 3,
  FPCALL_W80            = 4,
  // Widening forms: both sources are the narrower type.
  FPCALL_W16_to_32      = 5,
  FPCALL_W32_to_64      = 6,
  // Mixed forms: only the named source is the narrower type.
  FPCALL_SRC1_W16_to_32 = 7,
  FPCALL_SRC2_W16_to_32 = 8,
  FPCALL_SRC1_W32_to_64 = 9,
  FPCALL_SRC2_W32_to_64 = 10,
};

enum {
Expand All @@ -44,8 +51,20 @@ enum {
FPCALL_SUB,
FPCALL_MUL,
FPCALL_DIV,

FPCALL_SQRT,
FPCALL_RSQRT7,
FPCALL_REC7,
FPCALL_CLASS,

FPCALL_MADD,
FPCALL_NMADD,
FPCALL_MSUB,
FPCALL_NMSUB,
FPCALL_MACC,
FPCALL_NMACC,
FPCALL_MSAC,
FPCALL_NMSAC,

FPCALL_I32ToF,
FPCALL_U32ToF,
Expand All @@ -62,15 +81,48 @@ enum {

FPCALL_NEED_RM, // separator

FPCALL_UADD,

FPCALL_MAX,
FPCALL_MIN,
FPCALL_LE,
FPCALL_EQ,
FPCALL_LT,
FPCALL_GE,
FPCALL_NE,
FPCALL_GT,

FPCALL_SGNJ,
FPCALL_SGNJN,
FPCALL_SGNJX,

FPCALL_FToU,
FPCALL_FToS,
FPCALL_FToUT,
FPCALL_FToST,
FPCALL_UToF,
FPCALL_SToF,

FPCALL_FToDU,
FPCALL_FToDS,
FPCALL_FToDUT,
FPCALL_FToDST,
FPCALL_UToDF,
FPCALL_SToDF,
FPCALL_FToDF,

FPCALL_DFToU,
FPCALL_DFToS,
FPCALL_DFToUT,
FPCALL_DFToST,
FPCALL_DUToF,
FPCALL_DSToF,
FPCALL_DFToF,
FPCALL_DFToFR
};

// Pack an operation code and a width selector into one command word:
// the operation goes in bits 16 and up, the width selector in the low bits.
#define FPCALL_CMD(op, w) (((op) << 16) | (w))
// Recover the operation code from a command word.
#define FPCALL_OP(cmd) ((cmd) >> 16)
// Recover the width selector from a command word. The mask is four bits wide
// because the width enum has more than four entries (selectors run 0..10);
// a 0x3 mask would alias the widening selectors onto the plain widths.
#define FPCALL_W(cmd) ((cmd) & 0xf)

#endif
1 change: 1 addition & 0 deletions include/rtl/rtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum {
HOSTCALL_INV, // invalid opcode
HOSTCALL_PIO, // port I/O
HOSTCALL_FP, // floating point operation
HOSTCALL_VFP, // vector floating point operation
HOSTCALL_CSR, // system registers / control status registers
HOSTCALL_TRAP, // trap by interrupts/exceptions
HOSTCALL_PRIV, // privilege instructions
Expand Down
263 changes: 263 additions & 0 deletions src/engine/interpreter/fp.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ static inline rtlreg_t unbox(rtlreg_t r) {
? (r & ~BOX_MASK) : defaultNaNF32UI;
}

// Wrap an RTL register value as a float16_t by storing it into the raw-bits
// field `v` (the value is implicitly converted to that field's type).
// No NaN-box check is applied here.
static inline float16_t rtlToF16(rtlreg_t r) {
  float16_t half;
  half.v = r;
  return half;
}

// Wrap an RTL register value as a float32_t by storing it into the raw-bits
// field `v` (the value is implicitly converted to that field's type).
// Unlike rtlToF32, this does not route the value through unbox().
static inline float32_t rtlToVF32(rtlreg_t r) {
  float32_t single;
  single.v = r;
  return single;
}

static inline float32_t rtlToF32(rtlreg_t r) {
float32_t f = { .v = (uint32_t)unbox(r) };
return f;
Expand All @@ -38,6 +48,7 @@ static inline float64_t rtlToF64(rtlreg_t r) {
uint32_t isa_fp_get_rm(Decode *s);
void isa_fp_set_ex(uint32_t ex);
void isa_fp_csr_check();
uint32_t isa_fp_get_frm();
#endif // CONFIG_FPU_NONE

def_rtl(fpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint32_t cmd) {
Expand Down Expand Up @@ -127,6 +138,258 @@ def_rtl(fpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint
#endif // CONFIG_FPU_NONE
}

// Vector floating-point host call.
//
// Decodes `cmd` into an operation (FPCALL_OP) and a width selector (FPCALL_W),
// runs the matching SoftFloat routine on *src1 / *src2 and writes the raw
// result bits (or the integer / boolean result) to *dest.
//
//   dest - result register; for the MADD/MACC families it is also read as the
//          third operand before being overwritten.
//   src1 - first source operand (raw bits; not NaN-box checked here).
//   src2 - second source operand (raw bits; not NaN-box checked here).
//   cmd  - command word built with FPCALL_CMD(op, width).
//
// The rounding mode is taken from isa_fp_get_frm() on every call. For the
// mixed/widening width selectors the narrower source(s) are first converted
// to the operation width. An unsupported op panics. A width selector that
// matches none of the three branches leaves *dest untouched.
// Any exception flags raised by SoftFloat are forwarded through
// isa_fp_set_ex() and then cleared.
//
// The whole body is compiled out when CONFIG_FPU_NONE is defined.
def_rtl(vfpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint32_t cmd) {
#ifndef CONFIG_FPU_NONE
  uint32_t w = FPCALL_W(cmd);
  uint32_t op = FPCALL_OP(cmd);
  isa_fp_csr_check();

  softfloat_roundingMode = isa_fp_get_frm();

  if (w == FPCALL_W16) {
    float16_t fsrc1 = rtlToF16(*src1);
    float16_t fsrc2 = rtlToF16(*src2);
    switch (op) {
      case FPCALL_ADD: *dest = f16_add(fsrc1, fsrc2).v; break;
      case FPCALL_SUB: *dest = f16_sub(fsrc1, fsrc2).v; break;
      case FPCALL_MUL: *dest = f16_mul(fsrc1, fsrc2).v; break;
      case FPCALL_DIV: *dest = f16_div(fsrc1, fsrc2).v; break;
      case FPCALL_MIN: *dest = f16_min(fsrc1, fsrc2).v; break;
      case FPCALL_MAX: *dest = f16_max(fsrc1, fsrc2).v; break;

      // Add, then replace any NaN result (classify bits 8/9) with the default NaN.
      case FPCALL_UADD: {
        *dest = f16_add(fsrc1, fsrc2).v;
        uint64_t classify = f16_classify(rtlToF16(*dest));
        if (classify & 0x300) *dest = defaultNaNF16UI;
        break;
      }

      case FPCALL_SQRT: *dest = f16_sqrt(fsrc1).v; break;
      case FPCALL_RSQRT7: *dest = f16_rsqrte7(fsrc1).v; break;
      case FPCALL_REC7: *dest = f16_recip7(fsrc1).v; break;
      case FPCALL_CLASS: *dest = f16_classify(fsrc1); break;

      // MADD family: dest is a multiplicand. MACC family: dest is the addend.
      case FPCALL_MADD: *dest = f16_mulAdd(rtlToF16(*dest), fsrc1, fsrc2).v; break;
      case FPCALL_NMADD: *dest = f16_mulAdd(f16_neg(rtlToF16(*dest)), fsrc1, f16_neg(fsrc2)).v; break;
      case FPCALL_MSUB: *dest = f16_mulAdd(rtlToF16(*dest), fsrc1, f16_neg(fsrc2)).v; break;
      case FPCALL_NMSUB: *dest = f16_mulAdd(f16_neg(rtlToF16(*dest)), fsrc1, fsrc2).v; break;
      case FPCALL_MACC: *dest = f16_mulAdd(fsrc1, fsrc2, rtlToF16(*dest)).v; break;
      case FPCALL_NMACC: *dest = f16_mulAdd(f16_neg(fsrc2), fsrc1, f16_neg(rtlToF16(*dest))).v; break;
      case FPCALL_MSAC: *dest = f16_mulAdd(fsrc1, fsrc2, f16_neg(rtlToF16(*dest))).v; break;
      case FPCALL_NMSAC: *dest = f16_mulAdd(f16_neg(fsrc1), fsrc2, rtlToF16(*dest)).v; break;

      // GE/GT are expressed as LE/LT with the operands swapped.
      case FPCALL_LE: *dest = f16_le(fsrc1, fsrc2); break;
      case FPCALL_LT: *dest = f16_lt(fsrc1, fsrc2); break;
      case FPCALL_EQ: *dest = f16_eq(fsrc1, fsrc2); break;
      case FPCALL_GE: *dest = f16_le(fsrc2, fsrc1); break;
      case FPCALL_GT: *dest = f16_lt(fsrc2, fsrc1); break;
      case FPCALL_NE: *dest = !f16_eq(fsrc1, fsrc2); break;

      case FPCALL_SGNJ: *dest = fsgnj16(fsrc1, fsrc2, false, false); break;
      case FPCALL_SGNJN: *dest = fsgnj16(fsrc1, fsrc2, true, false); break;
      case FPCALL_SGNJX: *dest = fsgnj16(fsrc1, fsrc2, false, true); break;

      // Same-width float <-> integer conversions; the *T variants truncate.
      case FPCALL_FToU: *dest = f16_to_ui16(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToS: *dest = f16_to_i16(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToUT: *dest = f16_to_ui16(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_FToST: *dest = f16_to_i16(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_UToF: *dest = ui32_to_f16(fsrc1.v).v; break;
      case FPCALL_SToF: *dest = i32_to_f16(fsrc1.v).v; break;

      // Widening conversions (result is 32 bits wide).
      case FPCALL_FToDU: *dest = f16_to_ui32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToDS: *dest = f16_to_i32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToDUT: *dest = f16_to_ui32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_FToDST: *dest = f16_to_i32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_UToDF: *dest = ui32_to_f32(fsrc1.v).v; break;
      case FPCALL_SToDF:
        if ((fsrc1.v & ~0xffffULL) == 0) *dest = i32_to_f32((int32_t)(int16_t)fsrc1.v).v;
        else if ((fsrc1.v & ~0xffULL) == 0) *dest = i32_to_f32((int32_t)(int8_t)fsrc1.v).v;
        else *dest = i32_to_f32(fsrc1.v).v;
        break;
      case FPCALL_FToDF: *dest = f16_to_f32(fsrc1).v; break;

      // Narrowing conversions (result is 8 bits wide).
      case FPCALL_DFToU: *dest = f16_to_ui8(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToS: *dest = f16_to_i8(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToUT: *dest = f16_to_ui8(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_DFToST: *dest = f16_to_i8(fsrc1, softfloat_round_minMag, true); break;

      default: panic("op = %d not supported", op);
    }
  } else if (w == FPCALL_W32 || w == FPCALL_W16_to_32 || w == FPCALL_SRC1_W16_to_32 || w == FPCALL_SRC2_W16_to_32) {
    float32_t fsrc1;
    float32_t fsrc2;
    // Bring both operands to single precision, widening whichever source(s)
    // the selector marks as half precision.
    if (w == FPCALL_W32) {
      fsrc1 = rtlToVF32(*src1);
      fsrc2 = rtlToVF32(*src2);
    } else if (w == FPCALL_SRC1_W16_to_32) {
      fsrc1 = f16_to_f32(rtlToF16(*src1));
      fsrc2 = rtlToVF32(*src2);
    } else if (w == FPCALL_SRC2_W16_to_32) {
      fsrc1 = rtlToVF32(*src1);
      fsrc2 = f16_to_f32(rtlToF16(*src2));
    } else {
      fsrc1 = f16_to_f32(rtlToF16(*src1));
      fsrc2 = f16_to_f32(rtlToF16(*src2));
    }

    switch (op) {
      case FPCALL_ADD: *dest = f32_add(fsrc1, fsrc2).v; break;
      case FPCALL_SUB: *dest = f32_sub(fsrc1, fsrc2).v; break;
      case FPCALL_MUL: *dest = f32_mul(fsrc1, fsrc2).v; break;
      case FPCALL_DIV: *dest = f32_div(fsrc1, fsrc2).v; break;
      case FPCALL_MIN: *dest = f32_min(fsrc1, fsrc2).v; break;
      case FPCALL_MAX: *dest = f32_max(fsrc1, fsrc2).v; break;

      // Add, then replace any NaN result (classify bits 8/9) with the default NaN.
      case FPCALL_UADD: {
        *dest = f32_add(fsrc1, fsrc2).v;
        // The sum was just stored as raw 32-bit bits with no NaN box, so it
        // must be classified directly; going through the NaN-box-checking
        // rtlToF32() could turn every valid sum into a NaN.
        uint64_t classify = f32_classify(rtlToVF32(*dest));
        if (classify & 0x300) *dest = defaultNaNF32UI;
        break;
      }

      case FPCALL_SQRT: *dest = f32_sqrt(fsrc1).v; break;
      case FPCALL_RSQRT7: *dest = f32_rsqrte7(fsrc1).v; break;
      case FPCALL_REC7: *dest = f32_recip7(fsrc1).v; break;
      case FPCALL_CLASS: *dest = f32_classify(fsrc1); break;

      // MADD family: dest is a multiplicand. MACC family: dest is the addend.
      case FPCALL_MADD: *dest = f32_mulAdd(rtlToVF32(*dest), fsrc1, fsrc2).v; break;
      case FPCALL_NMADD: *dest = f32_mulAdd(f32_neg(rtlToVF32(*dest)), fsrc1, f32_neg(fsrc2)).v; break;
      case FPCALL_MSUB: *dest = f32_mulAdd(rtlToVF32(*dest), fsrc1, f32_neg(fsrc2)).v; break;
      case FPCALL_NMSUB: *dest = f32_mulAdd(f32_neg(rtlToVF32(*dest)), fsrc1, fsrc2).v; break;
      case FPCALL_MACC: *dest = f32_mulAdd(fsrc1, fsrc2, rtlToVF32(*dest)).v; break;
      case FPCALL_NMACC: *dest = f32_mulAdd(f32_neg(fsrc2), fsrc1, f32_neg(rtlToVF32(*dest))).v; break;
      case FPCALL_MSAC: *dest = f32_mulAdd(fsrc1, fsrc2, f32_neg(rtlToVF32(*dest))).v; break;
      case FPCALL_NMSAC: *dest = f32_mulAdd(f32_neg(fsrc1), fsrc2, rtlToVF32(*dest)).v; break;

      // GE/GT are expressed as LE/LT with the operands swapped.
      case FPCALL_LE: *dest = f32_le(fsrc1, fsrc2); break;
      case FPCALL_LT: *dest = f32_lt(fsrc1, fsrc2); break;
      case FPCALL_EQ: *dest = f32_eq(fsrc1, fsrc2); break;
      case FPCALL_GE: *dest = f32_le(fsrc2, fsrc1); break;
      case FPCALL_GT: *dest = f32_lt(fsrc2, fsrc1); break;
      case FPCALL_NE: *dest = !f32_eq(fsrc1, fsrc2); break;

      case FPCALL_SGNJ: *dest = fsgnj32(fsrc1, fsrc2, false, false); break;
      case FPCALL_SGNJN: *dest = fsgnj32(fsrc1, fsrc2, true, false); break;
      case FPCALL_SGNJX: *dest = fsgnj32(fsrc1, fsrc2, false, true); break;

      // Same-width float <-> integer conversions; the *T variants truncate.
      case FPCALL_FToU: *dest = f32_to_ui32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToS: *dest = f32_to_i32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToUT: *dest = f32_to_ui32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_FToST: *dest = f32_to_i32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_UToF: *dest = ui32_to_f32(fsrc1.v).v; break;
      case FPCALL_SToF: *dest = i32_to_f32(fsrc1.v).v; break;

      // Widening conversions (result is 64 bits wide).
      case FPCALL_FToDU: *dest = f32_to_ui64(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToDS: *dest = f32_to_i64(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToDUT: *dest = f32_to_ui64(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_FToDST: *dest = f32_to_i64(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_UToDF: *dest = ui32_to_f64(fsrc1.v).v; break;
      case FPCALL_SToDF:
        if ((fsrc1.v & ~0xffffULL ) == 0) *dest = i32_to_f64((int32_t)(int16_t)fsrc1.v).v;
        else if ((fsrc1.v & ~0xffULL ) == 0) *dest = i32_to_f64((int32_t)(int8_t)fsrc1.v).v;
        else *dest = i32_to_f64(fsrc1.v).v;
        break;
      case FPCALL_FToDF: *dest = f32_to_f64(fsrc1).v; break;

      // Narrowing conversions (result is 16 bits wide).
      case FPCALL_DFToU: *dest = f32_to_ui16(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToS: *dest = f32_to_i16(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToUT: *dest = f32_to_ui16(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_DFToST: *dest = f32_to_i16(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_DUToF: *dest = ui32_to_f16(fsrc1.v).v; break;
      case FPCALL_DSToF: *dest = i32_to_f16(fsrc1.v).v; break;
      case FPCALL_DFToF: *dest = f32_to_f16(fsrc1).v; break;
      // Narrowing float conversion with the rounding mode forced to round-to-odd.
      case FPCALL_DFToFR:
        softfloat_roundingMode = softfloat_round_odd;
        *dest = f32_to_f16(fsrc1).v;
        break;

      default: panic("op = %d not supported", op);
    }
  } else if (w == FPCALL_W64 || w == FPCALL_W32_to_64 || w == FPCALL_SRC1_W32_to_64 || w == FPCALL_SRC2_W32_to_64) {
    float64_t fsrc1;
    float64_t fsrc2;
    // Bring both operands to double precision, widening whichever source(s)
    // the selector marks as single precision.
    if (w == FPCALL_W64) {
      fsrc1 = rtlToF64(*src1);
      fsrc2 = rtlToF64(*src2);
    } else if (w == FPCALL_SRC1_W32_to_64) {
      fsrc1 = f32_to_f64(rtlToVF32(*src1));
      fsrc2 = rtlToF64(*src2);
    } else if (w == FPCALL_SRC2_W32_to_64) {
      fsrc1 = rtlToF64(*src1);
      fsrc2 = f32_to_f64(rtlToVF32(*src2));
    } else {
      fsrc1 = f32_to_f64(rtlToVF32(*src1));
      fsrc2 = f32_to_f64(rtlToVF32(*src2));
    }

    switch (op) {
      case FPCALL_ADD: *dest = f64_add(fsrc1, fsrc2).v; break;
      case FPCALL_SUB: *dest = f64_sub(fsrc1, fsrc2).v; break;
      case FPCALL_MUL: *dest = f64_mul(fsrc1, fsrc2).v; break;
      case FPCALL_DIV: *dest = f64_div(fsrc1, fsrc2).v; break;
      case FPCALL_MAX: *dest = f64_max(fsrc1, fsrc2).v; break;
      case FPCALL_MIN: *dest = f64_min(fsrc1, fsrc2).v; break;

      // Add, then replace any NaN result (classify bits 8/9) with the default NaN.
      case FPCALL_UADD: {
        *dest = f64_add(fsrc1, fsrc2).v;
        uint64_t classify = f64_classify(rtlToF64(*dest));
        if (classify & 0x300) *dest = defaultNaNF64UI;
        break;
      }

      case FPCALL_SQRT: *dest = f64_sqrt(fsrc1).v; break;
      case FPCALL_RSQRT7: *dest = f64_rsqrte7(fsrc1).v; break;
      case FPCALL_REC7: *dest = f64_recip7(fsrc1).v; break;
      case FPCALL_CLASS: *dest = f64_classify(fsrc1); break;

      // MADD family: dest is a multiplicand. MACC family: dest is the addend.
      case FPCALL_MADD: *dest = f64_mulAdd(rtlToF64(*dest), fsrc1, fsrc2).v; break;
      case FPCALL_NMADD: *dest = f64_mulAdd(f64_neg(rtlToF64(*dest)), fsrc1, f64_neg(fsrc2)).v; break;
      case FPCALL_MSUB: *dest = f64_mulAdd(rtlToF64(*dest), fsrc1, f64_neg(fsrc2)).v; break;
      case FPCALL_NMSUB: *dest = f64_mulAdd(f64_neg(rtlToF64(*dest)), fsrc1, fsrc2).v; break;
      case FPCALL_MACC: *dest = f64_mulAdd(fsrc1, fsrc2, rtlToF64(*dest)).v; break;
      case FPCALL_NMACC: *dest = f64_mulAdd(f64_neg(fsrc2), fsrc1, f64_neg(rtlToF64(*dest))).v; break;
      case FPCALL_MSAC: *dest = f64_mulAdd(fsrc1, fsrc2, f64_neg(rtlToF64(*dest))).v; break;
      case FPCALL_NMSAC: *dest = f64_mulAdd(f64_neg(fsrc1), fsrc2, rtlToF64(*dest)).v; break;

      // GE/GT are expressed as LE/LT with the operands swapped.
      case FPCALL_LE: *dest = f64_le(fsrc1, fsrc2); break;
      case FPCALL_LT: *dest = f64_lt(fsrc1, fsrc2); break;
      case FPCALL_EQ: *dest = f64_eq(fsrc1, fsrc2); break;
      case FPCALL_GE: *dest = f64_le(fsrc2, fsrc1); break;
      case FPCALL_GT: *dest = f64_lt(fsrc2, fsrc1); break;
      case FPCALL_NE: *dest = !f64_eq(fsrc1, fsrc2); break;

      case FPCALL_SGNJ: *dest = fsgnj64(fsrc1, fsrc2, false, false); break;
      case FPCALL_SGNJN: *dest = fsgnj64(fsrc1, fsrc2, true, false); break;
      case FPCALL_SGNJX: *dest = fsgnj64(fsrc1, fsrc2, false, true); break;

      // Same-width float <-> integer conversions; the *T variants truncate.
      case FPCALL_FToU: *dest = f64_to_ui64(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToS: *dest = f64_to_i64(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_FToUT: *dest = f64_to_ui64(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_FToST: *dest = f64_to_i64(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_UToF: *dest = ui64_to_f64(fsrc1.v).v; break;
      case FPCALL_SToF: *dest = i64_to_f64(fsrc1.v).v; break;

      // Narrowing conversions (result is 32 bits wide).
      case FPCALL_DFToU: *dest = f64_to_ui32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToS: *dest = f64_to_i32(fsrc1, softfloat_roundingMode, true); break;
      case FPCALL_DFToUT: *dest = f64_to_ui32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_DFToST: *dest = f64_to_i32(fsrc1, softfloat_round_minMag, true); break;
      case FPCALL_DUToF: *dest = ui64_to_f32(fsrc1.v).v; break;
      case FPCALL_DSToF: *dest = i64_to_f32(fsrc1.v).v; break;
      case FPCALL_DFToF: *dest = f64_to_f32(fsrc1).v; break;
      // Narrowing float conversion with the rounding mode forced to
      // round-to-odd, matching the 32->16 case above (round_max would round
      // toward +inf instead).
      // NOTE(review): presumably backs vfncvt.rod.f.f.w - confirm against the decoder.
      case FPCALL_DFToFR:
        softfloat_roundingMode = softfloat_round_odd;
        *dest = f64_to_f32(fsrc1).v;
        break;

      default: panic("op = %d not supported", op);
    }
  }

  // Forward any exception flags raised by the operation, then reset them so
  // they do not leak into the next call.
  uint32_t ex = fp_get_exception();
  if (ex) {
    isa_fp_set_ex(ex);
    fp_clear_exception();
  }
#endif // CONFIG_FPU_NONE
}

def_rtl(fclass, rtlreg_t *fdest, rtlreg_t *src, int width) {
#ifndef CONFIG_FPU_NONE
if (width == FPCALL_W32) {
Expand Down
Loading
Loading