Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial loongarch port #166

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ elseif (ARCH_ARM32 OR ARCH_AARCH64)
include (${CMAKE_MODULE_PATH}/cflags-arm.cmake)
elseif (ARCH_PPC64EL)
include (${CMAKE_MODULE_PATH}/cflags-ppc64le.cmake)
elseif (ARCH_LOONGARCH64)
include (${CMAKE_MODULE_PATH}/cflags-loongarch64.cmake)
else ()
message(FATAL_ERROR "Unsupported platform")
endif ()
Expand Down Expand Up @@ -264,6 +266,11 @@ elseif (ARCH_PPC64EL)
set (hs_exec_common_SRCS
${hs_exec_common_SRCS}
src/util/arch/ppc64el/cpuid_flags.c)
elseif (ARCH_LOONGARCH64)
set (hs_exec_common_SRCS
${hs_exec_common_SRCS}
src/util/arch/loongarch64/cpuid_flags.c
)
endif ()

set (hs_exec_SRCS
Expand Down Expand Up @@ -427,6 +434,11 @@ set (hs_exec_SRCS
${hs_exec_SRCS}
src/nfa/vermicelli_simd.cpp
src/util/supervector/arch/ppc64el/impl.cpp)
elseif (ARCH_LOONGARCH64)
set (hs_exec_SRCS
${hs_exec_SRCS}
src/nfa/vermicelli_simd.cpp
src/util/supervector/arch/loongarch64/impl.cpp)
endif()

if (ARCH_IA32 OR ARCH_X86_64)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# About Vectorscan

A fork of Intel's Hyperscan, modified to run on more platforms. Currently ARM NEON/ASIMD
and Power VSX are 100% functional. ARM SVE2 support is in ongoing with
A fork of Intel's Hyperscan, modified to run on more platforms. Currently ARM NEON/ASIMD,
Power VSX and LoongArch LSX are 100% functional. ARM SVE2 support is in ongoing with
access to hardware now. More platforms will follow in the future.
Further more, starting 5.4.12 there is now a [SIMDe](https://github.com/simd-everywhere/simde)
port, which can be either used for platforms without official SIMD support,
Expand Down
3 changes: 3 additions & 0 deletions cmake/archdetect.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ else()
elseif(ARCH_PPC64EL)
set(GNUCC_ARCH power8)
set(TUNE_FLAG power8)
elseif(ARCH_LOONGARCH64)
set(GNUCC_ARCH la464)
set(TUNE_FLAG generic)
else()
set(GNUCC_ARCH native)
set(TUNE_FLAG native)
Expand Down
21 changes: 21 additions & 0 deletions cmake/cflags-loongarch64.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
CHECK_INCLUDE_FILE_CXX(lsxintrin.h HAVE_C_LOONGARCH64_LSXINTRIN_H)

if (HAVE_C_LOONGARCH64_LSXINTRIN_H)
set (INTRIN_INC_H "lsxintrin.h")
else()
message (FATAL_ERROR "No intrinsics header found for LSX")
endif ()

set(ARCH_C_FLAGS "-mlsx")
set(ARCH_CXX_FLAGS "-mlsx")

set(CMAKE_REQUIRED_FLAGS "${ARCH_C_FLAGS}")
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
__m128i a = __lsx_vreplgr2vr_w(1);
(void)a;
}" HAVE_LSX)

if (NOT HAVE_LSX)
message(FATAL_ERROR "LSX support required for LoongArch support")
endif ()
6 changes: 6 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
/* "Define if building for PPC64EL" */
#cmakedefine ARCH_PPC64EL

/* "Define if building for LOONGARCH64" */
#cmakedefine ARCH_LOONGARCH64

/* "Define if cross compiling for AARCH64" */
#cmakedefine CROSS_COMPILE_AARCH64

Expand Down Expand Up @@ -81,6 +84,9 @@
/* C compiler has arm_neon.h */
#cmakedefine HAVE_C_PPC64EL_ALTIVEC_H

/* C compiler has lsxintrin.h */
#cmakedefine HAVE_C_LOONGARCH64_LSXINTRIN_H

/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
Expand Down
3 changes: 2 additions & 1 deletion cmake/platform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error no
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
CHECK_C_SOURCE_COMPILES("#if !(defined(__loongarch_lp64) || defined( __loongarch64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_LOONGARCH64)
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL OR ARCH_LOONGARCH64)
set(ARCH_64_BIT TRUE)
else()
set(ARCH_32_BIT TRUE)
Expand Down
1 change: 1 addition & 0 deletions src/hs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/arch/x86/cpuid_inline.h"
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
#elif defined(ARCH_LOONGARCH64)
#endif
#include "util/depth.h"
#include "util/popcount.h"
Expand Down
2 changes: 2 additions & 0 deletions src/hs_valid_platform.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,7 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
}
#elif defined(ARCH_PPC64EL) || defined(VS_SIMDE_BACKEND)
return HS_SUCCESS;
#elif defined(ARCH_LOONGARCH64)
return HS_SUCCESS;
#endif
}
75 changes: 75 additions & 0 deletions src/nfa/loongarch64/shufti.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2023, Loongson Technology
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/** \file
* \brief Shufti: character class acceleration.
*/

template <uint16_t S>
static really_inline
const SuperVector<S> blockSingleMask(SuperVector<S> mask_lo, SuperVector<S> mask_hi, SuperVector<S> chars) {
const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);

SuperVector<S> c_lo = chars & low4bits;
SuperVector<S> c_hi = chars.template vshr_8_imm<4>();
c_lo = mask_lo.template pshufb<false>(c_lo);
c_hi = mask_hi.template pshufb<false>(c_hi);

return (c_lo & c_hi) > (SuperVector<S>::Zeroes());
}

template <uint16_t S>
static really_inline
SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, SuperVector<S> mask2_lo, SuperVector<S> mask2_hi, SuperVector<S> chars) {

const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
SuperVector<S> chars_lo = chars & low4bits;
chars_lo.print8("chars_lo");
SuperVector<S> chars_hi = chars.template vshr_64_imm<4>() & low4bits;
chars_hi.print8("chars_hi");
SuperVector<S> c1_lo = mask1_lo.template pshufb<true>(chars_lo);
c1_lo.print8("c1_lo");
SuperVector<S> c1_hi = mask1_hi.template pshufb<true>(chars_hi);
c1_hi.print8("c1_hi");
SuperVector<S> t1 = c1_lo | c1_hi;
t1.print8("t1");

SuperVector<S> c2_lo = mask2_lo.template pshufb<true>(chars_lo);
c2_lo.print8("c2_lo");
SuperVector<S> c2_hi = mask2_hi.template pshufb<true>(chars_hi);
c2_hi.print8("c2_hi");
SuperVector<S> t2 = c2_lo | c2_hi;
t2.print8("t2");
t2.template vshr_128_imm<1>().print8("t2.vshr_128(1)");
SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>());
t.print8("t");

return !t.eq(SuperVector<S>::Ones());
}
63 changes: 63 additions & 0 deletions src/nfa/loongarch64/truffle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2023, Loongson Technology
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/** \file
* \brief Truffle: character class acceleration.
*
*/

template <uint16_t S>
static really_inline
const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset, SuperVector<S> chars) {

chars.print8("chars");
shuf_mask_lo_highclear.print8("shuf_mask_lo_highclear");
shuf_mask_lo_highset.print8("shuf_mask_lo_highset");

SuperVector<S> highconst = SuperVector<S>::dup_u8(0x80);
highconst.print8("highconst");
SuperVector<S> shuf_mask_hi = SuperVector<S>::dup_u64(0x8040201008040201);
shuf_mask_hi.print8("shuf_mask_hi");

SuperVector<S> shuf1 = shuf_mask_lo_highclear.pshufb(chars);
shuf1.print8("shuf1");
SuperVector<S> t1 = chars ^ highconst;
t1.print8("t1");
SuperVector<S> shuf2 = shuf_mask_lo_highset.pshufb(t1);
shuf2.print8("shuf2");
SuperVector<S> t2 = highconst.opandnot(chars.template vshr_64_imm<4>());
t2.print8("t2");
SuperVector<S> shuf3 = shuf_mask_hi.pshufb(t2);
shuf3.print8("shuf3");
SuperVector<S> res = (shuf1 | shuf2) & shuf3;
res.print8("(shuf1 | shuf2) & shuf3");

return !res.eq(SuperVector<S>::Zeroes());
}
130 changes: 130 additions & 0 deletions src/nfa/loongarch64/vermicelli.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2023, Loongson Technology
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/** \file
* \brief Vermicelli: single-byte and double-byte acceleration.
*/

template <uint16_t S>
static really_inline
const u8 *vermicelliBlock(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {

SuperVector<S> mask = chars.eq(casemask & data);
return first_non_zero_match<S>(buf, mask, len);
}

template <uint16_t S>
static really_inline
const u8 *vermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {

SuperVector<S> mask = !chars.eq(casemask & data);
return first_zero_match_inverted<S>(buf, mask, len);
}

template <uint16_t S>
static really_inline
const u8 *rvermicelliBlock(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, u8 const *buf, u16 const len) {

SuperVector<S> mask = chars.eq(casemask & data);
return last_non_zero_match<S>(buf, mask, len);
}

template <uint16_t S>
static really_inline
const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, const u8 *buf, u16 const len) {

data.print8("data");
chars.print8("chars");
casemask.print8("casemask");
SuperVector<S> mask = !chars.eq(casemask & data);
mask.print8("mask");
return last_zero_match_inverted<S>(buf, mask, len);
}

template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {

SuperVector<S> v = casemask & data;
SuperVector<S> mask1 = chars1.eq(v);
SuperVector<S> mask2 = chars2.eq(v);
SuperVector<S> mask = mask1 & (mask2 >> 1);

DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}

return first_non_zero_match<S>(buf, mask, len);
}

template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {

SuperVector<S> v = casemask & data;
SuperVector<S> mask1 = chars1.eq(v);
SuperVector<S> mask2 = chars2.eq(v);
SuperVector<S> mask = (mask1 << 1)& mask2;

DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
}

return last_non_zero_match<S>(buf, mask, len);
}

template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) {

SuperVector<S> v1 = chars1.eq(data & mask1);
SuperVector<S> v2 = chars2.eq(data & mask2);
SuperVector<S> mask = v1 & (v2 >> 1);

DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}

return first_non_zero_match<S>(buf, mask, len);
}

Loading