Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i386, i486 and Pentium support #5

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c829dc3
- Added support for SMEP not being available on the CPU.
superfury Mar 9, 2024
d692353
- Implemented 80386 paging and CPUID support.
superfury Mar 10, 2024
10a6ad0
- Fixed some compiler warnings.
superfury Mar 10, 2024
11b0859
- Improved CPUID check code.
superfury Mar 10, 2024
31a916c
- Fixed issues with CPUID detection assembly code.
superfury Mar 10, 2024
e288d53
- Implemented various code 4KB page table compilation bugfixes.
superfury Mar 10, 2024
23fc0dc
- Fixed invalid reinterpret_cast on 4KB page table pointer.
superfury Mar 10, 2024
7e9dcd5
- Fixed issues with the assembly code for detecting CPUID support.
superfury Mar 10, 2024
5a5db40
- Fixed masking of ESP to align the stack when checking for CPUID cap…
superfury Mar 10, 2024
24b98d4
- Added support for CPUs without the alignment flag supported to not …
superfury Mar 10, 2024
ce99ac0
- Compile for i386 instead of i686.
superfury Mar 10, 2024
ffa417c
- There are no REX prefixes on x86.
superfury Mar 11, 2024
a53485c
- Fixed x64 vs x86 REX prefixes.
superfury Mar 11, 2024
c322510
- Fixed REX prefix code to compile again on x86.
superfury Mar 11, 2024
b5c886d
Update search.hpp
superfury Mar 13, 2024
72305e1
- Pass detect and used prefixes settings to the search engine.
superfury Mar 13, 2024
9929cff
Update search.hpp
superfury Mar 13, 2024
e81dbc8
Update search.cpp
superfury Mar 13, 2024
379731b
Update search.cpp
superfury Mar 13, 2024
7e2159e
Update search.cpp
superfury Mar 13, 2024
4dbe1a2
Update main.cpp
superfury Mar 13, 2024
2ffd34e
- Optimized unused prefix check using a variable to shift.
superfury Mar 13, 2024
a39f50c
- Moved the prefix group LUT into the search engine class.
superfury Mar 13, 2024
64dd5cb
- More code fixes.
superfury Mar 13, 2024
f068892
- Fixed prefix LUT initialization.
superfury Mar 13, 2024
dba2480
- Modified the prefix group LUT into a class.
superfury Mar 13, 2024
d217a47
- Fixed search engine class.
superfury Mar 13, 2024
86e7ce8
- Improved prefix_group_lut initialization.
superfury Mar 13, 2024
2e65fad
- More prefix group LUT code fixes.
superfury Mar 13, 2024
9203312
- Fixed unused prefixes detection.
superfury Mar 13, 2024
34e58a1
- Restored unused prefix detection code.
superfury Mar 13, 2024
576c00e
- Fixed group LUT lookups.
superfury Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ bare64_env.Append(CCFLAGS=" -m64 -march=x86-64 -mno-red-zone",
ASFLAGS=" -felf64")

bare32_env = common_bare_env.Clone(ARCH_NAME="x86_32")
bare32_env.Append(CCFLAGS=" -m32 -march=i686 -mregparm=3 -fomit-frame-pointer",
bare32_env.Append(CCFLAGS=" -m32 -march=i386 -mregparm=3 -fomit-frame-pointer",
ASFLAGS=" -felf32")

bins = []
Expand Down
81 changes: 81 additions & 0 deletions src/common/cpuid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,47 @@

#include "cpuid.hpp"

// Reports whether the CPUID instruction can be used.
//
// 32-bit builds probe for it: the code flips bit 0x200000 of EFLAGS (the ID
// flag), reads EFLAGS back and returns true if that bit differs from the
// original value. x86-64 builds skip the probe and always return true.
bool cpuid_supported()
{
#ifndef __x86_64__
// Only 32-bit targets can lack CPUID, so only they need the probe.
// res1 receives EFLAGS as read back after the flip, res2 the original EFLAGS.
unsigned int res1,res2;
asm(
"pushfl\n\t" // Save the original EFLAGS (interrupt flag included) for the final popfl
"cli\n\t" // Disable interrupts while ESP is temporarily realigned
"push %%ebp\n\t" // Preserve EBP; it is used below to remember the unaligned ESP
"mov %%esp,%%ebp\n\t" // Remember ESP so it can be restored exactly
"and $-4,%%esp\n\t" // Round ESP down to a 4-byte boundary
"pushfl\n\t" // Read the current EFLAGS ...
"pop %%eax\n\t" // ... into EAX
"mov %%eax,%%ebx\n\t" // Keep an unmodified copy in EBX for the comparison
"xor $0x200000,%%eax\n\t" // Flip the ID bit in the EAX copy
"push %%eax\n\t"
"popfl\n\t" // Try to load the modified value into EFLAGS
"pushfl\n\t" // Read EFLAGS back ...
"pop %%eax\n\t" // ... to see whether the flipped bit was kept
"mov %%eax, %0\n\t" // %0 is constrained to EAX ("=a"), so this targets the same register
"mov %%ebx, %1\n\t" // %1 is constrained to EBX ("=b"), so this targets the same register
"mov %%ebp,%%esp\n\t" // Undo the alignment of ESP
"pop %%ebp\n\t" // Restore EBP
"popfl" // Restore the EFLAGS saved by the first pushfl (also undoes the flip and the cli)
: "=a" (res1), "=b" (res2));
return (((res1 ^ res2) & 0x200000)!=0); // True if the ID bit could be toggled
#else
return true; // 64-bit build: CPUID is assumed to exist, no probe is performed
#endif
}

cpuid_result get_cpuid(uint32_t leaf, uint32_t subleaf)
{
cpuid_result res;
if (!cpuid_supported()) //CPUID not supported?
{
res.eax = res.ebx = res.edx = res.ecx = 0; //Simply give empty result!
return res; //Give empty result!
}

//CPUID is supported!
asm ("cpuid"
: "=a" (res.eax), "=b" (res.ebx),
"=d" (res.edx), "=c" (res.ecx)
Expand Down Expand Up @@ -47,3 +85,46 @@ bool has_nx()
return get_cpuid_max_ext_level() >= 0x80000001
and (get_cpuid(0x80000001).edx & (1 << 20));
}

// Returns true when standard CPUID leaf 7 exists and its EBX bit 7 (SMEP) is set.
bool has_smep()
{
    // Leaf 7 must be within the supported range before it is queried.
    if (get_cpuid_max_std_level() < 7) {
        return false;
    }

    auto const leaf7_ebx = get_cpuid(0x7).ebx;
    return (leaf7_ebx & (1 << 7)) != 0;
}

// Returns true when standard CPUID leaf 1 exists and its EDX bit 3 (PSE) is set.
bool has_pse()
{
    // Leaf 1 must be within the supported range before it is queried.
    if (get_cpuid_max_std_level() < 1) {
        return false;
    }

    auto const leaf1_edx = get_cpuid(0x1).edx;
    return (leaf1_edx & (1 << 3)) != 0;
}

// Reports whether the WP bit in CR0 is considered usable.
//
// 32-bit builds do not read or write CR0 here. They flip bit 0x40000 of
// EFLAGS (the AC flag), read EFLAGS back and return true if that bit differs
// from the original value. x86-64 builds always return true.
// NOTE(review): a toggleable AC flag is used as a stand-in for CR0.WP support
// (presumably because both arrived with the same CPU generation) - confirm
// this holds for every CPU this is meant to run on.
bool has_wp()
{
#ifndef __x86_64__
// Only 32-bit targets are probed.
// res1 receives EFLAGS as read back after the flip, res2 the original EFLAGS.
unsigned int res1, res2;
asm(
"pushfl\n\t" // Save the original EFLAGS (interrupt flag included) for the final popfl
"cli\n\t" // Disable interrupts while ESP is temporarily realigned
"push %%ebp\n\t" // Preserve EBP; it is used below to remember the unaligned ESP
"mov %%esp,%%ebp\n\t" // Remember ESP so it can be restored exactly
"and $-4,%%esp\n\t" // Round ESP down to a 4-byte boundary, so stack accesses stay aligned while AC may be set
"pushfl\n\t" // Read the current EFLAGS ...
"pop %%eax\n\t" // ... into EAX
"mov %%eax,%%ebx\n\t" // Keep an unmodified copy in EBX for the comparison
"xor $0x40000,%%eax\n\t" // Flip the AC bit in the EAX copy
"push %%eax\n\t"
"popfl\n\t" // Try to load the modified value into EFLAGS
"pushfl\n\t" // Read EFLAGS back ...
"pop %%eax\n\t" // ... to see whether the flipped bit was kept
"mov %%eax, %0\n\t" // %0 is constrained to EAX ("=a"), so this targets the same register
"mov %%ebx, %1\n\t" // %1 is constrained to EBX ("=b"), so this targets the same register
"mov %%ebp,%%esp\n\t" // Undo the alignment of ESP
"pop %%ebp\n\t" // Restore EBP
"popfl" // Restore the EFLAGS saved by the first pushfl (also undoes the flip and the cli)
: "=a" (res1), "=b" (res2));
return (((res1 ^ res2) & 0x40000) != 0); // True if the AC bit could be toggled; taken to mean CR0.WP is available
#else
return true; // 64-bit build: WP is assumed to exist, no probe is performed
#endif
}
9 changes: 9 additions & 0 deletions src/common/include/cpuid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,12 @@ uint32_t get_cpuid_max_ext_level();

// Returns true, if the CPU reports being able to use the NX bit.
bool has_nx();

// Returns true if standard CPUID leaf 7 is available and reports SMEP
// (EBX bit 7).
bool has_smep();

// Returns true if standard CPUID leaf 1 is available and reports PSE
// (EDX bit 3).
bool has_pse();

// Returns true if the WP bit in CR0 is considered usable. This is not taken
// from CPUID: 32-bit builds check whether the AC flag in EFLAGS can be
// toggled and use that as the answer, and x86-64 builds always return true.
bool has_wp();
15 changes: 12 additions & 3 deletions src/common/include/search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,20 @@ struct instruction_bytes {
{}
};

// Lookup table that maps every possible instruction byte to a prefix group.
//
// data[b] is the group index assigned to byte b, or a negative value when b
// is not treated as a prefix byte.
class prefix_group_lut {
public:
    int8_t data[256];

    // Fills the table. detect_prefixes_ is a bit mask: bit N enables
    // detection of prefix group N; bytes of a disabled group are stored as
    // "not a prefix".
    //
    // The constructor is explicit so that a plain integer can never be
    // silently converted into a freshly built table (e.g. when passed where
    // a prefix_group_lut const & is expected).
    explicit prefix_group_lut(size_t detect_prefixes_);
};

class search_engine {
instruction_bytes current_;
size_t increment_at_ = 0;

const size_t max_prefixes_;
const size_t max_prefixes_; //How many prefixes to use at once.
const size_t used_prefixes_; //What prefixes to scan through.
prefix_group_lut group_lut_; //What group lut to use!

public:

Expand All @@ -37,7 +46,7 @@ class search_engine {
return current_;
}

search_engine(size_t max_prefixes = 0, instruction_bytes const &start = {})
: current_(start), max_prefixes_(max_prefixes)
search_engine(size_t max_prefixes = 0, size_t used_prefixes = 0xFF, size_t detect_prefixes = 0xFF, instruction_bytes const &start = {})
: current_(start), max_prefixes_(max_prefixes), used_prefixes_(used_prefixes), group_lut_(detect_prefixes)
{}
};
64 changes: 42 additions & 22 deletions src/common/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,60 @@
#include "search.hpp"
#include "util.hpp"

struct prefix_lut {
int8_t data[256];
};

static constexpr int opcode_to_prefix_group(uint8_t byte)
static constexpr int opcode_to_prefix_group(uint8_t byte, size_t detect_prefixes_)
{
int group = -1;

switch (byte) {
case 0xF0: // LOCK
case 0xF2: // REPNE
case 0xF3: // REP
group = 0;
if (detect_prefixes_ & (1<<0)) //To detect?
{
group = 0;
}
break;
case 0x2E: // CS
case 0x36: // SS
case 0x3e: // DS
case 0x26: // ES
case 0x64: // FS
case 0x65: // GS
group = 1;
if (detect_prefixes_ & (1<<1)) //To detect?
{
group = 1;
}
break;
case 0x66: // operand size override
group = 2;
if (detect_prefixes_ & (1<<2)) //To detect?
{
group = 2;
}
break;
case 0x67: // address size override
group = 3;
if (detect_prefixes_ & (1<<3)) //To detect?
{
group = 3;
}
break;
case 0x40 ... 0x4F: // REX prefixes
group = 4;
if (detect_prefixes_ & (1<<4)) //To detect?
{
group = 4;
}
break;
}

return group;
}

static constexpr prefix_lut create_prefix_group_lut()
prefix_group_lut::prefix_group_lut(size_t detect_prefixes_)
{
prefix_lut group_lut {};

for (size_t i = 0; i < array_size(group_lut.data); i++) {
group_lut.data[i] = (int8_t)opcode_to_prefix_group((uint8_t)i);
for (size_t i = 0; i < array_size(data); i++) {
data[i] = (int8_t)opcode_to_prefix_group((uint8_t)i,detect_prefixes_);
}

return group_lut;
}

static prefix_lut prefix_group_lut {create_prefix_group_lut()};

// Encapsulates which prefixes are there, where and how many there are.
struct prefix_state {
uint8_t count[5] {}; // Count of prefixes in each group.
Expand All @@ -75,7 +80,20 @@ struct prefix_state {
if (c >= 2)
return true;
}

return false;
}

// Returns true if any prefix group that occurs in the instruction is not
// enabled in used_prefixes_ (bit N of the mask enables group N).
bool has_unused_prefixes(size_t used_prefixes_) const
{
    for (size_t group = 0; group < array_size(count); ++group) {
        bool const present = count[group] != 0;
        bool const allowed = ((used_prefixes_ >> group) & 1) != 0;

        if (present and not allowed)
            return true;
    }

    return false;
}

Expand All @@ -96,12 +114,12 @@ struct prefix_state {
}
};

static prefix_state analyze_prefixes(instruction_bytes const &instr)
static prefix_state analyze_prefixes(prefix_group_lut const &group_lut_, instruction_bytes const &instr)
{
prefix_state state;

for (size_t i = 0; i < sizeof(instr.raw); i++) {
int group = prefix_group_lut.data[instr.raw[i]];
int group = group_lut_.data[instr.raw[i]];
if (group < 0)
break;

Expand Down Expand Up @@ -138,12 +156,14 @@ bool search_engine::find_next_candidate()
goto again;
}

auto const state = analyze_prefixes(current_);
auto const state = analyze_prefixes(group_lut_, current_);

// Duplicated prefixes make the search space explode without generating
// insight. Also enforce order on prefixes to further reduce search space.
// And also filter out prefixes that are declared not to be used.
if (state.total_prefix_bytes() > max_prefixes_ or
state.has_duplicated_prefixes() or
state.has_unused_prefixes(used_prefixes_) or
not state.has_ordered_prefixes()) {
goto again;
}
Expand Down
12 changes: 11 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ struct options {

// After how many instructions do we stop. Zero means don't stop.
size_t stop_after = 0;

// What prefixes to use. Zero means no prefixes are valid to use. The bits of the number are the opcode groups.
size_t used_prefixes = 0xFF;

// What prefixes to detect. Zero means no prefixes are valid. The bits of the number are the opcode groups.
size_t detect_prefixes = 0xFF;
};

// This will modify cmdline.
Expand All @@ -158,6 +164,10 @@ static options parse_and_destroy_cmdline(char *cmdline)

if (strcmp(key, "prefixes") == 0)
res.prefixes = atoi(value);
if (strcmp(key, "used_prefixes") == 0)
res.used_prefixes = atoi(value);
if (strcmp(key, "detect_prefixes") == 0)
res.detect_prefixes = atoi(value);
if (strcmp(key, "stop_after") == 0)
res.stop_after = atoi(value);
}
Expand All @@ -182,7 +192,7 @@ void start(cpu_features const &features, char *cmdline)
if (options.stop_after)
format(">>> Stopping after ", options.stop_after, " execution attemps.\n");

search_engine search { options.prefixes };
search_engine search { options.prefixes, options.used_prefixes, options.detect_prefixes };
execution_attempt last_attempt;

do {
Expand Down
48 changes: 44 additions & 4 deletions src/x86_32/arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,52 @@ static bool is_aligned(uint64_t v, int order)

static void setup_paging()
{
bool pse_supported = has_pse(); //pse is supported on the CPU?
bool wp_supported = has_wp(); //wp is supported on the CPU?
uintptr_t istart = reinterpret_cast<uintptr_t>(_image_start);
uintptr_t iend = reinterpret_cast<uintptr_t>(_image_end);
uintptr_t page_tables_start = iend+(1U<<22); //Point to the end of the image to store our page tables!
//For now just store it there if required (assuming enough memory is installed)!
if (page_tables_start & 0xFFF) //Make sure to start on the next 4KB boundary if needed!
{
page_tables_start = (page_tables_start + 0xFFF) & ~0xFFF; //4KB boundary of next page!
}

assert(is_aligned(istart, 22), "Image needs to start on large page boundary");

uintptr_t tablepos = page_tables_start; //For looping sub-page tables in the PDE entries!

// Map our binary 1:1
for (uintptr_t c = istart; c <= iend; c += (1U << 22)) {
uintptr_t idx = c >> 22;
pdt[idx] = c | PTE_P | PTE_W | PTE_PS;
uintptr_t idx;
uintptr_t p; //The physical location!
idx = c >> 22; //What index in the page directory
if (pse_supported) //PSE supported? Map 4MB page tables!
{
p = c | PTE_PS; //Directly mapped!
}
else //Map 4KB PDE page directories to their page tables
{
p = tablepos; //Page table position!
tablepos += (1 << 12); //Move in 4KB chunks!
}
pdt[idx] = PTE_P | PTE_W | p; //Map PDE to page table or page
}
// Map additional page tables, if required (non-PSE systems).
if (!pse_supported) // No 4MB pages: every PDE set up above points to a 4KB page table that must be filled.
{
    // A 4KB page table holds exactly 1024 32-bit entries, each mapping 4KB,
    // so one table covers the 4MB region of one page directory entry.
    // Writing a 1025th entry would spill into the following table (or, for
    // the last table, into memory beyond the page-table area).
    constexpr uintptr_t entries_per_table = 1024;

    // Walk the tables in the same order in which the PDEs were assigned.
    tablepos = page_tables_start;
    for (uintptr_t c = istart; c <= iend; c += (1U << 22))
    {
        // Backing page table; addressed directly because paging is not enabled yet.
        uint32_t* t = reinterpret_cast<uint32_t*>(tablepos);

        // Identity-map the 4MB region starting at c in 4KB steps.
        for (uintptr_t d = 0; d < entries_per_table; d++)
        {
            t[d] = (c + d * 4096) | PTE_P | PTE_W;
        }

        tablepos += 4096; // Next page table to fill.
    }
}

// Map user page
Expand All @@ -69,9 +106,12 @@ static void setup_paging()
pdt[bit_select(32, 22, up)] = reinterpret_cast<uintptr_t>(user_pt) | PTE_U | PTE_P;
user_pt[bit_select(22, 12, up)] = reinterpret_cast<uintptr_t>(get_user_page_backing()) | PTE_U | PTE_P;

set_cr4(get_cr4() | CR4_PSE | CR4_SMEP);
if (pse_supported || has_smep()) //Enable either pse or smep and supported?
{
set_cr4(get_cr4() | (pse_supported ? CR4_PSE : 0) | (has_smep() ? CR4_SMEP : 0));
}
set_cr3((uintptr_t)pdt);
set_cr0(get_cr0() | CR0_PG | CR0_WP);
set_cr0(get_cr0() | CR0_PG | (wp_supported?CR0_WP:0));
}

static void setup_gdt()
Expand Down