Skip to content

Commit

Permalink
Back to size optimization + added raw-feldspar-mcs dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
kmate committed Jun 2, 2016
1 parent 16418dc commit 86d6733
Show file tree
Hide file tree
Showing 3 changed files with 334 additions and 1 deletion.
185 changes: 185 additions & 0 deletions csrc/feldspar-parallella.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#include <stdlib.h>

#include "feldspar-parallella.h"

#ifndef __epiphany__

#include <stdint.h>
#include <time.h>

/*
 * Suspend the calling host thread for `nsec` nanoseconds.
 *
 * nanosleep() rejects tv_nsec values outside [0, 999999999] with EINVAL, so
 * the interval is split into whole seconds plus the remaining nanoseconds.
 * This keeps polling intervals of one second or longer working.
 *
 * Returns the result of nanosleep(): 0 on success, -1 on failure.
 */
static inline int wait(uint32_t nsec)
{
    const uint32_t nsec_per_sec = 1000000000u;
    struct timespec ts = {
        .tv_sec = nsec / nsec_per_sec,
        .tv_nsec = nsec % nsec_per_sec
    };
    return nanosleep(&ts, NULL);
}

/*
 * Initialize a host<->core channel descriptor and reset the channel flags.
 *
 * chan       descriptor to fill in
 * g, r, c    device handle and core coordinates passed on to the flag writes
 * buf        shared buffer used to carry the channel items
 * is_open_o  offset of the "open" flag in the core's local memory
 * is_full_o  offset of the "full" flag in the core's local memory
 *
 * After the call the channel is marked open and empty.
 */
void init_host_chan(host_chan_t *chan,
                    e_epiphany_t *g, e_coreid_t r, e_coreid_t c,
                    e_mem_t *buf, off_t is_open_o, off_t is_full_o)
{
    bool opened = true;
    bool filled = false;

    *chan = (host_chan_t) {
        .g = g,
        .r = r,
        .c = c,
        .buf = buf,
        .is_open = is_open_o,
        .is_full = is_full_o
    };

    // publish the initial state: open first, then empty
    host_write_local(g, r, c, is_open_o, &opened, 0, 0, 0);
    host_write_local(g, r, c, is_full_o, &filled, 0, 0, 0);
}

/*
 * Reset the flags of a core-to-core channel from the host.
 *
 * g, r, c    device handle and core coordinates passed on to the flag writes
 * is_open_o  offset of the "open" flag in the core's local memory
 * is_full_o  offset of the "full" flag in the core's local memory
 *
 * After the call the channel is marked open and empty.
 */
void init_core_chan(e_epiphany_t *g, e_coreid_t r, e_coreid_t c,
                    off_t is_open_o, off_t is_full_o)
{
    bool opened = true;
    bool filled = false;

    host_write_local(g, r, c, is_open_o, &opened, 0, 0, 0);
    host_write_local(g, r, c, is_full_o, &filled, 0, 0, 0);
}

/*
 * Host side of a host-to-core channel write.
 *
 * Polls the channel flags until the channel is empty, then copies `len`
 * bytes starting at byte offset `off` of `src` to the start of the shared
 * buffer and marks the channel full.
 *
 * Returns true when the item was written, false when the channel was found
 * closed while waiting (nothing is written in that case).
 */
bool _host_write_h2c(host_chan_t chan, void *src, size_t off, size_t len) {
    bool is_full[1] = { true };

    // wait for empty space
    for (;;) {
        bool is_open[1] = { true };
        host_read_local(chan.g, chan.r, chan.c, chan.is_open, is_open, 0, 0, 0);
        if (!*is_open) {
            // do not wait for a closed channel to get empty
            return false;
        }
        host_read_local(chan.g, chan.r, chan.c, chan.is_full, is_full, 0, 0, 0);
        if (!*is_full) {
            break;
        }
        // A failed or interrupted sleep must not end the wait: the channel
        // is still full and writing now would overwrite an unread item.
        (void) wait(HOST_CHANNEL_POLL_NSEC);
    }

    // write item and set channel full
    host_write_shared(chan.buf, src, 0, off, off + len - 1);
    *is_full = true;
    host_write_local(chan.g, chan.r, chan.c, chan.is_full, is_full, 0, 0, 0);
    return true;
}

/*
 * Host side of a core-to-host channel read.
 *
 * Polls the channel flags until an item is available, then copies `len`
 * bytes from the start of the shared buffer to byte offset `off` of `dst`
 * and marks the channel empty.
 *
 * Returns true when an item was read, false when the channel was found
 * empty and closed (nothing is read in that case).
 */
bool _host_read_c2h(host_chan_t chan, void *dst, size_t off, size_t len)
{
    bool full = false;

    // wait for an item
    for (;;) {
        host_read_local(chan.g, chan.r, chan.c, chan.is_full, &full, 0, 0, 0);
        if (full) {
            break;
        }

        bool open = true;
        host_read_local(chan.g, chan.r, chan.c, chan.is_open, &open, 0, 0, 0);
        if (!open) {
            // an empty, closed channel will never be filled
            return false;
        }
        wait(HOST_CHANNEL_POLL_NSEC);
    }

    // fetch the item, then hand the buffer back by clearing the full flag
    host_read_shared(chan.buf, dst, 0, off, off + len - 1);
    full = false;
    host_write_local(chan.g, chan.r, chan.c, chan.is_full, &full, 0, 0, 0);
    return true;
}

/*
 * Close a channel from the host by clearing its "open" flag in the core's
 * local memory.
 */
void host_close_chan(host_chan_t chan)
{
    bool open_state = false;

    host_write_local(chan.g, chan.r, chan.c, chan.is_open, &open_state, 0, 0, 0);
}

#else /* __epiphany__ */

#include <stdint.h>

/*
 * Core side of a core-to-host channel write.
 *
 * Busy-waits until the channel is empty, then copies `len` bytes starting at
 * byte offset `off` of `src` to the shared buffer `buf` and sets the full
 * flag.
 *
 * Returns true when the item was written, false when the channel was found
 * closed while waiting.
 */
bool _core_write_c2h(volatile void *const buf,
                     volatile bool *const is_open,
                     volatile bool *const is_full,
                     void *src, size_t off, size_t len)
{
    for (;;) {
        if (!*is_open) {
            // a closed channel will never be drained
            return false;
        }
        if (!*is_full) {
            break;
        }
    }

    core_write_shared(buf, src, 0, off, off + len - 1);
    *is_full = true;
    return true;
}

/*
 * Core side of a host-to-core channel read.
 *
 * Busy-waits until the channel holds an item, then copies `len` bytes from
 * the shared buffer `buf` to byte offset `off` of `dst` and clears the full
 * flag.
 *
 * Returns true when an item was read, false when the channel was found empty
 * and closed.
 */
bool _core_read_h2c(volatile void *const buf,
                    volatile bool *const is_open,
                    volatile bool *const is_full,
                    void *dst, size_t off, size_t len)
{
    for (;;) {
        if (*is_full) {
            break;
        }
        if (!*is_open) {
            // an empty, closed channel will never be filled
            return false;
        }
    }

    core_read_shared(buf, dst, 0, off, off + len - 1);
    *is_full = false;
    return true;
}

/*
 * Writer side of a core-to-core channel.
 *
 * Busy-waits until the channel is empty, then copies `len` bytes starting at
 * byte offset `off` of `src` into the channel buffer `buf` and sets the full
 * flag.
 *
 * Returns true when the item was written, false when the channel was found
 * closed while waiting.
 */
bool _core_write_c2c(volatile void *const buf,
                     volatile bool *const is_open,
                     volatile bool *const is_full,
                     void *src, size_t off, size_t len)
{
    for (;;) {
        if (!*is_open) {
            // a closed channel will never be drained
            return false;
        }
        if (!*is_full) {
            break;
        }
    }

    core_write_local(buf, src, 0, off, off + len - 1);
    *is_full = true;
    return true;
}

/*
 * Reader side of a core-to-core channel.
 *
 * Busy-waits until the channel holds an item, then copies `len` bytes from
 * the channel buffer `buf` to byte offset `off` of `dst` and clears the full
 * flag.
 *
 * Returns true when an item was read, false when the channel was found empty
 * and closed.
 */
bool _core_read_c2c(volatile void *const buf,
                    volatile bool *const is_open,
                    volatile bool *const is_full,
                    void *dst, size_t off, size_t len)
{
    for (;;) {
        if (*is_full) {
            break;
        }
        if (!*is_open) {
            // an empty, closed channel will never be filled
            return false;
        }
    }

    core_read_local(buf, dst, 0, off, off + len - 1);
    *is_full = false;
    return true;
}

/*
 * Close a channel from the core side by clearing its "open" flag.
 *
 * `buf` and `is_full` are accepted so the call has the same argument list as
 * the other core channel operations; they are not used.
 */
void core_close_chan(volatile void *const buf,
                     volatile bool *const is_open,
                     volatile bool *const is_full)
{
    (void) buf;
    (void) is_full;

    *is_open = false;
}

// based on the epiphany-bsp library
/*
 * Copy `bytes` bytes from `src` to `dst` (regions must not overlap).
 *
 * When both pointers are 8-byte aligned the bulk is moved as 64-bit words,
 * when both are 4-byte aligned as 32-bit words; any remainder, and all
 * unaligned input, is copied byte by byte.
 *
 * The alignment tests are written as `(bits & mask) == 0`: `==` binds tighter
 * than `&`, so `0 == bits & mask` would compare `bits` with zero first and
 * the word-wise paths would effectively never be taken.
 */
void fast_memcpy(void *dst, const void *src, size_t bytes) {
    // combined address bits: a low bit set in either pointer shows up here
    const uintptr_t bits = (uintptr_t) dst | (uintptr_t) src;

    if ((bits & 0x7) == 0) { // align 8
        size_t count = bytes >> 3;
        bytes &= 0x7;
        uint64_t *dst8 = (uint64_t *) dst;
        const uint64_t *src8 = (const uint64_t *) src;
        while (count--) {
            *dst8++ = *src8++;
        }
        dst = (void *) dst8;
        src = (const void *) src8;
    } else if ((bits & 0x3) == 0) { // align 4
        size_t count = bytes >> 2;
        bytes &= 0x3;
        uint32_t *dst4 = (uint32_t *) dst;
        const uint32_t *src4 = (const uint32_t *) src;
        while (count--) {
            *dst4++ = *src4++;
        }
        dst = (void *) dst4;
        src = (const void *) src4;
    }

    // tail bytes (or the whole buffer when the pointers are unaligned)
    uint8_t *dst1 = (uint8_t *) dst;
    const uint8_t *src1 = (const uint8_t *) src;
    while (bytes--) {
        *dst1++ = *src1++;
    }
}

#endif /* __epiphany__ */
148 changes: 148 additions & 0 deletions include/feldspar-parallella.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#ifndef __FELDSPAR_PARALLELLA_H__
#define __FELDSPAR_PARALLELLA_H__

#include <stdbool.h>

#ifndef __epiphany__

#include <e-hal.h>

// host operations on core local memories

// Each macro below transfers elements lower..upper (inclusive) of a host
// array. `offset` is an element index on the device side; byte sizes are
// derived from the element type of the host array argument, which is fully
// parenthesized so that expression arguments (e.g. `p + 1`) are sized
// correctly.

// write src[lower..upper] to byte offset `dst` (+ `offset` elements) in the
// local memory of core (r, c)
#define host_write_local(g, r, c, dst, src, offset, lower, upper) \
    e_write((g), (r), (c), (dst) + (offset) * sizeof(*(src)), (src) + (lower), \
            ((upper) - (lower) + 1) * sizeof(*(src)))

// read into dst[lower..upper] from byte offset `src` (+ `offset` elements) in
// the local memory of core (r, c)
#define host_read_local(g, r, c, src, dst, offset, lower, upper) \
    e_read((g), (r), (c), (src) + (offset) * sizeof(*(dst)), (dst) + (lower), \
           ((upper) - (lower) + 1) * sizeof(*(dst)))

// host operations on shared external memory

// write src[lower..upper] to element index `offset` of the shared buffer `dst`
#define host_write_shared(dst, src, offset, lower, upper) \
    e_write((dst), 0, 0, (offset) * sizeof(*(src)), (src) + (lower), \
            ((upper) - (lower) + 1) * sizeof(*(src)))

// read into dst[lower..upper] from element index `offset` of the shared
// buffer `src`
#define host_read_shared(src, dst, offset, lower, upper) \
    e_read((src), 0, 0, (offset) * sizeof(*(dst)), (dst) + (lower), \
           ((upper) - (lower) + 1) * sizeof(*(dst)))

// host channel polling interval in nanoseconds

// Default delay, in nanoseconds, between two polls of the channel flags by
// the host. Define HOST_CHANNEL_POLL_NSEC before including this header to
// override it.
#ifndef HOST_CHANNEL_POLL_NSEC
#define HOST_CHANNEL_POLL_NSEC 10000
#endif

// host-to-core and core-to-host channel

// Host-side descriptor of a channel between the host and one core.
typedef struct host_chan {
e_epiphany_t *g;   // device handle passed to the local-memory accesses
e_coreid_t r;      // core coordinate (presumably the row; confirm against e-hal)
e_coreid_t c;      // core coordinate (presumably the column; confirm against e-hal)
e_mem_t *buf;      // shared buffer that carries the channel items
off_t is_open;     // offset of the "open" flag in the core's local memory
off_t is_full;     // offset of the "full" flag in the core's local memory
} host_chan_t;

// host-to-core and core-to-host channel initialization

// Fills in `chan` from the remaining arguments and marks the channel open and
// empty by writing both flags to the core's local memory.
void init_host_chan(host_chan_t *chan,
e_epiphany_t *g, e_coreid_t r, e_coreid_t c,
e_mem_t *buf, off_t is_open_o, off_t is_full_o);

// core-to-core channel initialization

// Marks the channel whose flags live at offsets `is_open_o` / `is_full_o` in
// the local memory of core (r, c) as open and empty.
void init_core_chan(e_epiphany_t *g, e_coreid_t r, e_coreid_t c,
off_t is_open_o, off_t is_full_o);

// host-to-core channel write

// Writes `len` elements of `src`, starting at element `off`, to the channel.
// Element counts are converted to byte counts using the element type of
// `src`. Expands to a plain expression (no trailing semicolon) yielding the
// bool result of _host_write_h2c, so it can be used in conditions.
#define host_write_h2c(chan, src, off, len) \
    _host_write_h2c((chan), (src), (off) * sizeof(*(src)), (len) * sizeof(*(src)))

// Byte-level implementation behind host_write_h2c: `off` and `len` are in
// bytes. Returns false if the channel is closed, true once the item has been
// written.
bool _host_write_h2c(host_chan_t chan, void *src, size_t off, size_t len);

// core-to-host channel read

// Reads `len` elements from the channel into `dst`, starting at element
// `off`. Element counts are converted to byte counts using the element type
// of `dst`. Expands to a plain expression (no trailing semicolon) yielding
// the bool result of _host_read_c2h, so it can be used in conditions.
#define host_read_c2h(chan, dst, off, len) \
    _host_read_c2h((chan), (dst), (off) * sizeof(*(dst)), (len) * sizeof(*(dst)))

// Byte-level implementation behind host_read_c2h: `off` and `len` are in
// bytes. Returns false if the channel is empty and closed, true once an item
// has been read.
bool _host_read_c2h(host_chan_t chan, void *dst, size_t off, size_t len);

// close any kind of channel

// Clears the channel's "open" flag in the core's local memory.
void host_close_chan(host_chan_t chan);

#else /* __epiphany__ */

#include <e-lib.h>
#include <stdlib.h>

// halting current core

// Expands to a bare `return 0` statement, so it is only valid inside a
// function returning an integer-compatible type (presumably the core
// program's main; confirm against the generated code).
#define core_halt() return 0

// core operations on core local memories

// Copy src[lower..upper] (inclusive) to `dst + offset` using e_dma_copy.
// The byte count uses the element type of `src`, which is fully parenthesized
// so that expression arguments are sized correctly.
#define core_write_local(dst, src, offset, lower, upper) \
    e_dma_copy((void*)((dst) + (offset)), (void*)((src) + (lower)), \
               ((upper) - (lower) + 1) * sizeof(*(src)))

// Copy from `src + offset` into dst[lower..upper] (inclusive) using
// fast_memcpy. The byte count uses the element type of `dst`.
#define core_read_local(src, dst, offset, lower, upper) \
    fast_memcpy((void*)((dst) + (lower)), (void*)((src) + (offset)), \
                ((upper) - (lower) + 1) * sizeof(*(dst)))

// Copies `bytes` bytes from `src` to `dst`, using word-sized accesses when
// both pointers are suitably aligned.
void fast_memcpy(void *dst, const void *src, size_t bytes);

// core operations on shared external memory

// Copy src[lower..upper] (inclusive) to the shared external memory at
// e_emem_config.base + `dst` + `offset` elements, using e_dma_copy. The
// element size is taken from `src`, which is fully parenthesized so that
// expression arguments are sized correctly.
#define core_write_shared(dst, src, offset, lower, upper) \
    e_dma_copy((void*)(e_emem_config.base + (dst) + (offset) * sizeof(*(src))), \
               (void*)((src) + (lower)), \
               ((upper) - (lower) + 1) * sizeof(*(src)))

// Copy into dst[lower..upper] (inclusive) from the shared external memory at
// e_emem_config.base + `src` + `offset` elements, using e_dma_copy. The
// element size is taken from `dst`.
#define core_read_shared(src, dst, offset, lower, upper) \
    e_dma_copy((void*)((dst) + (lower)), \
               (void*)(e_emem_config.base + (src) + (offset) * sizeof(*(dst))), \
               ((upper) - (lower) + 1) * sizeof(*(dst)))

// core-to-host channel write

// Writes `len` elements of `src`, starting at element `off`, to a
// core-to-host channel. Element counts are converted to byte counts using the
// element type of `src`. Expands to a plain expression (no trailing
// semicolon) yielding the bool result of _core_write_c2h.
#define core_write_c2h(buf, is_open, is_full, src, off, len) \
    _core_write_c2h((buf), (is_open), (is_full), (src), \
                    (off) * sizeof(*(src)), (len) * sizeof(*(src)))

// Byte-level implementation: `off` and `len` are in bytes. Returns false if
// the channel is closed, true once the item has been written.
bool _core_write_c2h(volatile void *const buf,
                     volatile bool *const is_open,
                     volatile bool *const is_full,
                     void *src, size_t off, size_t len);

// host-to-core channel read

// Reads `len` elements from a host-to-core channel into `dst`, starting at
// element `off`. Element counts are converted to byte counts using the
// element type of `dst`. Expands to a plain expression (no trailing
// semicolon) yielding the bool result of _core_read_h2c.
#define core_read_h2c(buf, is_open, is_full, dst, off, len) \
    _core_read_h2c((buf), (is_open), (is_full), (dst), \
                   (off) * sizeof(*(dst)), (len) * sizeof(*(dst)))

// Byte-level implementation: `off` and `len` are in bytes. Returns false if
// the channel is empty and closed, true once an item has been read.
bool _core_read_h2c(volatile void *const buf,
                    volatile bool *const is_open,
                    volatile bool *const is_full,
                    void *dst, size_t off, size_t len);

// core-to-core channel write

// Writes `len` elements of `src`, starting at element `off`, to a
// core-to-core channel. Element counts are converted to byte counts using the
// element type of `src`. Expands to a plain expression (no trailing
// semicolon) yielding the bool result of _core_write_c2c.
#define core_write_c2c(buf, is_open, is_full, src, off, len) \
    _core_write_c2c((buf), (is_open), (is_full), (src), \
                    (off) * sizeof(*(src)), (len) * sizeof(*(src)))

// Byte-level implementation: `off` and `len` are in bytes. Returns false if
// the channel is closed, true once the item has been written.
bool _core_write_c2c(volatile void *const buf,
                     volatile bool *const is_open,
                     volatile bool *const is_full,
                     void *src, size_t off, size_t len);

// core-to-core channel read

// Reads `len` elements from a core-to-core channel into `dst`, starting at
// element `off`. Element counts are converted to byte counts using the
// element type of `dst`. Expands to a plain expression (no trailing
// semicolon) yielding the bool result of _core_read_c2c.
#define core_read_c2c(buf, is_open, is_full, dst, off, len) \
    _core_read_c2c((buf), (is_open), (is_full), (dst), \
                   (off) * sizeof(*(dst)), (len) * sizeof(*(dst)))

// Byte-level implementation: `off` and `len` are in bytes. Returns false if
// the channel is empty and closed, true once an item has been read.
bool _core_read_c2c(volatile void *const buf,
                    volatile bool *const is_open,
                    volatile bool *const is_full,
                    void *dst, size_t off, size_t len);

// close any kind of channel

// Clears the channel's "open" flag through `is_open`.
void core_close_chan(volatile void *const buf,
volatile bool *const is_open,
volatile bool *const is_full);

#endif /* __epiphany__ */

#endif /* __FELDSPAR_PARALLELLA_H__ */

2 changes: 1 addition & 1 deletion make_run_epiphany.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ do
SREC=../${f%.*}.srec

# Build DEVICE side programs
e-gcc -std=gnu99 -ffast-math -fsingle-precision-constant -I../include -T ${ELDF} ../csrc/feldspar-parallella.c $f -o ${ELF} -le-lib -lm -O2
e-gcc -std=gnu99 -ffast-math -fsingle-precision-constant -I../include -T ${ELDF} ../csrc/feldspar-parallella.c $f -o ${ELF} -le-lib -lm -Os
# Convert ebinaries to SREC files
e-objcopy --srec-forceS3 --output-target srec ${ELF} ${SREC}
done
Expand Down

0 comments on commit 86d6733

Please sign in to comment.