Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/http range/v23 #6312

Closed
wants to merge 12 commits into from
36 changes: 8 additions & 28 deletions src/app-layer-htp-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,24 +361,6 @@ int HTPFileOpenWithRange(HtpState *s, const uint8_t *filename, uint16_t filename
SCReturnInt(0);
}

static void HTPFileStoreChunkHandleRange(
HttpRangeContainerBlock *c, const uint8_t *data, uint32_t data_len)
{
if (c->container) {
THashDataLock(c->container->hdata);
DEBUG_VALIDATE_BUG_ON(SC_ATOMIC_GET(c->container->hdata->use_cnt) == 0);
if (HttpRangeAppendData(c, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
DEBUG_VALIDATE_BUG_ON(SC_ATOMIC_GET(c->container->hdata->use_cnt) > (uint32_t)INT_MAX);
THashDataUnlock(c->container->hdata);
} else if (c->toskip > 0) {
if (HttpRangeProcessSkip(c, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
}
}

/**
* \brief Store a chunk of data in the flow
*
Expand Down Expand Up @@ -417,7 +399,9 @@ int HTPFileStoreChunk(HtpState *s, const uint8_t *data, uint32_t data_len,
}

if (s->file_range != NULL) {
HTPFileStoreChunkHandleRange(s->file_range, data, data_len);
if (HttpRangeAppendData(s->file_range, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
}

result = FileAppendData(files, data, data_len);
Expand All @@ -435,27 +419,23 @@ int HTPFileStoreChunk(HtpState *s, const uint8_t *data, uint32_t data_len,
static void HTPFileCloseHandleRange(FileContainer *files, const uint8_t flags,
HttpRangeContainerBlock *c, const uint8_t *data, uint32_t data_len)
{
if (HttpRangeAppendData(c, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
if (c->container) {
// we only call HttpRangeClose if we may some new data
// ie we do not call it if we skipped all this range request
THashDataLock(c->container->hdata);
if (c->container->error) {
SCLogDebug("range in ERROR state");
}
if (HttpRangeAppendData(c, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
File *ranged = HttpRangeClose(c, flags);
if (ranged) {
/* HtpState owns the constructed file now */
FileContainerAdd(files, ranged);
}
SCLogDebug("c->container->files->tail %p", c->container->files->tail);
THashDataUnlock(c->container->hdata);
} else {
if (c->toskip > 0) {
if (HttpRangeProcessSkip(c, data, data_len) < 0) {
SCLogDebug("Failed to append data");
}
}
}
}

Expand Down
131 changes: 86 additions & 45 deletions src/app-layer-htp-range.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ static int ContainerUrlRangeSet(void *dst, void *src)
}
RB_INIT(&dst_s->fragment_tree);
dst_s->flags = 0;
dst_s->lastsize = 0;
dst_s->totalsize = 0;
dst_s->hdata = NULL;
dst_s->error = false;
Expand Down Expand Up @@ -131,6 +132,7 @@ static inline bool ContainerValueRangeTimeout(HttpRangeContainerFile *cu, struct
// we only timeout if we have no flow referencing us
if ((uint32_t)ts->tv_sec > cu->expire || cu->error) {
if (SC_ATOMIC_GET(cu->hdata->use_cnt) == 0) {
DEBUG_VALIDATE_BUG_ON(cu->fileOwned);
return true;
}
}
Expand Down Expand Up @@ -260,6 +262,11 @@ static HttpRangeContainerBlock *HttpRangeOpenFileAux(HttpRangeContainerFile *c,
DEBUG_VALIDATE_BUG_ON(c->files == NULL);

if (c->files->tail == NULL) {
DEBUG_VALIDATE_BUG_ON(c->fileOwned);
/* this is the first request, we open a single file in the file container
* this could be part of ContainerUrlRangeSet if we could have
* all the arguments there
*/
if (FileOpenFileWithId(c->files, sbcfg, 0, name, name_len, NULL, 0, flags) != 0) {
SCLogDebug("open file for range failed");
return NULL;
Expand All @@ -270,29 +277,45 @@ static HttpRangeContainerBlock *HttpRangeOpenFileAux(HttpRangeContainerFile *c,
c->error = true;
return NULL;
}
curf->fileOwning = false;
if (total > c->totalsize) {
// TODOask add checks about totalsize remaining the same
// we grow to the maximum size indicated by different range requests
// we could add some warning/app-layer event in this case where
// different range requests indicate different total sizes
c->totalsize = total;
}
const uint64_t buflen = end - start + 1;
if (start == c->files->tail->size && !c->appending) {

/* The big part of this function is now to decide which kind of HttpRangeContainerBlock
* we will return :
* - skipping already processed data
* - storing out of order data for later use
* - directly appending to the file if we are at the right offset
*/
if (start == c->lastsize && !c->fileOwned) {
// easy case : append to current file
curf->container = c;
c->appending = true;
curf->fileOwning = true;
// If we see 2 duplicate range requests with the same range,
// the first one takes the ownership with fileOwned
// protected by the lock from caller HTPFileOpenWithRange
c->fileOwned = true;
return curf;
} else if (start < c->files->tail->size && c->files->tail->size - start >= buflen) {
} else if (start < c->lastsize && c->lastsize - start >= buflen) {
// only overlap
// redundant to be explicit that this block is independent
curf->toskip = buflen;
return curf;
} else if (start < c->files->tail->size && c->files->tail->size - start < buflen &&
!c->appending) {
// skip first overlap, then append
curf->toskip = c->files->tail->size - start;
c->appending = true;
} else if (start < c->lastsize && c->lastsize - start < buflen && !c->fileOwned) {
// some overlap, then some data to append to the file
curf->toskip = c->lastsize - start;
curf->fileOwning = true;
c->fileOwned = true;
curf->container = c;
return curf;
}
// Because we are not in the previous cases, we will store the data for later use

// block/range to be inserted in ordered linked list
if (!(THASH_CHECK_MEMCAP(ContainerUrlRangeList.ht, buflen))) {
// skips this range
Expand All @@ -319,6 +342,8 @@ static HttpRangeContainerBlock *HttpRangeOpenFileAux(HttpRangeContainerFile *c,
}
range->buflen = buflen;
range->start = start;
range->offset = 0;
range->gap = 0;
curf->current = range;
return curf;
}
Expand All @@ -335,42 +360,47 @@ HttpRangeContainerBlock *HttpRangeOpenFile(HttpRangeContainerFile *c, uint64_t s
return r;
}

/**
* \note if we are called with a non-null c->container, it is locked
*/
int HttpRangeProcessSkip(HttpRangeContainerBlock *c, const uint8_t *data, const uint32_t len)
int HttpRangeAppendData(HttpRangeContainerBlock *c, const uint8_t *data, uint32_t len)
{
SCLogDebug("update toskip: adding %u bytes to block %p", (uint32_t)len, c);
if (len == 0) {
return 0;
}
// first check if we need to skip all bytes
if (c->toskip >= len) {
c->toskip -= len;
return 0;
}
int r = 0;
if (c->container) {
if (data == NULL) {
// gap overlaping already known data
r = FileAppendData(c->container->files, NULL, len - c->toskip);
} else {
r = FileAppendData(c->container->files, data + c->toskip, len - c->toskip);
// then if we need to skip only some bytes
if (c->toskip > 0) {
int r;
if (c->fileOwning) {
DEBUG_VALIDATE_BUG_ON(c->container->files == NULL);
if (data == NULL) {
// gap overlaping already known data
r = FileAppendData(c->container->files, NULL, len - c->toskip);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like this is accessed w/o a lock in the HTPFileStoreChunk path. It may work due to c->fileOwning, but I'm not happy with this setup. I don't want to see any access of a shared data structure w/o holding a lock.

Like I wrote in another comment, I thought the idea was to move c->container->files into HttpRangeContainerBlock for the flow handling the in-order chunk. That way we can append to it w/o needing to access the shared data at c->container

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reworking with the FileContainer pointer from comment above

} else {
r = FileAppendData(c->container->files, data + c->toskip, len - c->toskip);
}
}
c->toskip = 0;
return r;
}
c->toskip = 0;
return r;
}

int HttpRangeAppendData(HttpRangeContainerBlock *c, const uint8_t *data, uint32_t len)
{
if (len == 0) {
return 0;
// If we are owning the file to append to it, let's do it
if (c->fileOwning) {
DEBUG_VALIDATE_BUG_ON(c->container->files == NULL);
SCLogDebug("update files (FileAppendData)");
return FileAppendData(c->container->files, data, len);
}
// first check if we have a current allocated buffer to copy to
// If we are not in the previous cases, only one case remains
DEBUG_VALIDATE_BUG_ON(c->current == NULL);
// So we have a current allocated buffer to copy to
// in the case of an unordered range being handled
if (c->current) {
SCLogDebug("update current: adding %u bytes to block %p", len, c);
// GAP "data"
if (data == NULL) {
// just feed the gap in the current position, instead of its right one
return FileAppendData(c->container->files, NULL, len);
// just save the length of the gap
c->current->gap += len;
// data, but we're not yet complete
} else if (c->current->offset + len < c->current->buflen) {
memcpy(c->current->buffer + c->current->offset, data, len);
Expand All @@ -386,14 +416,7 @@ int HttpRangeAppendData(HttpRangeContainerBlock *c, const uint8_t *data, uint32_
c->current->offset = c->current->buflen;
}
return 0;
// then check if we are skipping
} else if (c->toskip > 0) {
return HttpRangeProcessSkip(c, data, len);
}
// last we are ordered, simply append
DEBUG_VALIDATE_BUG_ON(c->container->files == NULL);
SCLogDebug("update files (FileAppendData)");
return FileAppendData(c->container->files, data, len);
}

static void HttpRangeFileClose(HttpRangeContainerFile *c, uint16_t flags)
Expand All @@ -413,17 +436,14 @@ File *HttpRangeClose(HttpRangeContainerBlock *c, uint16_t flags)
{
SCLogDebug("c %p c->container %p c->current %p", c, c->container, c->current);

if (c->container == NULL) {
// everything was just skipped : nothing to do
return NULL;
}
DEBUG_VALIDATE_BUG_ON(c->container == NULL);

/* we're processing an OOO chunk, won't be able to get us a full file just yet */
if (c->current) {
SCLogDebug("processing ooo chunk as c->current is set %p", c->current);
// some out-or-order range is finished
if (c->container->files->tail &&
c->container->files->tail->size >= c->current->start + c->current->offset) {
c->container->lastsize >= c->current->start + c->current->offset) {
// if the range has become obsolete because we received the data already
// we just free it
(void)SC_ATOMIC_SUB(ContainerUrlRangeList.ht->memuse, c->current->buflen);
Expand All @@ -438,6 +458,7 @@ File *HttpRangeClose(HttpRangeContainerBlock *c, uint16_t flags)
HTTP_RANGES_RB_INSERT(&c->container->fragment_tree, c->current);
if (res) {
SCLogDebug("duplicate range fragment");
(void)SC_ATOMIC_SUB(ContainerUrlRangeList.ht->memuse, c->current->buflen);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@victorjulien is this correct ?

Everywhere else we SCFree(*->buffer), we diminish the memcap

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the question. We update the memcap on this free, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We update the memcap on this free, right?

I added this in this latest PR.
It was not there before

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. This is why I don't like updates to existing PRs btw.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why I don't like updates to existing PRs btw.

In this case, the change was added between #6308 and #6312, so it was right ?

SCFree(c->current->buffer);
SCFree(c->current);
}
Expand All @@ -455,18 +476,30 @@ File *HttpRangeClose(HttpRangeContainerBlock *c, uint16_t flags)
}

// we just finished an in-order block
c->container->appending = false;
DEBUG_VALIDATE_BUG_ON(c->container->fileOwned == false);
c->container->fileOwned = false;
DEBUG_VALIDATE_BUG_ON(c->container->files->tail == NULL);
// we update the file size now that we own it again
c->container->lastsize = c->container->files->tail->size;
File *f = c->container->files->tail;

/* See if we can use our stored fragments to (partly) reconstruct the file */
HttpRangeContainerBuffer *range, *safe = NULL;
RB_FOREACH_SAFE (range, HTTP_RANGES, &c->container->fragment_tree, safe) {
if (f->size < range->start) {
// this next range is not reached yet
break;
}
if (f->size == range->start) {
// a new range just begins where we ended, append it
if (range->gap > 0) {
// if the range had a gap, begin by it
if (FileAppendData(c->container->files, NULL, range->gap) != 0) {
HttpRangeFileClose(c->container, flags | FILE_TRUNCATED);
c->container->error = true;
return f;
}
}
if (FileAppendData(c->container->files, range->buffer, range->offset) != 0) {
HttpRangeFileClose(c->container, flags | FILE_TRUNCATED);
c->container->error = true;
Expand All @@ -476,6 +509,14 @@ File *HttpRangeClose(HttpRangeContainerBlock *c, uint16_t flags)
// the range starts before where we ended
uint64_t overlap = f->size - range->start;
if (overlap < range->offset) {
if (range->gap > 0) {
// if the range had a gap, begin by it
if (FileAppendData(c->container->files, NULL, range->gap) != 0) {
HttpRangeFileClose(c->container, flags | FILE_TRUNCATED);
c->container->error = true;
return f;
}
}
// And the range ends beyond where we ended
// in this case of overlap, only add the extra data
if (FileAppendData(c->container->files, range->buffer + overlap,
Expand Down
20 changes: 15 additions & 5 deletions src/app-layer-htp-range.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ typedef struct HttpRangeContainerBuffer {
uint64_t start;
/** offset of bytes written in buffer (relative to the start of the range) */
uint64_t offset;
/** number of gaped bytes */
uint64_t gap;
} HttpRangeContainerBuffer;

int HttpRangeContainerBufferCompare(HttpRangeContainerBuffer *a, HttpRangeContainerBuffer *b);
Expand All @@ -46,9 +48,13 @@ RB_HEAD(HTTP_RANGES, HttpRangeContainerBuffer);
RB_PROTOTYPE(HTTP_RANGES, HttpRangeContainerBuffer, rb, HttpRangeContainerBufferCompare);

/** Item in hash table for a file in multiple ranges
* Thread-safety is ensured by the thread-safe hash table
* Thread-safety is ensured with the thread-safe hash table cf THashData
* The number of use is increased for each flow opening a new HttpRangeContainerBlock
* until it closes this HttpRangeContainerBlock
* The design goal is to have concurrency only on opening and closing a range request
* and have a lock-free data structure belonging to one Flow
* (see HttpRangeContainerBlock below)
* for every append in between (we suppose we have many appends per range request)
*/
typedef struct HttpRangeContainerFile {
/** key for hashtable */
Expand All @@ -59,16 +65,19 @@ typedef struct HttpRangeContainerFile {
uint32_t expire;
/** pointer to hashtable data, for locking and use count */
THashData *hdata;
/** total epxected size of the file in ranges */
/** total expected size of the file in ranges */
uint64_t totalsize;
/** size of the file after last sync */
uint64_t lastsize;
/** file container, with only one file */
FileContainer *files;
/** red and black tree list of ranges which came out of order */
struct HTTP_RANGES fragment_tree;
/** file flags */
uint16_t flags;
/** wether a range file is currently appending */
bool appending;
/** wether a HttpRangeContainerBlock is currently
owning the FileContainer in order to append to the file */
bool fileOwned;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style file_owned

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed in next PR

/** error condition for this range. Its up to timeout handling to cleanup */
bool error;
} HttpRangeContainerFile;
Expand All @@ -85,9 +94,10 @@ typedef struct HttpRangeContainerBlock {
HttpRangeContainerBuffer *current;
/** pointer to the main file container, where to directly append data */
HttpRangeContainerFile *container;
/** we own the container's file for now */
bool fileOwning;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style file_owning

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed in next PR

} HttpRangeContainerBlock;

int HttpRangeProcessSkip(HttpRangeContainerBlock *c, const uint8_t *data, const uint32_t len);
int HttpRangeAppendData(HttpRangeContainerBlock *c, const uint8_t *data, uint32_t len);
File *HttpRangeClose(HttpRangeContainerBlock *c, uint16_t flags);

Expand Down