Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to ReadStat 1.1.9 #713

Merged
merged 3 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# haven (development version)

* Updated to ReadStat 1.1.9.

* Fix various SAS catalog file reading bugs (#529, #653, #680, #696, #705).
* Increase maximum supported SAS page size to 16 MiB (#697).
* Ignore invalid SAV timestamp strings (#683).
* Fix compiler warnings (#707).

* Fixed issue in `write_*()` functions where invisible return of input data
frame included unintended alteration of date/time variables. (@jmobrien, #702)

Expand Down
14 changes: 14 additions & 0 deletions src/readstat/NEWS
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
New in 1.1.9:

* SAV reader: Improved support for Asian code pages #263
* SAV reader: Improved support for Very Long String records #287
* SAS reader: Improved support for RLE decompression #245 #253
* SAS reader: Support 16MiB page sizes #286
* SAS catalog reader: Fix bugs reading big-endian files #293
* SAS catalog reader: Allow formats with no labels #290
* SAS catalog reader: Check for long names in 64-bit files #291
* Improved compatibility with -Wstrict-prototypes #295
* Replace sprintf with snprintf #292

New in 1.1.8:

* XPT reader/writer: Improved support for format strings #257 #258
* DTA writer: Fix off-by-one error in v,o indexing for string refs #270
* SAV/DTA writers: Improved checking of non-ASCII characters #256
* SAS7BDAT reader: Fix use-after-free error #273
* Build: Link to libm on GNU systems #255
* SAS commands: Support more syntax
* SPSS commands: Make file names optional
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/readstat_bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#undef READSTAT_MACHINE_IS_TWOS_COMPLEMENT
#define READSTAT_MACHINE_IS_TWOS_COMPLEMENT 0

int machine_is_little_endian();
int machine_is_little_endian(void);

char ones_to_twos_complement1(char num);
int16_t ones_to_twos_complement2(int16_t num);
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/readstat_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
readstat_error_t readstat_convert(char *dst, size_t dst_len, const char *src, size_t src_len, iconv_t converter) {
/* strip off spaces from the input because the programs use ASCII space
* padding even with non-ASCII encoding. */
while (src_len && src[src_len-1] == ' ') {
while (src_len && (src[src_len-1] == ' ' || src[src_len-1] == '\0')) {
src_len--;
}
if (dst_len == 0) {
Expand Down
10 changes: 6 additions & 4 deletions src/readstat/readstat_malloc.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include <stdlib.h>

#define MAX_MALLOC_SIZE 0xFFF000
/* ~16 MB. Needs to be at least 0x3FF00, i.e. the default ~4MB block size used
* in compressed SPSS (ZSAV) files. The purpose here is to prevent massive
* allocations in the event of a malformed file or a bug in the library. */
#define MAX_MALLOC_SIZE 0x1000000
/* = 16 MiB. Needs to be at least 0x3FF000, i.e. the default ~4 MiB block size
* used in compressed SPSS (ZSAV) files. Some SAS installations use 16 MiB page
* sizes by default, see https://github.com/tidyverse/haven/issues/697.
* The purpose here is to prevent massive allocations in the event of a
* malformed file or a bug in the library. */

void *readstat_malloc(size_t len) {
if (len > MAX_MALLOC_SIZE || len == 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/readstat_variable.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <stdlib.h>
#include "readstat.h"

static readstat_value_t make_blank_value();
static readstat_value_t make_blank_value(void);
static readstat_value_t make_double_value(double dval);

static readstat_value_t make_blank_value() {
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/sas/ieee.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ static void ieee2xpt(unsigned char *ieee, unsigned char *xport);

#ifndef FLOATREP
#define FLOATREP get_native()
int get_native();
int get_native(void);
#endif

void memreverse(void *intp_void, int l) {
Expand Down
33 changes: 19 additions & 14 deletions src/readstat/sas/readstat_sas7bcat_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz

/* Pass 1 -- find out the offset of the labels */
for (i=0; i<label_count_capacity; i++) {
if (&lbp1[3] - value_start > value_labels_len || lbp1[2] < 0) {
if (&lbp1[3] - value_start > value_labels_len || sas_read2(&lbp1[2], ctx->bswap) < 0) {
retval = READSTAT_ERROR_PARSE;
goto cleanup;
}
Expand All @@ -76,7 +76,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz
}
value_offset[label_pos] = lbp1 - value_start;
}
lbp1 += 6 + lbp1[2];
lbp1 += 6 + sas_read2(&lbp1[2], ctx->bswap);
}

const char *lbp2 = lbp1;
Expand All @@ -93,7 +93,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz
readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
char string_val[4*16+1];
if (is_string) {
size_t value_entry_len = 6 + lbp1[2];
size_t value_entry_len = 6 + sas_read2(&lbp1[2], ctx->bswap);
retval = readstat_convert(string_val, sizeof(string_val),
&lbp1[value_entry_len-16], 16, ctx->converter);
if (retval != READSTAT_OK)
Expand Down Expand Up @@ -143,18 +143,20 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
readstat_error_t retval = READSTAT_OK;

size_t pad = 0;
int label_count_capacity = 0;
int label_count_used = 0;
uint64_t label_count_capacity = 0;
uint64_t label_count_used = 0;
int payload_offset = 106;
uint16_t flags = 0;
char name[4*32+1];

if (data_size < payload_offset)
goto cleanup;

pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure
flags = sas_read2(&data[2], ctx->bswap);
pad = (flags & 0x08) ? 4 : 0; // might be 0x10, not sure
if (ctx->u64) {
label_count_capacity = sas_read4(&data[42+pad], ctx->bswap);
label_count_used = sas_read4(&data[50+pad], ctx->bswap);
label_count_capacity = sas_read8(&data[42+pad], ctx->bswap);
label_count_used = sas_read8(&data[50+pad], ctx->bswap);

payload_offset += 32;
} else {
Expand All @@ -169,7 +171,7 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
pad += 16;
}

if ((data[2] & 0x80) && !ctx->u64) { // has long name
if (((flags & 0x80) && !ctx->u64) || ((flags & 0x20) && ctx->u64)) { // has long name
if (data_size < payload_offset + pad + 32)
goto cleanup;

Expand All @@ -182,6 +184,9 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
if (data_size < payload_offset + pad)
goto cleanup;

if (label_count_used == 0)
goto cleanup;

if ((retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
label_count_used, label_count_capacity, name, ctx)) != READSTAT_OK)
goto cleanup;
Expand All @@ -200,15 +205,15 @@ static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sa
break;

if (xlsr[ctx->xlsr_O_offset] == 'O') {
uint32_t page = 0, pos = 0;
uint64_t page = 0, pos = 0;
if (ctx->u64) {
page = sas_read4(&xlsr[8], ctx->bswap);
pos = sas_read4(&xlsr[16], ctx->bswap);
page = sas_read8(&xlsr[8], ctx->bswap);
pos = sas_read2(&xlsr[16], ctx->bswap);
} else {
page = sas_read2(&xlsr[4], ctx->bswap);
page = sas_read4(&xlsr[4], ctx->bswap);
pos = sas_read2(&xlsr[8], ctx->bswap);
}
ctx->block_pointers[ctx->block_pointers_used++] = ((uint64_t)page << 32) + pos;
ctx->block_pointers[ctx->block_pointers_used++] = (page << 32) + pos;
}

if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
Expand Down
3 changes: 2 additions & 1 deletion src/readstat/sas/readstat_sas7bdat_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,6 @@ static readstat_variable_t *sas7bdat_init_variable(sas7bdat_ctx_t *ctx, int i,

cleanup:
if (retval != READSTAT_OK) {
free(variable);
if (out_retval)
*out_retval = retval;

Expand All @@ -716,6 +715,8 @@ static readstat_variable_t *sas7bdat_init_variable(sas7bdat_ctx_t *ctx, int i,
}
}

free(variable);

return NULL;
}

Expand Down
7 changes: 7 additions & 0 deletions src/readstat/sas/readstat_sas_rle.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ typedef SSIZE_T ssize_t;
#include "readstat_sas_rle.h"

#define SAS_RLE_COMMAND_COPY64 0
#define SAS_RLE_COMMAND_COPY64_PLUS_4096 1
#define SAS_RLE_COMMAND_COPY96 2
#define SAS_RLE_COMMAND_INSERT_BYTE18 4
#define SAS_RLE_COMMAND_INSERT_AT17 5
#define SAS_RLE_COMMAND_INSERT_BLANK17 6
Expand All @@ -29,6 +31,7 @@ typedef SSIZE_T ssize_t;

static size_t command_lengths[16] = {
[SAS_RLE_COMMAND_COPY64] = 1,
[SAS_RLE_COMMAND_COPY64_PLUS_4096] = 1,
[SAS_RLE_COMMAND_INSERT_BYTE18] = 2,
[SAS_RLE_COMMAND_INSERT_AT17] = 1,
[SAS_RLE_COMMAND_INSERT_BLANK17] = 1,
Expand Down Expand Up @@ -62,6 +65,10 @@ ssize_t sas_rle_decompress(void *output_buf, size_t output_len,
case SAS_RLE_COMMAND_COPY64:
copy_len = (*input++) + 64 + length * 256;
break;
case SAS_RLE_COMMAND_COPY64_PLUS_4096:
copy_len = (*input++) + 64 + length * 256 + 4096;
break;
case SAS_RLE_COMMAND_COPY96: copy_len = length + 96; break;
case SAS_RLE_COMMAND_INSERT_BYTE18:
insert_len = (*input++) + 18 + length * 256;
insert_byte = *input++;
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/spss/readstat_por.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ ssize_t por_utf8_encode(const unsigned char *input, size_t input_len,
}
/* TODO - For some reason that replacement character isn't recognized
* by some systems, so be prepared to insert an ASCII space instead */
int printed = sprintf(output + offset, "%lc", codepoint);
int printed = snprintf(output + offset, output_len - offset, "%lc", codepoint);
if (printed > 0) {
offset += printed;
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/readstat/spss/readstat_por.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ typedef struct por_ctx_s {
ck_hash_table_t *var_dict;
} por_ctx_t;

por_ctx_t *por_ctx_init();
por_ctx_t *por_ctx_init(void);
void por_ctx_free(por_ctx_t *ctx);
ssize_t por_utf8_encode(const unsigned char *input, size_t input_len,
char *output, size_t output_len, uint16_t lookup[256]);
Expand Down
Loading