Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ERR_FS_FILE_TOO_LARGE error for 2.07 GB (2,223,069,643 bytes) zip file #537

Open
CHRISTrosetolife opened this issue Sep 5, 2024 · 4 comments

Comments

@CHRISTrosetolife
Copy link

CHRISTrosetolife commented Sep 5, 2024

Here is my code:

import AdmZip from "adm-zip";
// Reproduction of the failure: the stack trace in this report shows the AdmZip
// constructor (adm-zip.js:60) calling fs.readFileSync on the archive, which throws
// ERR_FS_FILE_TOO_LARGE for files larger than 2 GiB.
let zip = new AdmZip(z_path);
zip.extractAllTo(output_path, false);

Google says:

According to this answer on GitHub, 2GB is the limit:

That is the max buffer size in node. To import larger files, the code will need to change the imports to streams instead of putting the whole file in a buffer (...).

Stack trace, excluding my function calls:

node:fs:418
throw new ERR_FS_FILE_TOO_LARGE(size);
^

RangeError [ERR_FS_FILE_TOO_LARGE]: File size (2223069643) is greater than 2 GiB
at tryCreateBuffer (node:fs:418:13)
at Object.readFileSync (node:fs:471:14)
at new module.exports (D:\dev\node_modules\adm-zip\adm-zip.js:60:37)

@CHRISTrosetolife
Copy link
Author

Did some more googling

Apparently, given the official spec of zip files, it is not possible to stream?

@CHRISTrosetolife
Copy link
Author

In case someone in the future has this same error:

Ended up calling command line from within JavaScript to unzip file:

tar -xf input_path -C output_path

I had to ensure the output_path directory exists before unzipping to it

https://askubuntu.com/questions/45349/how-to-extract-files-to-another-directory-using-tar-command

https://superuser.com/questions/1314420/how-to-unzip-a-file-using-the-cmd

@5saviahv
Copy link
Collaborator

Apparently, given the official spec of zip files, it is not possible to stream?

Officially, ZIP does support streaming.

ADM-ZIP has chosen to use a buffer for temporary storage, and that may be the limiting factor.
For streaming, you may want to look at something like yazl, which uses streams.

@CHRISTrosetolife
Copy link
Author

Apparently, given the official spec of zip files, it is not possible to stream?

officially ZIP does support streaming.

ADM-ZIP has chosen to use a buffer for temporary storage, and that may be the limiting factor. For streaming, you may want to look at something like yazl, which uses streams.

Thank you for the reply!

I ended up writing my own function that calls tar from the command line:

import chalk from "chalk";
/**
 * Extracts the archive at `path_input` into the folder named by `unzip_folder`.
 *
 * If that folder already exists (per `file_exists`) nothing is extracted. Otherwise the
 * folder is created and `tar -xf <path_input> -C <folder>` is run through `command_line`.
 *
 * NOTE(review): the command is assembled as a single string; paths containing spaces
 * presumably do not survive however `command_line` tokenises it — confirm.
 *
 * @param {string} path_input Path of the archive to extract.
 * @returns {Promise<string>} The folder the archive is (or already was) extracted to.
 */
async function unzip(path_input) {
    const destination = unzip_folder(path_input);
    const already_extracted = await file_exists(destination);
    if (!already_extracted) {
        const announcement = string_combine_multiple(["unzipping to ", destination]);
        log(announcement);
        await folder_exists_ensure(destination);
        const tar_pieces = ["tar -xf ", path_input, " -C ", destination, ""];
        await command_line(string_combine_multiple(tar_pieces));
    }
    return destination;
}
/**
 * Derives the extraction folder for an archive by removing its ".zip" suffix
 * via `string_suffix_without`.
 *
 * @param {string} path_input Archive path.
 * @returns {string} `path_input` without the trailing ".zip".
 */
function unzip_folder(path_input) {
    return string_suffix_without(path_input, ".zip");
}
/**
 * Returns `input` with `suffix` removed from its end.
 * The result of `string_ends_with(input, suffix)` is first passed to `assert_boolean`.
 *
 * @param {string} input
 * @param {string} suffix
 * @returns {string} The leading part of `input` that precedes `suffix`.
 */
function string_suffix_without(input, suffix) {
    const has_suffix = string_ends_with(input, suffix);
    assert_boolean(has_suffix);
    const suffix_length = string_size(suffix);
    const keep_count = string_size(input) - suffix_length;
    return string_take(input, keep_count);
}
/**
 * Reads the `length` property of `input`.
 *
 * @param {string} input
 * @returns {number} `input.length`.
 */
function string_size(input) {
    const { length } = input;
    return length;
}
/**
 * Returns the first `count` characters of `input`.
 * Asserts (through `assert_message`) that `count` is less than or equal to the length of `input`;
 * the failure message carries `input`, `count` and `length`.
 *
 * @param {string} input
 * @param {number} count Number of leading characters to keep.
 * @returns {string}
 */
function string_take(input, count) {
    const length = string_size(input);
    const describe_failure = () => ({ input, count, length });
    assert_message(less_than_equal, [count, length], describe_failure);
    return string_substring(input, 0, count);
}
/**
 * Function form of the `<=` operator.
 *
 * @returns {boolean} `a <= b`.
 */
function less_than_equal(a, b) {
    const result = a <= b;
    return result;
}
/**
 * Asserts that `fn(...args)` is `true`, via `assert_message_string`.
 *
 * The failure message is the JSON form of whatever `message_get()` returns, cut to at most
 * 1000 characters. `message_get` is handed over lazily, wrapped in a callback.
 *
 * @param {Function} fn Predicate to evaluate.
 * @param {Array} args Arguments spread into `fn`.
 * @param {Function} message_get Returns the value describing the failure.
 * @returns {undefined} Whatever `assert_message_string` returns.
 */
function assert_message(fn, args, message_get) {
    return assert_message_string(fn, args, () => {
        const limit = 1000;
        const serialized = json_to(message_get());
        // The size of the serialized text (not of a comparison result) decides truncation.
        // JSON.stringify yields undefined for some inputs, hence the string check.
        if (typeof serialized === "string" && string_size(serialized) > limit) {
            return string_take(serialized, limit);
        }
        return serialized;
    });
}
/**
 * Delegates to `returns_message` with an expected value of `true`.
 *
 * @param {Function} fn Predicate to evaluate.
 * @param {Array} args Arguments for `fn`.
 * @param {Function} message_get Lazily produces the failure message.
 */
function assert_message_string(fn, args, message_get) {
    const expected = true;
    returns_message(fn, expected, args, message_get);
}
/**
 * Calls `fn(...args)`, compares the result with `expected` using `equal`, and hands the
 * comparison outcome plus `message_get` to `assert_boolean_message`.
 *
 * @param {Function} fn Function under check.
 * @param {*} expected Value `fn` is expected to return.
 * @param {Array} args Arguments spread into `fn`.
 * @param {Function} message_get Lazily produces the failure message.
 */
function returns_message(fn, expected, args, message_get) {
    const matches = equal(fn(...args), expected);
    assert_boolean_message(matches, message_get);
}
/**
 * Raises (through `error`) with the text from `message_get()` when `condition` is exactly `false`.
 * Any other value — including other falsy values — passes silently, and `message_get` is not called.
 *
 * @param {*} condition
 * @param {Function} message_get Lazily produces the failure message.
 */
function assert_boolean_message(condition, message_get) {
    if (condition !== false) {
        return;
    }
    error(message_get());
}
/**
 * Throws a new `Error` carrying `message`.
 *
 * @param {string} message
 * @throws {Error} Always.
 */
function error(message) {
    const failure = new Error(message);
    throw failure;
}
/**
 * Function form of strict equality.
 *
 * @returns {boolean} `a === b`.
 */
function equal(a, b) {
    const same = a === b;
    return same;
}
/**
 * Serializes `object` with `JSON.stringify` (no replacer, no indentation).
 *
 * @param {*} object
 * @returns {string|undefined} The JSON text as produced by `JSON.stringify`.
 */
function json_to(object) {
    const serialized = JSON.stringify(object);
    return serialized;
}
/**
 * Wrapper around `String.prototype.substring` that first checks, via
 * `assert_arguments_length`, that exactly three arguments were supplied.
 *
 * @param {string} input
 * @param {number} start
 * @param {number} end
 * @returns {string} `input.substring(start, end)`.
 */
function string_substring(input, start, end) {
    const expected_argument_count = 3;
    assert_arguments_length(arguments, expected_argument_count);
    const piece = input.substring(start, end);
    return piece;
}
/**
 * Checks that `args` (typically a caller's `arguments` object) holds exactly `expected` entries.
 * Also asserts that this function itself was called with two arguments.
 * On mismatch the failure message carries the text "expecting different argument count"
 * together with the expected and actual counts.
 *
 * @param {ArrayLike} args
 * @param {number} expected
 */
function assert_arguments_length(args, expected) {
    assert(equal, [arguments.length, 2]);
    const actual = args.length;
    const describe_mismatch = () => ({
        message: "expecting different argument count",
        expected,
        actual
    });
    assert_message(equal, [actual, expected], describe_mismatch);
}
/**
 * Asserts that `fn(...args)` returns `true`, by delegating to `returns`.
 *
 * @param {Function} fn Predicate to evaluate.
 * @param {Array} args Arguments for `fn`.
 */
function assert(fn, args) {
    const expected = true;
    returns(fn, expected, args);
}
/**
 * Checks that `fn(...args)` yields `expected`, via `returns_message`.
 * The failure message is the JSON form of `args`.
 *
 * @param {Function} fn Function under check.
 * @param {*} expected Value `fn` is expected to return.
 * @param {Array} args Arguments for `fn`.
 */
function returns(fn, expected, args) {
    const message_get = () => json_to(args);
    returns_message(fn, expected, args, message_get);
}
/**
 * Runs `assert_boolean_message` on `condition`, using `assert_message_error`
 * as the source of the failure message.
 *
 * @param {*} condition
 */
function assert_boolean(condition) {
    const message_get = assert_message_error;
    assert_boolean_message(condition, message_get);
}
/**
 * Supplies the fixed failure text "assert".
 *
 * @returns {string}
 */
function assert_message_error() {
    const message = "assert";
    return message;
}
/**
 * Tests whether `input` ends with `prefix` by running `string_starts_with_generic`
 * with `string_get_reverse` as the character accessor.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @param {string} input
 * @param {string} prefix The ending to look for.
 * @returns {boolean}
 */
function string_ends_with(input, prefix) {
    assert_arguments_length(arguments, 2);
    return string_starts_with_generic(string_get_reverse, input, prefix);
}
/**
 * Reads the character `index` positions before the last one
 * (index 0 is the last character, 1 the one before it, and so on).
 *
 * @param {string} input
 * @param {number} index Distance from the end.
 * @returns {string|undefined} The character, or `undefined` when out of range.
 */
function string_get_reverse(input, index) {
    const position = string_index_last(input) - index;
    return input[position];
}
/**
 * Index of the final character of `input`: its size minus one.
 *
 * @param {string} input
 * @returns {number} `-1` for an empty string.
 */
function string_index_last(input) {
    return string_size(input) - 1;
}
/**
 * Compares `input` and `prefix` position by position for the length of `prefix`,
 * reading characters through `string_get_lambda(text, index)`.
 * Returns `false` straight away when `input` is shorter than `prefix`, and stops at the
 * first mismatch.
 *
 * @param {Function} string_get_lambda Accessor `(text, index) => character`.
 * @param {string} input
 * @param {string} prefix
 * @returns {boolean} `true` when every compared position matches.
 */
function string_starts_with_generic(string_get_lambda, input, prefix) {
    const input_length = string_size(input);
    const prefix_length = string_size(prefix);
    if (input_length < prefix_length) {
        return false;
    }
    const matches_at = (i) => string_get_lambda(input, i) === string_get_lambda(prefix, i);
    return range(prefix_length).every(matches_at);
}
/**
 * Produces the result of `range_generic(count, 0)`.
 * Requires exactly one argument (checked through `assert_arguments_length`).
 *
 * @param {number} count Number of entries requested.
 * @returns {*} Whatever `range_generic` returns.
 */
function range(count) {
    assert_arguments_length(arguments, 1);
    const start_at = 0;
    return range_generic(count, start_at);
}
/**
 * Builds a list, through `list_adder`, of `add(i, offset)` for every `i` from 0 up to
 * (but not including) `count`.
 *
 * @param {number} count Number of entries.
 * @param {number} offset Value combined with each index via `add`.
 * @returns {*} The list returned by `list_adder`.
 */
function range_generic(count, offset) {
    const fill = (push) => {
        let i = 0;
        while (i < count) {
            push(add(i, offset));
            i += 1;
        }
    };
    return list_adder(fill);
}
/**
 * Function form of the `+` operator.
 *
 * @returns {*} `a + b`.
 */
function add(a, b) {
    const sum = a + b;
    return sum;
}
/**
 * Creates an empty array, passes `lambda` a callback that appends one item to it
 * (through `list_add`), and returns the array once `lambda` has run.
 *
 * @param {Function} lambda Receives the append callback.
 * @returns {Array} The collected items.
 */
function list_adder(lambda) {
    const collected = [];
    const append = (item) => list_add(collected, item);
    lambda(append);
    return collected;
}
/**
 * Appends `item` to `list` in place with `push`.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @param {Array} list
 * @param {*} item
 */
function list_add(list, item) {
    const expected_argument_count = 2;
    assert_arguments_length(arguments, expected_argument_count);
    list.push(item);
}
/**
 * Runs `command` through `command_line_generic` with `silent` set to `false`.
 *
 * @param {string} command
 * @returns {Promise<*>} Whatever `command_line_generic` resolves to.
 */
async function command_line(command) {
    return await command_line_generic(command, false);
}
/**
 * Spawns `command` as a child process and waits for it to finish.
 *
 * The command string is tokenised with `string_split_space`; the first token is the
 * executable and the rest are its arguments, so an argument that itself contains a
 * space cannot be expressed through this function.
 *
 * Unless `silent` is truthy, child stdout is forwarded to `log_write` and child stderr
 * to `log_error_write`. When `silent`, the output is discarded but still drained so the
 * child cannot stall on a full pipe.
 *
 * @param {string} command Space-separated executable and arguments.
 * @param {boolean} silent When truthy, child output is not forwarded.
 * @returns {Promise<undefined>} Resolves once the child closes with exit code 0.
 * @throws {Error} Rejects when the process cannot be spawned, or when it closes with a
 *     non-zero exit code or because of a signal.
 */
async function command_line_generic(command, silent) {
    const { spawn } = await import_node("child_process");
    return await new Promise((resolve, reject) => {
        const { first, remaining } = list_first_remaining(string_split_space(command));
        const child = spawn(first, remaining);
        // Without an 'error' listener a spawn failure (e.g. executable not found)
        // surfaces as an uncaught exception instead of a rejected promise.
        child.on("error", reject);
        const { stdout, stderr } = child;
        if (silent) {
            stdout?.resume();
            stderr?.resume();
        } else {
            stdout?.setEncoding("utf8");
            stdout?.on("data", (data) => {
                log_write(data);
            });
            stderr?.setEncoding("utf8");
            stderr?.on("data", (data) => {
                log_error_write(data);
            });
        }
        child.on("close", (code, signal) => {
            if (code === 0) {
                resolve();
                return;
            }
            reject(new Error(`command failed (exit code ${code}, signal ${signal}): ${command}`));
        });
    });
}
/**
 * Emits `message` through `log_error_generic`, with `log_write` as the output function.
 *
 * @param {string} message
 */
function log_error_write(message) {
    const writer = log_write;
    log_error_generic(writer, message);
}
/**
 * Writes `message` to the process's standard output as-is (no newline is added here).
 *
 * @param {string} message
 */
function log_write(message) {
    const { stdout } = process;
    stdout.write(message);
}
/**
 * Passes `message` through `redBright` of the object returned by `chalk()` and
 * hands the result to `fn`.
 *
 * @param {Function} fn Receives the decorated message.
 * @param {string} message
 */
function log_error_generic(fn, message) {
    const painter = chalk();
    const decorated = painter.redBright(message);
    fn(decorated);
}
/**
 * Accessor returning the module-level binding `c`.
 *
 * NOTE(review): `c` is not declared anywhere in the visible source, and the only chalk
 * import visible binds the name `chalk`, which collides with this function's own name.
 * Presumably the import was meant to bind `c` — confirm and align the import.
 *
 * @returns {*} The value of `c`.
 */
function chalk() {
    const instance = c;
    return instance;
}
/**
 * Splits `sentence` on single space characters, via `string_split`.
 *
 * @param {string} sentence
 * @returns {*} Whatever `string_split` returns.
 */
function string_split_space(sentence) {
    const delimiter = " ";
    return string_split(sentence, delimiter);
}
/**
 * Wrapper around `String.prototype.split`.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @param {string} input
 * @param {string} delimeter Separator passed to `split`.
 * @returns {string[]} `input.split(delimeter)`.
 */
function string_split(input, delimeter) {
    assert_arguments_length(arguments, 2);
    const parts = input.split(delimeter);
    return parts;
}
/**
 * Splits list `r` into its head (`list_first`) and everything after the first
 * item (`list_skip(r, 1)`).
 *
 * @param {Array} r
 * @returns {{first: *, remaining: *}}
 */
function list_first_remaining(r) {
    return {
        first: list_first(r),
        remaining: list_skip(r, 1)
    };
}
/**
 * Returns a copy of `list` without its first `count` entries (`list.slice(count)`).
 *
 * @param {Array} list
 * @param {number} count
 * @returns {Array}
 */
function list_skip(list, count) {
    const tail = list.slice(count);
    return tail;
}
/**
 * Fetches the entry at index 0 of `list` through `list_get`.
 *
 * @param {Array} list
 * @returns {*}
 */
function list_first(list) {
    const first_index = 0;
    return list_get(list, first_index);
}
/**
 * Returns `list[index]` after asserting `list_index_is(list, index)`.
 *
 * @param {Array} list
 * @param {number} index
 * @returns {*}
 */
function list_get(list, index) {
    assert(list_index_is, [list, index]);
    const item = list[index];
    return item;
}
/**
 * Tells whether `index` lies in the range 0 (inclusive) to the size of `list` (exclusive).
 * The size of `list` is only queried when `index` is at least 0.
 *
 * @param {Array} list
 * @param {number} index
 * @returns {boolean}
 */
function list_index_is(list, index) {
    const lower_bound_ok = greater_than_equal(index, 0);
    if (!lower_bound_ok) {
        return lower_bound_ok;
    }
    return less_than(index, list_size(list));
}
/**
 * Returns the `length` of `list`.
 * Requires exactly one argument (checked through `assert_arguments_length`) and asserts
 * `list_is(list)`.
 *
 * @param {Array} list
 * @returns {number}
 */
function list_size(list) {
    assert_arguments_length(arguments, 1);
    assert(list_is, [list]);
    const { length } = list;
    return length;
}
/**
 * Tells whether `candidate` is an array, according to `Array.isArray`.
 *
 * @param {*} candidate
 * @returns {boolean}
 */
function list_is(candidate) {
    const is_array = Array.isArray(candidate);
    return is_array;
}
/**
 * Function form of the `<` operator.
 *
 * @returns {boolean} `a < b`.
 */
function less_than(a, b) {
    const result = a < b;
    return result;
}
/**
 * Function form of the `>=` operator.
 *
 * @returns {boolean} `a >= b`.
 */
function greater_than_equal(a, b) {
    const result = a >= b;
    return result;
}
/**
 * Dynamically imports `libary_to_import`, but only when `web_not_is()` is truthy.
 * Otherwise nothing is imported and the result is `undefined`.
 *
 * @param {string} libary_to_import Module specifier handed to `import()`.
 * @returns {Promise<*>} The module namespace, or `undefined`.
 */
async function import_node(libary_to_import) {
    if (!web_not_is()) {
        return undefined;
    }
    return await import(libary_to_import);
}
/**
 * Reports whether the identifier `window` is undefined in the current environment.
 *
 * @returns {boolean} `true` when `typeof window` is "undefined".
 */
function web_not_is() {
    const window_type = typeof window;
    return window_type === "undefined";
}
/**
 * Makes sure the folder `parent` exists: when `file_exists(parent)` is false, the
 * folder's own parent is ensured first (`folder_parent_exists_ensure`) and then the
 * folder itself is created (`folder_new`). Nothing happens when it already exists.
 *
 * @param {string} parent Path of the folder to ensure.
 * @returns {Promise<undefined>}
 */
async function folder_exists_ensure(parent) {
    const present = await file_exists(parent);
    if (present) {
        return;
    }
    await folder_parent_exists_ensure(parent);
    await folder_new(parent);
}
/**
 * Creates the folder `folder_new_name` with `fs.promises.mkdir` (no options passed),
 * loading "fs" through `import_node`.
 *
 * @param {string} folder_new_name Path of the folder to create.
 * @returns {Promise<undefined>}
 */
async function folder_new(folder_new_name) {
    const { promises } = await import_node("fs");
    await promises.mkdir(folder_new_name);
}
/**
 * Ensures that the directory containing `file_name` exists, by passing
 * `path.dirname(file_name)` to `folder_exists_ensure`.
 *
 * @param {string} file_name
 * @returns {Promise<undefined>}
 */
async function folder_parent_exists_ensure(file_name) {
    const { dirname } = await import("path");
    await folder_exists_ensure(dirname(file_name));
}
/**
 * Reports whether `file_name` exists.
 *
 * When `web_is()` is truthy, the answer is whether `file_name` is one of the property
 * names of the `files` object found on `global_get()`. Otherwise `fs.stat` is used:
 * success means `true`, an `ENOENT` error means `false`, and any other error rejects.
 *
 * @param {string} file_name
 * @returns {Promise<boolean>}
 */
async function file_exists(file_name) {
    if (web_is()) {
        const known_files = object_properties(global_get().files);
        return list_includes(known_files, file_name);
    }
    const fs = await import_node("fs");
    return await new Promise((resolve, reject) => {
        fs.stat(file_name, (err) => {
            if (err == null) {
                resolve(true);
                return;
            }
            if (err.code === "ENOENT") {
                resolve(false);
                return;
            }
            reject(err);
        });
    });
}
/**
 * Returns the environment's global object.
 *
 * `globalThis` is the standard name for it in every JavaScript environment, so no
 * environment check is needed. The earlier form returned the identifier `g` outside the
 * browser — undeclared in the visible source — and `global` inside it, which is not a
 * standard browser global.
 *
 * @returns {object} The global object.
 */
function global_get() {
    return globalThis;
}
/**
 * Logical negation of `web_not_is()`.
 *
 * @returns {boolean}
 */
function web_is() {
    const outside_web = web_not_is();
    return !outside_web;
}
/**
 * Lists the own enumerable string keys of `object` (`Object.keys`).
 *
 * @param {object} object
 * @returns {string[]}
 */
function object_properties(object) {
    const keys = Object.keys(object);
    return keys;
}
/**
 * Tells whether `list` contains `item`, using `Array.prototype.includes`.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @param {Array} list
 * @param {*} item
 * @returns {boolean}
 */
function list_includes(list, item) {
    assert_arguments_length(arguments, 2);
    const found = list.includes(item);
    return found;
}
/**
 * Joins the entries of `list`, in order, by folding them onto an empty string with
 * `string_combine`.
 * Requires exactly one argument (checked through `assert_arguments_length`) and asserts
 * `list_is(list)`.
 *
 * @param {Array} list Pieces to join.
 * @returns {string}
 */
function string_combine_multiple(list) {
    assert_arguments_length(arguments, 1);
    assert(list_is, [list]);
    let combined = "";
    for (const piece of list) {
        combined = string_combine(combined, piece);
    }
    return combined;
}
/**
 * Combines `a` and `b` with the `+` operator.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @returns {*} `a + b`.
 */
function string_combine(a, b) {
    assert_arguments_length(arguments, 2);
    const combined = a + b;
    return combined;
}
/**
 * Records `message` in the list returned by `log_logs()` and then prints it
 * with `console.log`.
 *
 * @param {*} message
 */
function log(message) {
    list_add(log_logs(), message);
    console.log(message);
}
/**
 * Returns the list of logged messages kept on the object from `global_get()`, under the
 * key `log.name`; `object_property_initialize` is given an empty array as initial value.
 *
 * @returns {*} Whatever `object_property_initialize` returns.
 */
function log_logs() {
    const store = global_get();
    const key = log.name;
    return object_property_initialize(store, key, []);
}
/**
 * Same as `object_property_initialize_get`, except that the initial value is given
 * directly instead of through a factory function.
 *
 * @param {object} lookup Object holding the property.
 * @param {string} key Property name.
 * @param {*} initial Value offered as the initial one.
 * @returns {*} Whatever `object_property_initialize_get` returns.
 */
function object_property_initialize(lookup, key, initial) {
    const initial_get = () => initial;
    return object_property_initialize_get(lookup, key, initial_get);
}
/**
 * Returns `lookup[key]` (through `object_property_get`), first storing `initial()` under
 * `key` when `object_property_exists_not(lookup, key)` says the property is missing.
 * `initial` is only called in that case.
 *
 * @param {object} lookup Object holding the property.
 * @param {string} key Property name.
 * @param {Function} initial Factory for the initial value.
 * @returns {*}
 */
function object_property_initialize_get(lookup, key, initial) {
    const missing = object_property_exists_not(lookup, key);
    if (missing) {
        const value = initial();
        object_property_set(lookup, key, value);
    }
    return object_property_get(lookup, key);
}
/**
 * Negation (through `not`) of `object_property_exists(lookup, right)`.
 * Requires exactly two arguments (checked through `assert_arguments_length`).
 *
 * @param {object} lookup
 * @param {string} right Property name.
 * @returns {*} Whatever `not` returns.
 */
function object_property_exists_not(lookup, right) {
    assert_arguments_length(arguments, 2);
    const exists = object_property_exists(lookup, right);
    return not(exists);
}
/**
 * Tells whether `property_name` is an own property of `object` (`Object.hasOwn`).
 *
 * @param {object} object
 * @param {string} property_name
 * @returns {boolean}
 */
function object_property_exists(object, property_name) {
    const own = Object.hasOwn(object, property_name);
    return own;
}
/**
 * Function form of the `!` operator.
 *
 * @returns {boolean} `!b`.
 */
function not(b) {
    const negated = !b;
    return negated;
}
/**
 * Reads `object[property_name]` and asserts (through `assert_message`) that
 * `undefined_not_is` holds for the value; the failure message carries
 * `property_name` and `object`.
 *
 * @param {object} object
 * @param {string} property_name
 * @returns {*} The property value.
 */
function object_property_get(object, property_name) {
    const result = object[property_name];
    const describe_failure = () => ({ property_name, object });
    assert_message(undefined_not_is, [result], describe_failure);
    return result;
}
/**
 * Logical negation of `undefined_is(previous_token)`.
 *
 * @param {*} previous_token Value to test.
 * @returns {boolean}
 */
function undefined_not_is(previous_token) {
    const is_undefined = undefined_is(previous_token);
    return !is_undefined;
}
/**
 * Tells whether `typeof input` is "undefined".
 *
 * @param {*} input
 * @returns {boolean}
 */
function undefined_is(input) {
    const type_name = typeof input;
    return type_name === "undefined";
}
/**
 * Assigns `value` to `object[property_name]`.
 *
 * @param {object} object Object to modify in place.
 * @param {string} property_name
 * @param {*} value
 */
function object_property_set(object, property_name, value) {
    const target = object;
    target[property_name] = value;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants