diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 620230f..0000000 --- a/TODO.md +++ /dev/null @@ -1,23 +0,0 @@ -# TODO - -## Copy to -- [x] header -- [x] sheet -- [ ] types -- [ ] implicit copy to when it sees a gsheets url -- [x] warn when more than 2048 rows -- [x] support > 2048 rows - -## read_gsheet() -- [ ] types -- [x] large sheets -- [x] implicit read_gsheet when it sees a gsheets url - -## Auth Providers -- [x] OAuth (subject to approval) -- [x] Token -- [ ] Service Account keyfile - -## Tests -- [x] Tests for read_gsheet() -- [x] Tests for copy to diff --git a/docs/pages/index.md b/docs/pages/index.md index b0ebddd..b1a58df 100644 --- a/docs/pages/index.md +++ b/docs/pages/index.md @@ -23,7 +23,7 @@ INSTALL gsheets FROM community; LOAD gsheets; ``` -The latest version of DuckDB (currently 1.1.2) is required. +The latest version of [DuckDB](https://duckdb.org/docs/installation) (currently 1.1.2) is required. ## Usage @@ -109,3 +109,7 @@ This token will periodically expire - you can re-run the above command again to - Reading sheets where data does not start in A1 is not yet supported. - Writing data to a sheet starting from a cell other than A1 is not yet supported. - Sheets must already exist to COPY TO them. + +## Support + +If you are having problems, find a bug, or have an idea for an improvement, please [file an issue on GitHub](https://github.com/evidence-dev/duckdb_gsheets). 
/// Report whether a cell's text is a plain decimal number, i.e. whether the
/// column it came from may be inferred as DOUBLE.
///
/// ReadSheetFunction and ReadSheetBind call this on each value of the first
/// data row when choosing between DOUBLE and VARCHAR.
///
/// Accepted: an optional sign, decimal digits with an optional '.', and an
/// optional decimal exponent ("42", "-1.5", ".5", "1e-3"), provided the value
/// fits in a double.
/// Rejected: the empty string, text with any other character (leading or
/// trailing whitespace, "inf", "nan", hexadecimal forms such as "0x1F"),
/// malformed numbers ("e", "1.2.3", "+-1") and out-of-range magnitudes.
///
/// @param value Raw cell text.
/// @return true when the whole string is a decimal number representable as a double.
bool IsValidNumber(const std::string &value) {
    // An empty cell carries no type information.
    if (value.empty()) {
        return false;
    }

    // std::stod on its own is too permissive for type inference: it skips
    // leading whitespace and also parses "inf", "nan" and hexadecimal floats.
    // Restricting the alphabet first keeps such text cells from being
    // classified as numeric.
    if (value.find_first_not_of("0123456789.+-eE") != std::string::npos) {
        return false;
    }

    try {
        size_t processed = 0;
        std::stod(value, &processed);
        // Numeric only if the parser consumed every character ("1e" or
        // "1.2.3" stop early and are therefore rejected).
        return processed == value.length();
    } catch (...) {
        // std::stod signals both "no conversion possible" and "outside the
        // range of double" by throwing; neither can be typed as DOUBLE.
        return false;
    }
}
+statement ok +create secret test_secret ( + type gsheet, + provider access_token, + token '${TOKEN}' +); + +# Test the easy types +# TODO: add the other types +# bool,tinyint,smallint,int,bigint,hugeint,uhugeint,utinyint,usmallint,uint,ubigint,varint,date,time,timestamp,timestamp_s,timestamp_ms,timestamp_ns,time_tz,timestamp_tz,float,double,dec_4_1,dec_9_4,dec_18_6,dec38_10,uuid,interval,varchar,blob,bit,small_enum,medium_enum,large_enum,int_array,double_array,date_array,timestamp_array,timestamptz_array,varchar_array,nested_int_array,struct,struct_of_arrays,array_of_structs,map,union,fixed_int_array,fixed_varchar_array,fixed_nested_int_array,fixed_nested_varchar_array,fixed_struct_array,struct_of_fixed_array,fixed_array_of_int_list,list_of_fixed_int_array +statement ok +copy (select + bool, + tinyint, + smallint, + int, + bigint, + hugeint, + uhugeint, + utinyint, + usmallint, + uint, + ubigint, + varint, + date, + time, + timestamp, + timestamp_s, + timestamp_ms, + timestamp_ns, + time_tz, + timestamp_tz, + float, + double +from test_all_types()) to 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987' (format gsheet); + +# Read the types back +query IIIIIIIIIIIIIIIIIIIIII +from 'https://docs.google.com/spreadsheets/d/11QdEasMWbETbFVxry-SsD8jVcdYIT1zBQszcF84MdE8/edit?gid=1295634987#gid=1295634987'; +---- +FALSE -128.0 -32768.0 -2147483648.0 -9.22337e+18 -1.70141e+38 0.0 0.0 0.0 0.0 0.0 -1.79769e+308 5877642-06-25 (BC) 0:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 1677-09-22 0:00:00 00:00:00+15:59:59 290309-12-22 (BC) 00:00:00+00 -3.4e+38 -1.80E+308 +TRUE 127.0 32767.0 2147483647.0 9.223372036854776e+18 1.7014118346046923e+38 3.402823669209385e+38 255.0 65535.0 4294967295.0 1.8446744073709552e+19 1.7976931348623157e+308 5881580-07-10 24:00:00 294247-01-10 04:00:54.775806 294247-01-10 04:00:54 294247-01-10 04:00:54.775 2262-04-11 23:47:17 24:00:00-15:59:59 
294247-01-10 04:00:54.775806+00 3.4e+38 1.80E+308 \ No newline at end of file