From fe3b4d414886159497384925bdb6eb8b15fefcc5 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Fri, 7 Feb 2020 10:19:45 -0600 Subject: [PATCH 01/21] Add some basic scaffolding. --- +dj/+internal/External.m | 0 +tests/TestTls.m | 2 +- .travis.yml | 3 +-- setupDJ.m | 4 ++-- 4 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 +dj/+internal/External.m diff --git a/+dj/+internal/External.m b/+dj/+internal/External.m new file mode 100644 index 00000000..e69de29b diff --git a/+tests/TestTls.m b/+tests/TestTls.m index cdda43d0..6433f83e 100644 --- a/+tests/TestTls.m +++ b/+tests/TestTls.m @@ -63,7 +63,7 @@ function testStructException(testCase) testCase.CONN_INFO.user, ... testCase.CONN_INFO.password, ... '',true,struct('ca','fake/path/some/where')), ... - 'DataJoint:TLS:InvalidStruct'); + 'mYm:TLS:InvalidStruct'); end end end \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 09778d2b..9535ef6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ branches: except: - - master-stage - - stage + - /^stage.*$/ sudo: required services: - docker diff --git a/setupDJ.m b/setupDJ.m index 5fc13e22..1d721ffe 100644 --- a/setupDJ.m +++ b/setupDJ.m @@ -27,7 +27,7 @@ function setupDJ(skipPathAddition, force) fprintf('mym missing. 
Downloading...\n') target = fullfile(base, 'mym.zip'); % mymURL = 'https://github.com/datajoint/mym/archive/master.zip'; - mymURL = 'https://github.com/datajoint/mym/archive/external-storage.zip'; + mymURL = 'https://github.com/guzman-raphael/mym/archive/ext-serialize.zip'; target = websave(target, mymURL); if isunix && ~ismac % on Linux Matlab unzip doesn't work properly so use system unzip @@ -37,7 +37,7 @@ function setupDJ(skipPathAddition, force) end % rename extracted mym-master directory to mym % movefile(fullfile(base, 'mym-master'), mymdir) - movefile(fullfile(base, 'mym-external-storage'), mymdir) + movefile(fullfile(base, 'mym-ext-serialize'), mymdir) delete(target) end From df71efb0fdf42b8afad7d5950c6b3421f93f4e05 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Wed, 12 Feb 2020 12:42:18 -0600 Subject: [PATCH 02/21] Added file storage plugin implementation. --- +dj/+internal/External.m | 266 ++++++++++++++++++++++++++++++++ +dj/+store_plugins/@File/File.m | 43 ++++++ .gitignore | 3 +- 3 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 +dj/+store_plugins/@File/File.m diff --git a/+dj/+internal/External.m b/+dj/+internal/External.m index e69de29b..0f36e554 100644 --- a/+dj/+internal/External.m +++ b/+dj/+internal/External.m @@ -0,0 +1,266 @@ +% dj.internal.External - an external static method class. +classdef External + methods (Static) + function config = buildConfig(config, validation_config, store_name) + function validateInput(address, target) + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if ~isstruct(target.(fn{k})) + subscript = substruct(address{:}); + try + value = subsref(config, subscript); + vconfig = subsref(validation_config, subscript); + type_check = vconfig.type_check; + if ~type_check(value) + % Throw error for config that fails type validation + error('DataJoint:StoreConfig:WrongType', ... + 'Unexpected type `%s` for config `%s` in store `%s`. 
Expecting `%s`.', class(value), strjoin(address, ''), store_name, char(type_check)); + end + catch ME + if strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for extra config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Unexpected additional config `%s` specified in store `%s`.', strjoin(address, ''), store_name); + else + rethrow(ME); + end + end + else + validateInput(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + function validateConfig(address, target) + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if any(strcmp('required',fieldnames(target))) + address(end) = []; + address(end) = []; + subscript = substruct(address{:}); + vconfig = subsref(validation_config, subscript); + required = vconfig.required; + try + value = subsref(config, subscript); + catch ME + if required && strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for required config + error('DataJoint:StoreConfig:MissingRequired', ... + 'Missing required config `%s` in store `%s`.', strjoin(address, ''), store_name); + elseif strcmp(ME.identifier,'MATLAB:nonExistentField') + % Set default for optional config + default = vconfig.default; + config = subsasgn(config, subscript, default); + end + end + break; + else + validateConfig(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + + validateInput({}, config); + validateConfig({}, validation_config); + end + function store_targets = test() + % test correct multi + c = struct('local', ... + struct('store_config', ... + struct(... + 'protocol', 'file', ... + 'location', '/tmp/raphael'... + ), 'type_config', ... + struct(... + 'blob', struct(... + 'subfolding', [3,4], ... + 'cache', '/tmp/cache'... + )... + )... + ), ... + 'remote', ... + struct('store_config', ... + struct(... + 'protocol', 'file', ... + 'location', '/tmp/john'... + )... + )... 
+ ); + +% % test incorrect string used +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', "file", ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... +% 'subfolding', [3,4], ... +% 'cache', '/tmp/cache'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/john'... +% )... +% )... +% ); + + % % test store not exists +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... +% 'subfolding', [3,4], ... +% 'cache', '/tmp/cache'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 's3', ... +% 'location', '/tmp/john'... +% )... +% )... +% ); + + % % no required config +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... +% 'subfolding', [3,4], ... +% 'cache', '/tmp/cache'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file' ... +% )... +% )... +% ); + + + % % primary config invalid type +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... +% 'subfolding', [3,4], ... +% 'cache', '/tmp/cache'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', 10 ... +% )... +% )... +% ); + + % % secondary config invalid type +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... 
+% 'subfolding', [3,4.2], ... +% 'cache', '/tmp/cache'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/john'... +% )... +% )... +% ); + + % % extra config +% c = struct('local', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/raphael'... +% ), 'type_config', ... +% struct(... +% 'blob', struct(... +% 'subfolding', [3,4], ... +% 'cache', '/tmp/cache',... +% 'stage', '/tmp/stage'... +% )... +% )... +% ), ... +% 'remote', ... +% struct('store_config', ... +% struct(... +% 'protocol', 'file', ... +% 'location', '/tmp/john'... +% )... +% )... +% ); + + store_map = fieldnames(c); + store_targets = struct(); + for k=1:numel(store_map) + assert(isstruct(c.(store_map{k})), 'Store `%s` not configured as struct.', store_map{k}); + assert(any(strcmp('store_config', fieldnames(c.(store_map{k})))), 'Store `%s` missing `store_config` key.', store_map{k}); + assert(isstruct(c.(store_map{k}).store_config), 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store_map{k}, class(c.(store_map{k}).store_config)); + assert(any(strcmp('protocol', fieldnames(c.(store_map{k}).store_config))), 'Store `%s` missing `store_config.protocol` key.', store_map{k}); + if isstring(c.(store_map{k}).store_config.protocol) + storePlugin = char(c.(store_map{k}).store_config.protocol); + else + assert(ischar(c.(store_map{k}).store_config.protocol), 'Store `%s` set `store_config.protocol` as `%s` but expecting `char||string`.', store_map{k}, class(c.(store_map{k}).store_config.protocol)); + storePlugin = c.(store_map{k}).store_config.protocol; + end + + storePlugin(1) = upper(storePlugin(1)); + try + config = dj.internal.External.buildConfig(c.(store_map{k}), dj.store_plugins.(storePlugin).validation_config, store_map{k}); + store_targets.(store_map{k}) = dj.store_plugins.(storePlugin)(config); + catch ME + if 
strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') + % Throw error if plugin not found + error('DataJoint:StorePlugin:Missing', ... + 'Missing store plugin `%s`.', storePlugin); + else + rethrow(ME); + end + end + end + end + end +end diff --git a/+dj/+store_plugins/@File/File.m b/+dj/+store_plugins/@File/File.m new file mode 100644 index 00000000..248340f7 --- /dev/null +++ b/+dj/+store_plugins/@File/File.m @@ -0,0 +1,43 @@ +% dj.internal.File - an external storage class for local file stores. +classdef File + properties (Hidden, Constant) + validation_config = struct( ... + 'store_config', struct( ... + 'protocol', struct( ... + 'required', true, ... + 'type_check', @(x) ischar(x) ... + ), ... + 'location', struct( ... + 'required', true, ... + 'type_check', @(x) ischar(x) ... + ) ... + ), ... + 'type_config', struct( ... + 'blob', struct( ... + 'subfolding', struct( ... + 'required', false, ... + 'type_check', @(x) all(floor(x) == x), ... + 'default', [2, 2] ... + ), ... + 'cache', struct( ... + 'required', false, ... + 'type_check', @(x) ischar(x), ... + 'default', [] ... + ) ... + ) ... + ) ... + ) + end + properties + protocol + location + blob_config + end + methods + function file_store = File(config) + file_store.protocol = config.store_config.protocol; + file_store.location = config.store_config.location; + file_store.blob_config = config.type_config.blob; + end + end +end diff --git a/.gitignore b/.gitignore index b48a4ccf..561b275d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ mym/ *.env notebook *getSchema.m -docker-compose.yml \ No newline at end of file +docker-compose.yml +.vscode \ No newline at end of file From f77e87e52d07e44f923831d94820509438057c13 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Wed, 12 Feb 2020 15:34:29 -0600 Subject: [PATCH 03/21] Add File store methods that are compatible with datajoint-python File store. 
--- +dj/+internal/External.m | 1 - +dj/+store_plugins/@File/File.m | 42 +++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/+dj/+internal/External.m b/+dj/+internal/External.m index 0f36e554..40dced10 100644 --- a/+dj/+internal/External.m +++ b/+dj/+internal/External.m @@ -66,7 +66,6 @@ function validateConfig(address, target) address(end) = []; end end - validateInput({}, config); validateConfig({}, validation_config); end diff --git a/+dj/+store_plugins/@File/File.m b/+dj/+store_plugins/@File/File.m index 248340f7..f53d0d97 100644 --- a/+dj/+store_plugins/@File/File.m +++ b/+dj/+store_plugins/@File/File.m @@ -33,11 +33,45 @@ location blob_config end + methods (Static) + function result = exists(external_filepath) + result = isfile(external_filepath); + end + function remove_object(external_filepath) + delete(external_filepath); + end + function upload_buffer(buffer, external_filepath) + fileID = fopen(external_filepath, 'w'); + fwrite(fileID, buffer); + fclose(fileID); + end + function result = download_buffer(external_filepath) + fileID = fopen(external_filepath, 'r'); + result = fread(fileID); + end + end methods - function file_store = File(config) - file_store.protocol = config.store_config.protocol; - file_store.location = config.store_config.location; - file_store.blob_config = config.type_config.blob; + function self = File(config) + self.protocol = config.store_config.protocol; + self.location = config.store_config.location; + self.blob_config = config.type_config.blob; + end + function external_filepath = make_external_filepath(self, relative_filepath) + external_filepath = [self.location '/' relative_filepath]; end end end + + +%x make_external_filepath -- (validation) (for file use filesystem style +% directly, for s3 convert to posix path) + +%x upload_file -- (for uploading filepath, attach) +%x download_file -- (for downloading filepath, attach) +%x upload_buffer -- (for uploading blob) +%x 
download_buffer -- (for downloading blob) +%x remove_object -- (for deleting object from storage) +%x exists -- (verify if object exists in storage) + + + From 23652ba24bee7adccfbafece4ac15e94ed58d0aa Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Wed, 26 Feb 2020 09:51:02 -0600 Subject: [PATCH 04/21] [WIP] Add external blob insert and fetch. --- +dj/+internal/Declare.m | 44 ++++++--- +dj/+internal/ExternalMapping.m | 21 +++++ +dj/+internal/ExternalTable.m | 154 ++++++++++++++++++++++++++++++++ +dj/+internal/GeneralRelvar.m | 9 +- +dj/+internal/Header.m | 29 ++++-- +dj/+internal/Table.m | 25 ++++-- +dj/+store_plugins/@File/File.m | 2 + +dj/Connection.m | 6 ++ +dj/Relvar.m | 12 ++- +dj/Schema.m | 3 + +tests/TestFetch.m | 2 +- +tests/TestUuid.m | 4 +- matlab.prf | 12 +++ 13 files changed, 286 insertions(+), 37 deletions(-) create mode 100644 +dj/+internal/ExternalMapping.m create mode 100644 +dj/+internal/ExternalTable.m create mode 100644 matlab.prf diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index d63cb8ed..8d038d71 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -3,19 +3,23 @@ % table definitions, and to declare the corresponding mysql tables. properties(Constant) + UUID_DATA_TYPE = 'binary(16)' CONSTANT_LITERALS = {'CURRENT_TIMESTAMP'} + EXTERNAL_TABLE_ROOT = '~external' TYPE_PATTERN = struct( ... 'NUMERIC', '^((tiny|small|medium|big)?int|decimal|double|float)', ... 'STRING', '^((var)?char|enum|date|(var)?year|time|timestamp)', ... - 'INTERNAL_BLOB', '^(tiny|medium|long)?blob', ... + 'INTERNAL_BLOB', '^(tiny|medium|long)?blob$', ... + 'EXTERNAL_BLOB', 'blob@(?[a-z]\w*)$', ... 'UUID', 'uuid$' ... 
) - UUID_DATA_TYPE = 'binary(16)' - SPECIAL_TYPES = {'UUID'} + SPECIAL_TYPES = {'UUID', 'EXTERNAL_BLOB'} + EXTERNAL_TYPES = {'EXTERNAL_BLOB'} % data referenced by a UUID in external tables + SERIALIZED_TYPES = {'EXTERNAL_BLOB'} % requires packing data end methods(Static) - function sql = declare(table_instance, def) + function [sql, external_stores] = declare(table_instance, def) % sql = DECLARE(query, definition) % Parse table declaration and declares the table. % sql: Generated SQL to create a table. @@ -36,12 +40,13 @@ switch true case {isa(table_instance, 'dj.internal.UserRelation'), isa(table_instance, ... - 'dj.Part'), isa(table_instance, 'dj.Jobs')} + 'dj.Part'), isa(table_instance, 'dj.Jobs'), ... + isa(table_instance, 'dj.internal.ExternalTable')} % New-style declaration using special classes for each tier tableInfo = struct; if isa(table_instance, 'dj.Part') tableInfo.tier = 'part'; - else + elseif ~isa(table_instance, 'dj.internal.ExternalTable') specialClass = find(cellfun(@(c) isa(table_instance, c), ... dj.Schema.tierClasses)); assert(length(specialClass)==1, ... @@ -70,11 +75,13 @@ dj.internal.fromCamelCase(table_instance.className(length( ... table_instance.master.className)+1:end)))); %#ok - else + elseif ~isa(table_instance, 'dj.internal.ExternalTable') tableName = sprintf('%s%s%s', ... table_instance.schema.prefix, dj.Schema.tierPrefixes{ ... strcmp(tableInfo.tier, dj.Schema.allowedTiers)}, ... dj.internal.fromCamelCase(tableInfo.className)); + else + tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' table_instance.store]; end otherwise @@ -111,6 +118,7 @@ % fields and foreign keys inKey = true; primaryFields = {}; + external_stores = {}; fields = {}; for iLine = 1:length(def) line = def{iLine}; @@ -144,9 +152,11 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name primaryFields{end+1} = fieldInfo.name; %#ok end fields{end+1} = fieldInfo.name; %#ok - sql = sprintf('%s%s', sql, ... 
- dj.internal.Declare.compileAttribute(fieldInfo)); - + [attr_sql, store] = dj.internal.Declare.compileAttribute(fieldInfo); + sql = sprintf('%s%s', sql, attr_sql); + if ~isempty(store) + external_stores{end+1} = store; %#ok + end otherwise error('Invalid table declaration line "%s"', line) end @@ -178,7 +188,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name '^(?[a-z][a-z\d_]*)\s*' % field name ['=\s*(?".*"|''.*''|\w+|[-+]?[0-9]*\.?[0-9]+([eE][-+]?' ... '[0-9]+)?)\s*'] % default value - [':\s*(?\w[\w\s]+(\(.*\))?(\s*[aA][uU][tT][oO]_[iI][nN]' ... + [':\s*(?\w[@\w\s]+(\(.*\))?(\s*[aA][uU][tT][oO]_[iI][nN]' ... '[cC][rR][eE][mM][eE][nN][tT])?)\s*'] % datatype '#(?.*)' % comment '$' % end of line @@ -269,7 +279,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name rel.tableHeader.names)); fieldInfo.name = newattrs{i}; fieldInfo.nullabe = ~inKey; % nonprimary references are nullable - sql = sprintf('%s%s', sql, dj.internal.Declare.compileAttribute(fieldInfo)); + [attr_sql, ~] = dj.internal.Declare.compileAttribute(fieldInfo); + sql = sprintf('%s%s', sql, attr_sql); end fkattrs = rel.primaryKey; @@ -288,10 +299,13 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name % category: DataJoint type match based on TYPE_PATTERN. if strcmpi(category, 'UUID') field.type = dj.internal.Declare.UUID_DATA_TYPE; + elseif any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) + field.store = field.type((strfind(field.type,'@')+1):end); + field.type = dj.internal.Declare.UUID_DATA_TYPE; end end - function sql = compileAttribute(field) + function [sql, store] = compileAttribute(field) % sql = COMPILEATTRIBUTE(field) % Convert the structure field with header {'name' 'type' 'default' 'comment'} % to the SQL column declaration. @@ -317,9 +331,13 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name 'illegal characters in attribute comment "%s"', field.comment) category = dj.internal.Declare.matchType(field.type); + store = []; if any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES)) field.comment = [':' strip(field.type) ':' field.comment]; field = dj.internal.Declare.substituteSpecialType(field, category); + if isfield(field, 'store') + store = field.store; + end end sql = sprintf('`%s` %s %s COMMENT "%s",\n', ... field.name, strtrim(field.type), default, field.comment); diff --git a/+dj/+internal/ExternalMapping.m b/+dj/+internal/ExternalMapping.m new file mode 100644 index 00000000..05dce9a0 --- /dev/null +++ b/+dj/+internal/ExternalMapping.m @@ -0,0 +1,21 @@ +% dj.internal.External - an external static method class. +classdef ExternalMapping < handle + properties + schema + tables + end + methods + function self = ExternalMapping(schema) + self.schema = schema; + self.tables = struct(); + end + function store_table = table(self, store) + keys = fieldnames(self.tables); + if all(~strcmp(store, keys)) + self.tables.(store) = dj.internal.ExternalTable(... + self.schema.conn, store, self.schema); + end + store_table = self.tables.(store); + end + end +end diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m new file mode 100644 index 00000000..68fed081 --- /dev/null +++ b/+dj/+internal/ExternalTable.m @@ -0,0 +1,154 @@ +% dj.internal.External - an external static method class. 
+classdef ExternalTable < dj.internal.Table + properties + store + spec + end + properties (Hidden) + connection + end + methods + function self = ExternalTable(connection, store, schema) +% curr_schema = self.schema; + self.store = store; + self.schema = schema; + self.connection = connection; + stores = dj.config('stores'); + assert(isstruct(stores.(store)), 'Store `%s` not configured as struct.', store); + assert(any(strcmp('store_config', fieldnames(stores.(store)))), 'Store `%s` missing `store_config` key.', store); + assert(isstruct(stores.(store).store_config), 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, class(stores.(store).store_config)); + assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), 'Store `%s` missing `store_config.protocol` key.', store); + if isstring(stores.(store).store_config.protocol) + storePlugin = char(stores.(store).store_config.protocol); + else + assert(ischar(stores.(store).store_config.protocol), 'Store `%s` set `store_config.protocol` as `%s` but expecting `char||string`.', store, class(stores.(store).store_config.protocol)); + storePlugin = stores.(store).store_config.protocol; + end + + storePlugin(1) = upper(storePlugin(1)); + try + config = buildConfig(stores.(store), dj.store_plugins.(storePlugin).validation_config, store); + self.spec = dj.store_plugins.(storePlugin)(config); + catch ME + if strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') + % Throw error if plugin not found + error('DataJoint:StorePlugin:Missing', ... + 'Missing store plugin `%s`.', storePlugin); + else + rethrow(ME); + end + end + end + function create(self) + % parses the table declration and declares the table + + if self.isCreated + return + end + self.schema.reload % ensure that the table does not already exist + if self.isCreated + return + end + def = {... 
+ '# external storage tracking' + 'hash : uuid # hash of contents (blob), of filename + contents (attach), or relative filepath (filepath)' + '---' + 'size :bigint unsigned # size of object in bytes' + 'attachment_name=null : varchar(255) # the filename of an attachment' + 'filepath=null : varchar(1000) # relative filepath or attachment filename' + 'contents_hash=null : uuid # used for the filepath datatype' + 'timestamp=CURRENT_TIMESTAMP :timestamp # automatic timestamp' + }; + def = sprintf('%s\n',def{:}); + + [sql, ~] = dj.internal.Declare.declare(self, def); + self.schema.conn.query(sql); + self.schema.reload + end + function uuid_path = make_uuid_path(self, uuid, suffix) + uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(subfold(uuid, self.spec.blob_config.subfolding), '/') '/' uuid suffix]); + end + function uuid = upload_buffer(self, blob) + uuid = '1d751e2e1e74faf84ab485fde8ef72be'; + packed_cell = mym('serialize {M}', blob); + self.spec.upload_buffer(packed_cell{1}, self.make_uuid_path(uuid, '')); + % insert tracking info + sql = sprintf('INSERT INTO %s (hash, size) VALUES (X''%s'', %s) ON DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP', self.fullTableName, uuid, length(packed_cell{1})); + self.schema.conn.query(sql); + end + function blob = download_buffer(self, uuid) + blob = mym('deserialize', uint8(self.spec.download_buffer(self.make_uuid_path(uuid, '')))); + end + end +end +function folded_array = subfold(name, folds) + folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds, 1:length(folds), cumsum(folds), 'UniformOutput', false); +end +function config = buildConfig(config, validation_config, store_name) + function validateInput(address, target) + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if ~isstruct(target.(fn{k})) + subscript = substruct(address{:}); + try + value = subsref(config, subscript); + vconfig = subsref(validation_config, subscript); 
+ type_check = vconfig.type_check; + if ~type_check(value) + % Throw error for config that fails type validation + error('DataJoint:StoreConfig:WrongType', ... + 'Unexpected type `%s` for config `%s` in store `%s`. Expecting `%s`.', class(value), strjoin(address, ''), store_name, char(type_check)); + end + catch ME + if strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for extra config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Unexpected additional config `%s` specified in store `%s`.', strjoin(address, ''), store_name); + else + rethrow(ME); + end + end + else + validateInput(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + function validateConfig(address, target) + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if any(strcmp('required',fieldnames(target))) + address(end) = []; + address(end) = []; + subscript = substruct(address{:}); + vconfig = subsref(validation_config, subscript); + required = vconfig.required; + try + value = subsref(config, subscript); + catch ME + if required && strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for required config + error('DataJoint:StoreConfig:MissingRequired', ... 
+ 'Missing required config `%s` in store `%s`.', strjoin(address, ''), store_name); + elseif strcmp(ME.identifier,'MATLAB:nonExistentField') + % Set default for optional config + default = vconfig.default; + config = subsasgn(config, subscript, default); + end + end + break; + else + validateConfig(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + validateInput({}, config); + validateConfig({}, validation_config); +end \ No newline at end of file diff --git a/+dj/+internal/GeneralRelvar.m b/+dj/+internal/GeneralRelvar.m index 7c4c2caa..bfd165e2 100644 --- a/+dj/+internal/GeneralRelvar.m +++ b/+dj/+internal/GeneralRelvar.m @@ -210,7 +210,7 @@ function clip(self) ret = self.conn.query(sprintf('SELECT %s FROM %s%s', ... hdr.sql, sql_, limit)); ret = dj.struct.fromFields(ret); - ret = get(self.header.attributes, ret); + ret = get(self.conn, self.header.attributes, ret); if nargout>1 % return primary key structure array @@ -924,7 +924,7 @@ case isa(cond, 'dj.internal.GeneralRelvar') str = strrep(str, '\', '\\'); end -function data = get(attr, data) +function data = get(connection, attr, data) % data = GET(attr, data) % Process in place fetched data. % data: Fetched records. 
@@ -940,6 +940,11 @@ case isa(cond, 'dj.internal.GeneralRelvar') new_value(21:end)]; data(j).(attr(i).name) = new_value; end + elseif attr(i).isBlob && attr(i).isExternal + for j = 1:length(data) + uuid = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); + data(j).(attr(i).name) = connection.schemas.(attr(i).database).external.tables.(attr(i).store).download_buffer(uuid); + end end end end \ No newline at end of file diff --git a/+dj/+internal/Header.m b/+dj/+internal/Header.m index 9d62e18b..09bff794 100644 --- a/+dj/+internal/Header.m +++ b/+dj/+internal/Header.m @@ -78,31 +78,39 @@ attrs.isautoincrement = false(length(attrs.isnullable), 1); attrs.isNumeric = false(length(attrs.isnullable), 1); attrs.isString = false(length(attrs.isnullable), 1); - attrs.isBlob = false(length(attrs.isnullable), 1); attrs.isUuid = false(length(attrs.isnullable), 1); + attrs.isBlob = false(length(attrs.isnullable), 1); + attrs.isExternal = false(length(attrs.isnullable), 1); + attrs.database = cell(length(attrs.isnullable),1); + attrs.store = cell(length(attrs.isnullable),1); attrs.alias = cell(length(attrs.isnullable),1); attrs.sqlType = cell(length(attrs.isnullable),1); attrs.sqlComment = cell(length(attrs.isnullable),1); for i = 1:length(attrs.isnullable) + attrs.database{i} = schema.dbname; attrs.sqlType{i} = attrs.type{i}; attrs.sqlComment{i} = attrs.comment{i}; special = regexp(attrs.comment{i}, ':([^:]+):(.*)', 'tokens'); if ~isempty(special) attrs.type{i} = special{1}{1}; attrs.comment{i} = special{1}{2}; + category = dj.internal.Declare.matchType(attrs.type{i}); + assert(any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES))); + else + category = dj.internal.Declare.matchType(attrs.sqlType{i}); end attrs.isnullable{i} = strcmpi(attrs.isnullable{i}, 'YES'); attrs.iskey{i} = strcmpi(char(attrs.iskey{i}), 'PRI'); attrs.isautoincrement(i) = ~isempty(regexpi(attrs.Extra{i}, ... 'auto_increment', 'once')); - attrs.isNumeric(i) = any(strcmpi( ... 
- dj.internal.Declare.matchType(attrs.type{i}), {'NUMERIC'})); - attrs.isString(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'STRING'); - attrs.isBlob(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'INTERNAL_BLOB'); - attrs.isUuid(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'UUID'); + attrs.isNumeric(i) = any(strcmpi(category, {'NUMERIC'})); + attrs.isString(i) = strcmpi(category, 'STRING'); + attrs.isUuid(i) = strcmpi(category, 'UUID'); + attrs.isBlob(i) = any(strcmpi(category, {'INTERNAL_BLOB', 'EXTERNAL_BLOB'})); + if any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) + attrs.isExternal(i) = true; + attrs.store{i} = attrs.type{i}(regexp(attrs.type{i}, '@', 'once')+1:end); + end % strip field lengths off integer types attrs.type{i} = regexprep(sprintf('%s',attrs.type{i}), ... '((tiny|small|medium|big)?int)\(\d+\)','$1'); @@ -170,6 +178,9 @@ function project(self, params) 'isString', false, ... 'isBlob', false, ... 'isUuid', false, ... + 'isExternal', false, ... + 'store', [], ... + 'database', [], ... 'alias', toks{1}{1}, ... 'sqlType', self.computedTypeString, ... 'sqlComment', '' ... 
diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index 1a631ecb..f7fea273 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -20,10 +20,10 @@ properties(SetAccess = protected) className % the name of the corresponding base dj.Relvar class + schema % handle to a schema object end properties(SetAccess = private) - schema % handle to a schema object plainTableName % just the table name tableHeader % attribute information end @@ -54,6 +54,10 @@ name = class(self); if any(strcmp(name,{'dj.Table','dj.Relvar'})) name = ''; + elseif isa(self,'dj.internal.ExternalTable') + store = self.store; + store(1) = upper(store(1)); + name = [self.schema.package '.External' store]; end end end @@ -331,7 +335,7 @@ function addAttribute(self, definition, after) after = [' ' after]; end - sql = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... + [sql, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... definition)); self.alter(sprintf('ADD COLUMN %s%s', sql(1:end-2), after)); end @@ -346,7 +350,7 @@ function alterAttribute(self, attrName, newDefinition) % dj.Table/alterAttribute - Modify the definition of attribute % attrName using its new line from the table definition % "newDefinition" - sql = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... + [sql, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... 
newDefinition)); self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql(1:end-2))); end @@ -582,11 +586,6 @@ function drop(self) methods(Access=private) - function yes = isCreated(self) - yes = self.schema.tableNames.isKey(self.className); - end - - function alter(self, alterStatement) % dj.Table/alter % alter(self, alterStatement) @@ -606,6 +605,10 @@ function alter(self, alterStatement) methods + function yes = isCreated(self) + yes = self.schema.tableNames.isKey(self.className); + end + function create(self) % parses the table declration and declares the table @@ -618,7 +621,11 @@ function create(self) end def = dj.internal.Declare.getDefinition(self); - sql = dj.internal.Declare.declare(self, def); + [sql, external_stores] = dj.internal.Declare.declare(self, def); + for k=1:length(external_stores) + table = self.schema.external.table(external_stores{k}); + table.create; + end self.schema.conn.query(sql); self.schema.reload end diff --git a/+dj/+store_plugins/@File/File.m b/+dj/+store_plugins/@File/File.m index f53d0d97..cd08b466 100644 --- a/+dj/+store_plugins/@File/File.m +++ b/+dj/+store_plugins/@File/File.m @@ -41,6 +41,8 @@ function remove_object(external_filepath) delete(external_filepath); end function upload_buffer(buffer, external_filepath) + [~,start_idx,~] = regexp(external_filepath, '/', 'match', 'start', 'end'); + mkdir(external_filepath(1:(start_idx(end)-1))); fileID = fopen(external_filepath, 'w'); fwrite(fileID, buffer); fclose(fileID); diff --git a/+dj/Connection.m b/+dj/Connection.m index 1d393fa8..05139d2a 100644 --- a/+dj/Connection.m +++ b/+dj/Connection.m @@ -8,6 +8,7 @@ inTransaction = false connId % connection handle packages % maps database names to package names + schemas % registered schema objects % dependency lookups by table name foreignKeys % maps table names to their referenced table names (primary foreign key) @@ -48,9 +49,14 @@ end self.foreignKeys = struct([]); self.packages = containers.Map; + self.schemas = struct(); end + 
function register(self, schema) + self.schemas.(schema.dbname) = schema; + end + function addPackage(self, dbname, package) self.packages(dbname) = package; end diff --git a/+dj/Relvar.m b/+dj/Relvar.m index 03769f6f..49436c59 100755 --- a/+dj/Relvar.m +++ b/+dj/Relvar.m @@ -228,7 +228,17 @@ function insert(self, tuples, command) decMtx = hex2dec(hexMtx); value = uint8(decMtx); elseif header.attributes(attr_idx).isBlob - placeholder = '"{M}"'; + if ~header.attributes(attr_idx).isExternal + placeholder = '"{M}"'; + else + placeholder = '"{B}"'; + value = self.schema.external.tables.(header.attributes(attr_idx).store).upload_buffer(value); + hexstring = value'; + reshapedString = reshape(hexstring,2,16); + hexMtx = reshapedString.'; + decMtx = hex2dec(hexMtx); + value = uint8(decMtx); + end else assert((isnumeric(value) || islogical(value)) && (isscalar( ... value) || isempty(value)),... diff --git a/+dj/Schema.m b/+dj/Schema.m index 759dd81f..b1aca5a5 100755 --- a/+dj/Schema.m +++ b/+dj/Schema.m @@ -21,6 +21,7 @@ tableNames % tables indexed by classNames headers % dj.internal.Header objects indexed by table names v % virtual class generator + external end @@ -63,6 +64,8 @@ self.headers = containers.Map('KeyType','char','ValueType','any'); self.tableNames = containers.Map('KeyType','char','ValueType','char'); self.v = dj.internal.TableAccessor(self); + self.external = dj.internal.ExternalMapping(self); + conn.register(self); end diff --git a/+tests/TestFetch.m b/+tests/TestFetch.m index 39120208..64ecdbbd 100644 --- a/+tests/TestFetch.m +++ b/+tests/TestFetch.m @@ -37,7 +37,7 @@ function testDescribe(testCase) q = University.All; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - raw_sql = dj.internal.Declare.declare(q, raw_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); assembled_sql = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end diff --git a/+tests/TestUuid.m 
b/+tests/TestUuid.m index 33b2182d..fa999269 100644 --- a/+tests/TestUuid.m +++ b/+tests/TestUuid.m @@ -57,8 +57,8 @@ function testReverseEngineering(testCase) q = University.Message; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - raw_sql = dj.internal.Declare.declare(q, raw_def); - assembled_sql = dj.internal.Declare.declare(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end diff --git a/matlab.prf b/matlab.prf new file mode 100644 index 00000000..c2801867 --- /dev/null +++ b/matlab.prf @@ -0,0 +1,12 @@ +#MATLAB Preferences +#Thu Feb 13 17:52:58 UTC 2020 +MatlabExitConfirm=Bfalse +RLOrderB2_WB_2014b_1=S0:1: +RLHiddenB2_WB_2014b_1=I16376 +CommandWindowClearConfirmation=Btrue +CurrentKeyBindingSet=SWindows +RLWidthB2_WB_2014b_1_1=I130 +RLWidthB2_WB_2014b_1_0=I130 +RLPrevInitB2_WB_2014b_1=Btrue +NV_08002751cda240809722=S1584207724571 +HasAppendedMlxToFileExtensionsKey=Btrue \ No newline at end of file From 645383c4af2e24f92c9fd6e01f54e796fd1b006f Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 13:13:26 -0600 Subject: [PATCH 05/21] [WIP] Add external storage infrastructure. 
--- +dj/+internal/Declare.m | 263 +++++++++- +dj/+internal/ExternalTable.m | 58 ++- +dj/+internal/GeneralRelvar.m | 30 +- +dj/+internal/Table.m | 19 +- +dj/+lib/DataHash-license.txt | 1 - +dj/+lib/DataHash.m | 933 +++++++++++++++++----------------- +dj/Relvar.m | 6 +- +tests/TestFetch.m | 4 +- +tests/TestUuid.m | 4 +- 9 files changed, 820 insertions(+), 498 deletions(-) delete mode 100644 +dj/+lib/DataHash-license.txt diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index 8d038d71..c85c23b5 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -19,6 +19,260 @@ end methods(Static) + function [sql, newattrs] = makeFK2(line, existingFields, inKey, hash) + % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) + % Add foreign key to SQL table definition. + % sql: Modified in-place SQL to include foreign keys. + % newattrs: Extracted new field attributes. + % line: DataJoint definition, single line. + % existingFields: Existing field attributes. + % inKey: Set as primary key. + % hash: Current hash as base. + sql = ''; + pat = ['^(?\([\s\w,]*\))?' ... + '\s*->\s*' ... + '(?\w+\.[A-Z][A-Za-z0-9]*)' ... + '\w*' ... + '(?\([\s\w,]*\))?' ... + '\s*(#.*)?$']; + fk = regexp(line, pat, 'names'); + if exist(fk.cname, 'class') + rel = feval(fk.cname); + assert(isa(rel, 'dj.Relvar'), 'class %s is not a DataJoint relation', fk.cname) + else + rel = dj.Relvar(fk.cname); + end + + % parse and validate the attribute lists + attrs = strsplit(fk.attrs, {' ',',','(',')'}); + newattrs = strsplit(fk.newattrs, {' ',',','(',')'}); + attrs(cellfun(@isempty, attrs))=[]; + newattrs(cellfun(@isempty, newattrs))=[]; + assert(all(cellfun(@(a) ismember(a, rel.primaryKey), attrs)), ... + 'All attributes in (%s) must be in the primary key of %s', ... 
+ strjoin(attrs, ','), rel.className) + if length(newattrs)==1 + % unambiguous single attribute + if length(rel.primaryKey)==1 + attrs = rel.primaryKey; + elseif isempty(attrs) && length(setdiff(rel.primaryKey, existingFields))==1 + attrs = setdiff(rel.primaryKey, existingFields); + end + end + assert(length(attrs) == length(newattrs) , ... + 'Mapped fields (%s) and (%s) must match in the foreign key.', ... + strjoin(newattrs,','), strjoin(attrs,',')) + + % prepend unspecified primary key attributes that have not yet been + % included + pk = rel.primaryKey; + pk(ismember(pk,attrs) | ismember(pk,existingFields))=[]; + attrs = [pk attrs]; + newattrs = [pk newattrs]; + + % fromFields and toFields are sorted in the same order as + % ref.rel.tableHeader.attributes + [~, ix] = sort(cellfun(@(a) find(strcmp(a, rel.primaryKey)), attrs)); + attrs = attrs(ix); + newattrs = newattrs(ix); + + for i=1:length(attrs) + fieldInfo = rel.tableHeader.attributes(strcmp(attrs{i}, ... + rel.tableHeader.names)); + fieldInfo.name = newattrs{i}; + fieldInfo.nullabe = ~inKey; % nonprimary references are nullable + [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, NaN); + sql = sprintf('%s%s,\n', sql, attr_sql); + end + + fkattrs = rel.primaryKey; + fkattrs(ismember(fkattrs, attrs))=newattrs; + hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); + sql = sprintf(... + ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... + 'ON UPDATE CASCADE ON DELETE RESTRICT'], sql, hash, backquotedList(fkattrs), ... + rel.fullTableName, backquotedList(rel.primaryKey)); + end + function [sql, external_stores] = declare2(table_instance, def) + % sql = DECLARE(query, definition) + % Parse table declaration and declares the table. + % sql: Generated SQL to create a table. + % query: DataJoint Table instance. + % definition: DataJoint Table definition. 
+ + def = strrep(def, '%{', ''); + def = strrep(def, '%}', ''); + def = strtrim(regexp(def,'\n','split')'); + + % append the next line to lines that end in a backslash + for i=find(cellfun(@(x) ~isempty(x) && x(end)=='\', def')) + def{i} = [def{i}(1:end-1) ' ' def{i+1}]; + def(i+1) = ''; + end + + % parse table schema, name, type, and comment + switch true + + case {isa(table_instance, 'dj.internal.UserRelation'), isa(table_instance, ... + 'dj.Part'), isa(table_instance, 'dj.Jobs'), ... + isa(table_instance, 'dj.internal.ExternalTable')} + % New-style declaration using special classes for each tier + tableInfo = struct; + if isa(table_instance, 'dj.Part') + tableInfo.tier = 'part'; + elseif ~isa(table_instance, 'dj.internal.ExternalTable') + specialClass = find(cellfun(@(c) isa(table_instance, c), ... + dj.Schema.tierClasses)); + assert(length(specialClass)==1, ... + 'Unknown type of UserRelation in %s', class(table_instance)) + tableInfo.tier = dj.Schema.allowedTiers{specialClass}; + end + % remove empty lines + def(cellfun(@(x) isempty(x), def)) = []; + if strncmp(def{1}, '#', 1) + tableInfo.comment = strtrim(def{1}(2:end)); + def = def(2:end); + else + tableInfo.comment = ''; + end + % remove pure comments + def(cellfun(@(x) strncmp('#',strtrim(x),1), def)) = []; + cname = strsplit(table_instance.className, '.'); + tableInfo.package = strjoin(cname(1:end-1), '.'); + tableInfo.className = cname{end}; + if isa(table_instance, 'dj.Part') + tableName = sprintf('%s%s%s', ... + table_instance.schema.prefix, ... + dj.Schema.tierPrefixes{strcmp(tableInfo.tier, ... + dj.Schema.allowedTiers)}, sprintf('%s__%s', ... + table_instance.master.plainTableName, ... + dj.internal.fromCamelCase(table_instance.className(length( ... + table_instance.master.className)+1:end)))); + %#ok + elseif ~isa(table_instance, 'dj.internal.ExternalTable') + tableName = sprintf('%s%s%s', ... + table_instance.schema.prefix, dj.Schema.tierPrefixes{ ... 
+ strcmp(tableInfo.tier, dj.Schema.allowedTiers)}, ... + dj.internal.fromCamelCase(tableInfo.className)); + else + tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' table_instance.store]; + end + + otherwise + % Old-style declaration for backward compatibility + + % remove empty lines and pure comment lines + def(cellfun(@(x) isempty(x) || strncmp('#',x,1), def)) = []; + firstLine = strtrim(def{1}); + def = def(2:end); + pat = { + '^(?\w+)\.(?\w+)\s*' % package.TableName + '\(\s*(?\w+)\s*\)\s*' % (tier) + '#\s*(?.*)$' % # comment + }; + tableInfo = regexp(firstLine, cat(2,pat{:}), 'names'); + assert(numel(tableInfo)==1, ... + ['invalidTableDeclaration:Incorrect syntax in table declaration, ' ... + 'line 1: \n %s'], firstLine) + assert(ismember(tableInfo.tier, dj.Schema.allowedTiers),... + 'invalidTableTier:Invalid tier for table ', tableInfo.className) + cname = sprintf('%s.%s', tableInfo.package, tableInfo.className); + assert(strcmp(cname, table_instance.className), ... + 'Table name %s does not match in file %s', cname, ... + table_instance.className) + tableName = sprintf('%s%s%s', table_instance.schema.prefix, ... + dj.Schema.tierPrefixes{strcmp(tableInfo.tier, ... + dj.Schema.allowedTiers)}, dj.internal.fromCamelCase( ... + stableInfo.className)); + end + +% sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, ... +% tableName); + + % fields and foreign keys + inKey = true; + primaryFields = {}; + foreignKeySql = {}; + indexSql = {}; + attributeSql = {}; + external_stores = {}; + fields = {}; + for iLine = 1:length(def) + line = def{iLine}; + switch true + case strncmp(line,'---',3) + inKey = false; + % foreign key + case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') +% [sql, newFields] = dj.internal.Declare.makeFK2( ... +% sql, line, fields, inKey, ... +% dj.internal.shorthash(sprintf('`%s`.`%s`', ... +% table_instance.schema.dbname, tableName))); +% sql = sprintf('%s,\n', sql); + [fk_sql, newFields] = dj.internal.Declare.makeFK2( ... 
+ line, fields, inKey, ... + dj.internal.shorthash(sprintf('`%s`.`%s`', ... + table_instance.schema.dbname, tableName))); + foreignKeySql = [foreignKeySql, fk_sql]; %#ok + fields = [fields, newFields]; %#ok + if inKey + primaryFields = [primaryFields, newFields]; %#ok + end + + % index + case regexpi(line, '^(unique\s+)?index[^:]*$') +% sql = sprintf('%s%s,\n', sql, line); % add checks + indexSql = [indexSql, line]; %#ok + + % attribute + case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name + '(=\s*\S+(\s+\S+)*\s*)?' ... % opt. default + ':\s*\w.*$']) % type, comment + fieldInfo = dj.internal.Declare.parseAttrDef(line); + assert(~inKey || ~fieldInfo.isnullable, ... + 'primary key attributes cannot be nullable') + if inKey + primaryFields{end+1} = fieldInfo.name; %#ok + end + fields{end+1} = fieldInfo.name; %#ok + [attr_sql, store, foreignKeySql] = dj.internal.Declare.compileAttribute(fieldInfo, foreignKeySql); +% sql = sprintf('%s%s', sql, attr_sql); + attributeSql = [attributeSql, attr_sql]; %#ok + if ~isempty(store) + external_stores{end+1} = store; %#ok + end + otherwise + error('Invalid table declaration line "%s"', line) + end + end + +% % add primary key declaration +% assert(~isempty(primaryFields), 'table must have a primary key') +% sql = sprintf('%sPRIMARY KEY (%s),\n' ,sql, backquotedList(primaryFields)); +% +% % finish the declaration +% sql = sprintf('%s\n) ENGINE = InnoDB, COMMENT "%s"', sql(1:end-2), ... 
+% tableInfo.comment); + + create_sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, tableName); + table_sql = {attributeSql', {['PRIMARY KEY (`' strjoin(primaryFields, '`,`') '`)']}, foreignKeySql', indexSql'}; + table_sql = sprintf([strjoin(cat(1, table_sql{:}), ',\n') '\n']); + engine_sql = sprintf(') ENGINE = InnoDB, COMMENT "%s"', tableInfo.comment); + + sql = sprintf('%s%s%s', create_sql, table_sql, engine_sql); + + + % execute declaration + fprintf \n\n + fprintf(sql) + fprintf \n\n\n + end + % table_comment => tableInfo.comment + % primary_key => tableInfo.primary_key + % attribute_sql => tableInfo.attribute_sql + % foreign_key_sql => tableInfo.foreign_key_sql + % index_sql => tableInfo.index_sql + % external_stores => tableInfo.external_stores function [sql, external_stores] = declare(table_instance, def) % sql = DECLARE(query, definition) % Parse table declaration and declares the table. @@ -292,7 +546,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name rel.fullTableName, backquotedList(rel.primaryKey)); end - function field = substituteSpecialType(field, category) + function [field, foreignKeySql] = substituteSpecialType(field, category, foreignKeySql) % field = SUBSTITUTESPECIALTYPE(field, category) % Substitute DataJoint type with sql type. % field: Modified in-place field attributes. @@ -302,10 +556,11 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name elseif any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) field.store = field.type((strfind(field.type,'@')+1):end); field.type = dj.internal.Declare.UUID_DATA_TYPE; + foreignKeySql = [foreignKeySql, sprintf('FOREIGN KEY (`%s`) REFERENCES `{database}`.`%s_%s` (`hash`) ON UPDATE RESTRICT ON DELETE RESTRICT', field.name, dj.internal.Declare.EXTERNAL_TABLE_ROOT, field.store)]; %#ok end end - function [sql, store] = compileAttribute(field) + function [sql, store, foreignKeySql] = compileAttribute(field, foreignKeySql) % sql = COMPILEATTRIBUTE(field) % Convert the structure field with header {'name' 'type' 'default' 'comment'} % to the SQL column declaration. @@ -334,12 +589,12 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name store = []; if any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES)) field.comment = [':' strip(field.type) ':' field.comment]; - field = dj.internal.Declare.substituteSpecialType(field, category); + [field, foreignKeySql] = dj.internal.Declare.substituteSpecialType(field, category, foreignKeySql); if isfield(field, 'store') store = field.store; end end - sql = sprintf('`%s` %s %s COMMENT "%s",\n', ... + sql = sprintf('`%s` %s %s COMMENT "%s"', ... field.name, strtrim(field.type), default, field.comment); end diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index 68fed081..92fc33f7 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -1,5 +1,6 @@ % dj.internal.External - an external static method class. 
-classdef ExternalTable < dj.internal.Table +% classdef ExternalTable < dj.internal.Table +classdef ExternalTable < dj.Relvar properties store spec @@ -61,24 +62,71 @@ function create(self) }; def = sprintf('%s\n',def{:}); - [sql, ~] = dj.internal.Declare.declare(self, def); + [sql, ~] = dj.internal.Declare.declare2(self, def); self.schema.conn.query(sql); self.schema.reload end function uuid_path = make_uuid_path(self, uuid, suffix) + uuid = strrep(uuid, '-', ''); uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(subfold(uuid, self.spec.blob_config.subfolding), '/') '/' uuid suffix]); end function uuid = upload_buffer(self, blob) - uuid = '1d751e2e1e74faf84ab485fde8ef72be'; packed_cell = mym('serialize {M}', blob); + % https://www.mathworks.com/matlabcentral/fileexchange/25921-getmd5 + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); self.spec.upload_buffer(packed_cell{1}, self.make_uuid_path(uuid, '')); % insert tracking info - sql = sprintf('INSERT INTO %s (hash, size) VALUES (X''%s'', %s) ON DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP', self.fullTableName, uuid, length(packed_cell{1})); - self.schema.conn.query(sql); + sql = sprintf('INSERT INTO %s (hash, size) VALUES (X''%s'', %i) ON DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP', self.fullTableName, uuid, length(packed_cell{1})); + self.connection.query(sql); end function blob = download_buffer(self, uuid) blob = mym('deserialize', uint8(self.spec.download_buffer(self.make_uuid_path(uuid, '')))); end + function refs = references(self) + sql = {... 
+ 'SELECT concat(''`'', table_schema, ''`.`'', table_name, ''`'') as referencing_table, column_name ' + 'FROM information_schema.key_column_usage ' + 'WHERE referenced_table_name="{S}" and referenced_table_schema="{S}"' + }; + sql = sprintf('%s',sql{:}); + refs = self.connection.query(sql, self.plainTableName, self.schema.dbname); + end + function used = used(self) + ref = self.references; + used = self & cellfun(@(column, table) sprintf('hex(`hash`) in (select hex(`%s`) from %s)', column, table), ref.column_name, ref.referencing_table, 'UniformOutput', false); + end + function unused = unused(self) + ref = self.references; + unused = self - cellfun(@(column, table) sprintf('hex(`hash`) in (select hex(`%s`) from %s)', column, table), ref.column_name, ref.referencing_table, 'UniformOutput', false); + end + function paths = fetch_external_paths(self, varargin) + external_content = fetch(self, 'hash', 'attachment_name', 'filepath', varargin{:}); + paths = cell(length(external_content),1); + for i = 1:length(external_content) + if ~isempty(external_content(i).attachment_name) + elseif ~isempty(external_content(i).filepath) + else + paths{i}{2} = self.make_uuid_path(external_content(i).hash, ''); + end + paths{i}{1} = external_content(i).hash; + end + end + function delete(self, delete_external_files, limit) + if ~delete_external_files + delQuick(self.unused); + else + if ~isempty(limit) + items = fetch_external_paths(self.unused, sprintf('LIMIT %i', limit)); + else + items = fetch_external_paths(self.unused); + end + for i = 1:length(items) + count = delQuick(self & struct('hash',items{i}{1}), true); + assert(count == 0); + self.spec.remove_object(items{i}{2}); + end + end + end end end function folded_array = subfold(name, folds) diff --git a/+dj/+internal/GeneralRelvar.m b/+dj/+internal/GeneralRelvar.m index bfd165e2..fab70440 100644 --- a/+dj/+internal/GeneralRelvar.m +++ b/+dj/+internal/GeneralRelvar.m @@ -433,10 +433,10 @@ function restrict(self, varargin) 
function ret = minus(self, arg) % MINUS -- relational antijoin - if iscell(arg) - throwAsCaller(MException('DataJoint:invalidOperator',... - 'Antijoin only accepts single restrictions')) - end +% if iscell(arg) +% throwAsCaller(MException('DataJoint:invalidOperator',... +% 'Antijoin only accepts single restrictions')) +% end ret = self.copy; ret.restrict('not', arg) end @@ -932,18 +932,22 @@ case isa(cond, 'dj.internal.GeneralRelvar') for i = 1:length(attr) if attr(i).isUuid for j = 1:length(data) - new_value = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); - new_value = [new_value(1:8) '-' ... - new_value(9:12) '-' ... - new_value(13:16) '-' ... - new_value(17:20) '-' ... - new_value(21:end)]; - data(j).(attr(i).name) = new_value; + if ~isempty(data(j).(attr(i).name)) + new_value = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); + new_value = [new_value(1:8) '-' ... + new_value(9:12) '-' ... + new_value(13:16) '-' ... + new_value(17:20) '-' ... + new_value(21:end)]; + data(j).(attr(i).name) = new_value; + end end elseif attr(i).isBlob && attr(i).isExternal for j = 1:length(data) - uuid = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); - data(j).(attr(i).name) = connection.schemas.(attr(i).database).external.tables.(attr(i).store).download_buffer(uuid); + if ~isempty(data(j).(attr(i).name)) + uuid = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); + data(j).(attr(i).name) = connection.schemas.(attr(i).database).external.tables.(attr(i).store).download_buffer(uuid); + end end end end diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index f7fea273..9730e53d 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -248,7 +248,7 @@ function erd(self, up, down) % get foreign keys fk = self.schema.conn.foreignKeys; if ~isempty(fk) - fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName), fk)); + fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName) && ~contains(s.ref, '~external'), fk)); end attributes_thus_far = 
{}; @@ -335,9 +335,9 @@ function addAttribute(self, definition, after) after = [' ' after]; end - [sql, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - definition)); - self.alter(sprintf('ADD COLUMN %s%s', sql(1:end-2), after)); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... + definition), NaN); + self.alter(sprintf('ADD COLUMN %s%s', sql, after)); end function dropAttribute(self, attrName) @@ -350,9 +350,9 @@ function alterAttribute(self, attrName, newDefinition) % dj.Table/alterAttribute - Modify the definition of attribute % attrName using its new line from the table definition % "newDefinition" - [sql, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - newDefinition)); - self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql(1:end-2))); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... + newDefinition), NaN); + self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql)); end function addForeignKey(self, target) @@ -368,7 +368,7 @@ function addForeignKey(self, target) if isa(target, 'dj.Table') target = sprintf('->%s', target.className); end - sql = dj.internal.Declare.makeFK('', target, self.primaryKey, ... + sql = dj.internal.Declare.makeFK2('', target, self.primaryKey, ... 
true, dj.internal.shorthash(self.fullTableName)); self.alter(sprintf('ADD %s', sql)) end @@ -621,7 +621,8 @@ function create(self) end def = dj.internal.Declare.getDefinition(self); - [sql, external_stores] = dj.internal.Declare.declare(self, def); + [sql, external_stores] = dj.internal.Declare.declare2(self, def); + sql = strrep(sql, '{database}', self.schema.dbname); for k=1:length(external_stores) table = self.schema.external.table(external_stores{k}); table.create; diff --git a/+dj/+lib/DataHash-license.txt b/+dj/+lib/DataHash-license.txt deleted file mode 100644 index 8b137891..00000000 --- a/+dj/+lib/DataHash-license.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/+dj/+lib/DataHash.m b/+dj/+lib/DataHash.m index 6809618d..a47847a2 100644 --- a/+dj/+lib/DataHash.m +++ b/+dj/+lib/DataHash.m @@ -1,484 +1,495 @@ -function Hash = DataHash(Data, Opt) -% DATAHASH - Checksum for Matlab array of any type -% This function creates a hash value for an input of any type. The type and -% dimensions of the input are considered as default, such that UINT8([0,0]) and -% UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed -% recursively. -% -% Hash = DataHash(Data, Opt) -% INPUT: -% Data: Array of these built-in types: -% (U)INT8/16/32/64, SINGLE, DOUBLE, (real/complex, full/sparse) -% CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested), -% function_handle. -% Opt: Struct to specify the hashing algorithm and the output format. -% Opt and all its fields are optional. -% Opt.Method: String, known methods for Java 1.6 (Matlab 2011b): -% 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'. -% Call DataHash without inputs to get a list of available methods. -% Default: 'MD5'. -% Opt.Format: String specifying the output format: -% 'hex', 'HEX': Lower/uppercase hexadecimal string. -% 'double', 'uint8': Numerical vector. -% 'base64': Base64 encoded string, only printable ASCII -% characters, shorter than 'hex', no padding. -% Default: 'hex'. 
-% Opt.Input: Type of the input as string, not case-sensitive: -% 'array': The contents, type and size of the input [Data] are -% considered for the creation of the hash. Nested CELLs -% and STRUCT arrays are parsed recursively. Empty arrays of -% different type reply different hashs. -% 'file': [Data] is treated as file name and the hash is calculated -% for the files contents. -% 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the -% binary contents of the array is considered, such that -% e.g. empty arrays of different type reply the same hash. -% 'ascii': Same as 'bin', but only the 8-bit ASCII part of the 16-bit -% Matlab CHARs is considered. -% Default: 'array'. -% -% OUTPUT: -% Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing -% method. -% -% EXAMPLES: -% % Default: MD5, hex: -% DataHash([]) % 5b302b7b2099a97ba2a276640a192485 -% % MD5, Base64: -% Opt = struct('Format', 'base64', 'Method', 'MD5'); -% DataHash(int32(1:10), Opt) % +tJN9yeF89h3jOFNN55XLg -% % SHA-1, Base64: -% S.a = uint8([]); -% S.b = {{1:10}, struct('q', uint64(415))}; -% Opt.Method = 'SHA-1'; -% Opt.Format = 'HEX'; -% DataHash(S, Opt) % 18672BE876463B25214CA9241B3C79CC926F3093 -% % SHA-1 of binary values: -% Opt = struct('Method', 'SHA-1', 'Input', 'bin'); -% DataHash(1:8, Opt) % 826cf9d3a5d74bbe415e97d4cecf03f445f69225 -% % SHA-256, consider ASCII part only (Matlab's CHAR has 16 bits!): -% Opt.Method = 'SHA-256'; -% Opt.Input = 'ascii'; -% DataHash('abc', Opt) -% % ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad -% % Or equivalently: -% Opt.Input = 'bin'; -% DataHash(uint8('abc'), Opt) -% -% NOTES: -% Function handles and user-defined objects cannot be converted uniquely: -% - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS, -% but the replied struct can depend on the Matlab version. -% - It is tried to convert objects to UINT8 streams in the subfunction -% ConvertObject. 
A conversion by STRUCT() might be more appropriate. -% Adjust these subfunctions on demand. -% -% MATLAB CHARs have 16 bits! Use Opt.Input='ascii' for comparisons with e.g. -% online hash generators. -% -% Matt Raum suggested this for e.g. user-defined objects: -% DataHash(getByteStreamFromArray(Data) -% This works very well, but unfortunately getByteStreamFromArray is -% undocumented, such that it might vanish in the future or reply different -% output. -% -% For arrays the calculated hash value might be changed in new versions. -% Calling this function without inputs replies the version of the hash. -% -% The C-Mex function GetMD5 is 2 to 100 times faster, but obtains MD5 only: -% http://www.mathworks.com/matlabcentral/fileexchange/25921 -% -% Tested: Matlab 7.7, 7.8, 7.13, 8.6, WinXP/32, Win7/64 -% Author: Jan Simon, Heidelberg, (C) 2011-2016 matlab.2010(a)n(MINUS)simon.de -% -% See also: TYPECAST, CAST. -% -% Michael Kleder, "Compute Hash", no structs and cells: -% http://www.mathworks.com/matlabcentral/fileexchange/8944 -% Tim, "Serialize/Deserialize", converts structs and cells to a byte stream: -% http://www.mathworks.com/matlabcentral/fileexchange/29457 - -% $JRev: R-H V:033 Sum:R+m7rAPNLvlw Date:18-Jun-2016 14:33:17 $ -% $License: BSD (use/copy/change/redistribute on own risk, mention the author) $ -% $File: Tools\GLFile\DataHash.m $ -% History: -% 001: 01-May-2011 21:52, First version. -% 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered. -% 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data. -% 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also. -% Thanks to David (author 243360), who found this bug. -% Jan Achterhold (author 267816) suggested to consider Java objects. -% 016: 01-Feb-2015 20:53, Java heap space exhausted for large files. -% Now files are process in chunks to save memory. -% 017: 15-Feb-2015 19:40, Collsions: Same hash for different data. 
-% Examples: zeros(1,1) and zeros(1,1,0) -% complex(0) and zeros(1,1,0,0) -% Now the number of dimensions is included, to avoid this. -% 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX. -% Ross found these 2 bugs, which occur when TYPECASTX is not installed. -% If you need the base64 format padded with '=' characters, adjust -% fBase64_enc as you like. -% 026: 29-Jun-2015 00:13, Changed hash for STRUCTs. -% Struct arrays are analysed field by field now, which is much faster. -% 027: 13-Sep-2015 19:03, 'ascii' input as abbrev. for Input='bin' and UINT8(). -% 028: 15-Oct-2015 23:11, Example values in help section updated to v022. -% 029: 16-Oct-2015 22:32, Use default options for empty input. -% 031: 28-Feb-2016 15:10, New hash value to get same reply as GetMD5. -% New Matlab version (at least 2015b) use a fast method for TYPECAST, such -% that calling James Tursa's TYPECASTX is not needed anymore. -% Matlab 6.5 not supported anymore: MException for CATCH. -% 033: 18-Jun-2016 14:28, BUGFIX: Failed on empty files. -% Thanks to Christian (AuthorID 2918599). - -% OPEN BUGS: -% Nath wrote: -% function handle refering to struct containing the function will create -% infinite loop. Is there any workaround ? -% Example: -% d= dynamicprops(); -% addprop(d,'f'); -% d.f= @(varargin) struct2cell(d); -% DataHash(d.f) % infinite loop -% This is caught with an error message concerning the recursion limit now. 
- -% Main function: =============================================================== -% Default options: ------------------------------------------------------------- -Method = 'MD5'; -OutFormat = 'hex'; -isFile = false; -isBin = false; - -% Check number and type of inputs: --------------------------------------------- -nArg = nargin; -if nArg == 2 - if isa(Opt, 'struct') == 0 % Bad type of 2nd input: - Error_L('BadInput2', '2nd input [Opt] must be a struct.'); - end - - % Specify hash algorithm: - if isfield(Opt, 'Method') && ~isempty(Opt.Method) % Short-circuiting - Method = upper(Opt.Method); +function Hash = DataHash(Data, varargin) + % DATAHASH - Checksum for Matlab array of any type + % This function creates a hash value for an input of any type. The type and + % dimensions of the input are considered as default, such that UINT8([0,0]) and + % UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed + % recursively. + % + % Hash = DataHash(Data, Opts...) + % INPUT: + % Data: Array of these built-in types: + % (U)INT8/16/32/64, SINGLE, DOUBLE, (real/complex, full/sparse) + % CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested), + % function_handle, string. + % Opts: Char strings to specify the method, the input and theoutput types: + % Input types: + % 'array': The contents, type and size of the input [Data] are + % considered for the creation of the hash. Nested CELLs + % and STRUCT arrays are parsed recursively. Empty arrays of + % different type reply different hashs. + % 'file': [Data] is treated as file name and the hash is calculated + % for the files contents. + % 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the + % binary contents of the array is considered, such that + % e.g. empty arrays of different type reply the same hash. + % 'ascii': Same as 'bin', but only the 8-bit ASCII part of the 16-bit + % Matlab CHARs is considered. + % Output types: + % 'hex', 'HEX': Lower/uppercase hexadecimal string. 
+ % 'double', 'uint8': Numerical vector. + % 'base64': Base64. + % 'short': Base64 without padding. + % Hashing method: + % 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'. + % Call DataHash without inputs to get a list of available methods. + % + % Default: 'MD5', 'hex', 'array' + % + % OUTPUT: + % Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing + % method. + % If DataHash is called without inputs, a struct is replied: + % .HashVersion: Version number of the hashing method of this tool. In + % case of bugs or additions, the output can change. + % .Date: Date of release of the current HashVersion. + % .HashMethod: Cell string of the recognized hash methods. + % + % EXAMPLES: + % % Default: MD5, hex: + % DataHash([]) % 5b302b7b2099a97ba2a276640a192485 + % % MD5, Base64 without padding: + % DataHash(int32(1:10), 'short', 'MD5') % +tJN9yeF89h3jOFNN55XLg + % % SHA-1, uppercase hex: + % S.a = uint8([]); + % S.b = {{1:10}, struct('q', uint64(415))}; + % DataHash(S, 'SHA-1', 'HEX') % 18672BE876463B25214CA9241B3C79CC926F3093 + % % SHA-1 of binary values: + % DataHash(1:8, 'SHA-1', 'bin') % 826cf9d3a5d74bbe415e97d4cecf03f445f69225 + % % SHA-256, consider ASCII part only (Matlab's CHAR has 16 bits!): + % DataHash('abc', 'SHA-256', 'ascii') + % % ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad + % % Or equivalently by converting the input to UINT8: + % DataHash(uint8('abc'), 'SHA-256', 'bin') + % + % NOTES: + % Function handles and user-defined objects cannot be converted uniquely: + % - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS, + % but the replied struct can depend on the Matlab version. + % - It is tried to convert objects to UINT8 streams in the subfunction + % ConvertObject. A conversion by STRUCT() might be more appropriate. + % Adjust these subfunctions on demand. + % + % MATLAB CHARs have 16 bits! Use the 'ascii' input type for comparisons with e.g. + % online hash generators. + % + % Matt Raum suggested this for e.g.
user-defined objects: + % DataHash(getByteStreamFromArray(Data)) + % This works very well, but unfortunately getByteStreamFromArray is + % undocumented, such that it might vanish in the future or reply different + % output. + % + % For arrays the calculated hash value might be changed in new versions. + % Calling this function without inputs replies the version of the hash. + % + % The older style for input arguments is accepted also: Struct with fields + % 'Input', 'Method', 'OutFormat'. + % + % The C-Mex function GetMD5 is 2 to 100 times faster, but obtains MD5 only: + % http://www.mathworks.com/matlabcentral/fileexchange/25921 + % + % Tested: Matlab 2009a, 2015b(32/64), 2016b, 2018b, Win7/10 + % Author: Jan Simon, Heidelberg, (C) 2011-2019 matlab.2010(a)n(MINUS)simon.de + % + % See also: TYPECAST, CAST. + % + % Michael Kleder, "Compute Hash", no structs and cells: + % http://www.mathworks.com/matlabcentral/fileexchange/8944 + % Tim, "Serialize/Deserialize", converts structs and cells to a byte stream: + % http://www.mathworks.com/matlabcentral/fileexchange/29457 + % $JRev: R-R V:043 Sum:VbfXFn6217Hp Date:18-Apr-2019 12:11:42 $ + % $License: BSD (use/copy/change/redistribute on own risk, mention the author) $ + % $UnitTest: uTest_DataHash $ + % $File: Tools\GLFile\DataHash.m $ + % History: + % 001: 01-May-2011 21:52, First version. + % 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered. + % 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data. + % 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also. + % Thanks to David (author 243360), who found this bug. + % Jan Achterhold (author 267816) suggested to consider Java objects. + % 016: 01-Feb-2015 20:53, Java heap space exhausted for large files. + % Now files are process in chunks to save memory. + % 017: 15-Feb-2015 19:40, Collsions: Same hash for different data. 
+ % Examples: zeros(1,1) and zeros(1,1,0) + % complex(0) and zeros(1,1,0,0) + % Now the number of dimensions is included, to avoid this. + % 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX. + % Ross found these 2 bugs, which occur when TYPECASTX is not installed. + % If you need the base64 format padded with '=' characters, adjust + % fBase64_enc as you like. + % 026: 29-Jun-2015 00:13, Changed hash for STRUCTs. + % Struct arrays are analysed field by field now, which is much faster. + % 027: 13-Sep-2015 19:03, 'ascii' input as abbrev. for Input='bin' and UINT8(). + % 028: 15-Oct-2015 23:11, Example values in help section updated to v022. + % 029: 16-Oct-2015 22:32, Use default options for empty input. + % 031: 28-Feb-2016 15:10, New hash value to get same reply as GetMD5. + % New Matlab version (at least 2015b) use a fast method for TYPECAST, such + % that calling James Tursa's TYPECASTX is not needed anymore. + % Matlab 6.5 not supported anymore: MException for CATCH. + % 033: 18-Jun-2016 14:28, BUGFIX: Failed on empty files. + % Thanks to Christian (AuthorID 2918599). + % 035: 19-May-2018 01:11, STRING type considered. + % 040: 13-Nov-2018 01:20, Fields of Opt not case-sensitive anymore. + % 041: 09-Feb-2019 18:12, ismethod(class(V),) to support R2018b. + % 042: 02-Mar-2019 18:39, base64: in Java, short: Base64 with padding. + % Unit test. base64->short. + % OPEN BUGS: + % Nath wrote: + % function handle refering to struct containing the function will create + % infinite loop. Is there any workaround ? + % Example: + % d= dynamicprops(); + % addprop(d,'f'); + % d.f= @(varargin) struct2cell(d); + % DataHash(d.f) % infinite loop + % This is caught with an error message concerning the recursion limit now. 
+ %#ok<*CHARTEN> + % Reply current version if called without inputs: ------------------------------ + if nargin == 0 + R = Version_L; + + if nargout == 0 + disp(R); + else + Hash = R; + end + + return; end - - % Specify output format: - if isfield(Opt, 'Format') && ~isempty(Opt.Format) % Short-circuiting - OutFormat = Opt.Format; + % Parse inputs: ---------------------------------------------------------------- + [Method, OutFormat, isFile, isBin, Data] = ParseInput(Data, varargin{:}); + % Create the engine: ----------------------------------------------------------- + try + Engine = java.security.MessageDigest.getInstance(Method); + + catch ME % Handle errors during initializing the engine: + if ~usejava('jvm') + Error_L('needJava', 'DataHash needs Java.'); + end + Error_L('BadInput2', 'Invalid hashing algorithm: [%s]. %s', ... + Method, ME.message); end - - % Check if the Input type is specified - default: 'array': - if isfield(Opt, 'Input') && ~isempty(Opt.Input) % Short-circuiting - if strcmpi(Opt.Input, 'File') - if ischar(Data) == 0 - Error_L('CannotOpen', '1st input FileName must be a string'); - end - isFile = true; - - elseif strncmpi(Opt.Input, 'bin', 3) % Accept 'binary' also - if (isnumeric(Data) || ischar(Data) || islogical(Data)) == 0 || ... - issparse(Data) - Error_L('BadDataType', ... 
- '1st input must be numeric, CHAR or LOGICAL for binary input.'); + % Create the hash value: ------------------------------------------------------- + if isFile + [FID, Msg] = fopen(Data, 'r'); % Open the file + if FID < 0 + Error_L('BadFile', ['Cannot open file: %s', char(10), '%s'], Data, Msg); + end + + % Read file in chunks to save memory and Java heap space: + Chunk = 1e6; % Fastest for 1e6 on Win7/64, HDD + Count = Chunk; % Dummy value to satisfy WHILE condition + while Count == Chunk + [Data, Count] = fread(FID, Chunk, '*uint8'); + if Count ~= 0 % Avoid error for empty file + Engine.update(Data); end - isBin = true; + end + fclose(FID); - elseif strncmpi(Opt.Input, 'asc', 3) % 8-bit ASCII characters - if ~ischar(Data) - Error_L('BadDataType', ... - '1st input must be a CHAR for the input type ASCII.'); + elseif isBin % Contents of an elementary array, type tested already: + if ~isempty(Data) % Engine.update fails for empty input! + if isnumeric(Data) + if isreal(Data) + Engine.update(typecast(Data(:), 'uint8')); + else + Engine.update(typecast(real(Data(:)), 'uint8')); + Engine.update(typecast(imag(Data(:)), 'uint8')); + end + elseif islogical(Data) % TYPECAST cannot handle LOGICAL + Engine.update(typecast(uint8(Data(:)), 'uint8')); + elseif ischar(Data) % TYPECAST cannot handle CHAR + Engine.update(typecast(uint16(Data(:)), 'uint8')); + % Bugfix: Line removed + elseif myIsString(Data) + if isscalar(Data) + Engine.update(typecast(uint16(Data{1}), 'uint8')); + else + Error_L('BadBinData', 'Bin type requires scalar string.'); + end + else % This should have been caught above! 
+ Error_L('BadBinData', 'Data type not handled: %s', class(Data)); end - isBin = true; - Data = uint8(Data); end + else % Array with type: + Engine = CoreHash(Data, Engine); + end + % Calculate the hash: ---------------------------------------------------------- + Hash = typecast(Engine.digest, 'uint8'); + + % Convert hash specific output format: ----------------------------------------- + switch OutFormat + case 'hex' + Hash = sprintf('%.2x', double(Hash)); + case 'HEX' + Hash = sprintf('%.2X', double(Hash)); + case 'double' + Hash = double(reshape(Hash, 1, [])); + case 'uint8' + Hash = reshape(Hash, 1, []); + case 'short' + Hash = fBase64_enc(double(Hash), 0); + case 'base64' + Hash = fBase64_enc(double(Hash), 1); + + otherwise + Error_L('BadOutFormat', ... + '[Opt.Format] must be: HEX, hex, uint8, double, base64.'); end - -elseif nArg == 0 % Reply version of this function: - R = Version_L; - - if nargout == 0 - disp(R); - else - Hash = R; end - - return; - -elseif nArg ~= 1 % Bad number of arguments: - Error_L('BadNInput', '1 or 2 inputs required.'); -end - -% Create the engine: ----------------------------------------------------------- -try - Engine = java.security.MessageDigest.getInstance(Method); -catch - Error_L('BadInput2', 'Invalid algorithm: [%s].', Method); -end - -% Create the hash value: ------------------------------------------------------- -if isFile - % Open the file: - FID = fopen(Data, 'r'); - if FID < 0 - % Check existence of file: - Found = FileExist_L(Data); - if Found - Error_L('CantOpenFile', 'Cannot open file: %s.', Data); + % ****************************************************************************** + function Engine = CoreHash(Data, Engine) + % Consider the type and dimensions of the array to distinguish arrays with the + % same data, but different shape: [0 x 0] and [0 x 1], [1,2] and [1;2], + % DOUBLE(0) and SINGLE([0,0]): + % < v016: [class, size, data]. BUG! 0 and zeros(1,1,0) had the same hash! 
+ % >= v016: [class, ndims, size, data] + Engine.update([uint8(class(Data)), ... + typecast(uint64([ndims(Data), size(Data)]), 'uint8')]); + + if issparse(Data) % Sparse arrays to struct: + [S.Index1, S.Index2, S.Value] = find(Data); + Engine = CoreHash(S, Engine); + elseif isstruct(Data) % Hash for all array elements and fields: + F = sort(fieldnames(Data)); % Ignore order of fields + for iField = 1:length(F) % Loop over fields + aField = F{iField}; + Engine.update(uint8(aField)); + for iS = 1:numel(Data) % Loop over elements of struct array + Engine = CoreHash(Data(iS).(aField), Engine); + end + end + elseif iscell(Data) % Get hash for all cell elements: + for iS = 1:numel(Data) + Engine = CoreHash(Data{iS}, Engine); + end + elseif isempty(Data) % Nothing to do + elseif isnumeric(Data) + if isreal(Data) + Engine.update(typecast(Data(:), 'uint8')); else - Error_L('FileNotFound', 'File not found: %s.', Data); + Engine.update(typecast(real(Data(:)), 'uint8')); + Engine.update(typecast(imag(Data(:)), 'uint8')); end - end - - % Read file in chunks to save memory and Java heap space: - Chunk = 1e6; % Fastest for 1e6 on Win7/64, HDD - Count = Chunk; % Dummy value to satisfy WHILE condition - while Count == Chunk - [Data, Count] = fread(FID, Chunk, '*uint8'); - if Count ~= 0 % Avoid error for empty file - Engine.update(Data); + elseif islogical(Data) % TYPECAST cannot handle LOGICAL + Engine.update(typecast(uint8(Data(:)), 'uint8')); + elseif ischar(Data) % TYPECAST cannot handle CHAR + Engine.update(typecast(uint16(Data(:)), 'uint8')); + elseif myIsString(Data) % [19-May-2018] String class in >= R2016b + classUint8 = uint8([117, 105, 110, 116, 49, 54]); % 'uint16' + for iS = 1:numel(Data) + % Emulate without recursion: Engine = CoreHash(uint16(Data{iS}), Engine) + aString = uint16(Data{iS}); + Engine.update([classUint8, ... 
+ typecast(uint64([ndims(aString), size(aString)]), 'uint8')]); + if ~isempty(aString) + Engine.update(typecast(uint16(aString), 'uint8')); + end end - end - fclose(FID); - - % Calculate the hash: - Hash = typecast(Engine.digest, 'uint8'); - -elseif isBin % Contents of an elementary array, type tested already: - if isempty(Data) % Nothing to do, Engine.update fails for empty input! - Hash = typecast(Engine.digest, 'uint8'); - else % Matlab's TYPECAST is less elegant: - if isnumeric(Data) - if isreal(Data) - Engine.update(typecast(Data(:), 'uint8')); - else - Engine.update(typecast(real(Data(:)), 'uint8')); - Engine.update(typecast(imag(Data(:)), 'uint8')); + + elseif isa(Data, 'function_handle') + Engine = CoreHash(ConvertFuncHandle(Data), Engine); + elseif (isobject(Data) || isjava(Data)) && ismethod(class(Data), 'hashCode') + Engine = CoreHash(char(Data.hashCode), Engine); + else % Most likely a user-defined object: + try + BasicData = ConvertObject(Data); + catch ME + error(['JSimon:', mfilename, ':BadDataType'], ... + '%s: Cannot create elementary array for type: %s\n %s', ... + mfilename, class(Data), ME.message); + end + + try + Engine = CoreHash(BasicData, Engine); + catch ME + if strcmpi(ME.identifier, 'MATLAB:recursionLimit') + ME = MException(['JSimon:', mfilename, ':RecursiveType'], ... + '%s: Cannot create hash for recursive data type: %s', ... 
+ mfilename, class(Data)); end - elseif islogical(Data) % TYPECAST cannot handle LOGICAL - Engine.update(typecast(uint8(Data(:)), 'uint8')); - elseif ischar(Data) % TYPECAST cannot handle CHAR - Engine.update(typecast(uint16(Data(:)), 'uint8')); - % Bugfix: Line removed + throw(ME); end - Hash = typecast(Engine.digest, 'uint8'); end -else % Array with type: - Engine = CoreHash(Data, Engine); - Hash = typecast(Engine.digest, 'uint8'); -end - -% Convert hash specific output format: ----------------------------------------- -switch OutFormat - case 'hex' - Hash = sprintf('%.2x', double(Hash)); - case 'HEX' - Hash = sprintf('%.2X', double(Hash)); - case 'double' - Hash = double(reshape(Hash, 1, [])); - case 'uint8' - Hash = reshape(Hash, 1, []); - case 'base64' - Hash = fBase64_enc(double(Hash)); - otherwise - Error_L('BadOutFormat', ... - '[Opt.Format] must be: HEX, hex, uint8, double, base64.'); -end - -% return; - -% ****************************************************************************** -function Engine = CoreHash(Data, Engine) -% This methods uses the slower TYPECAST of Matlab - -% Consider the type and dimensions of the array to distinguish arrays with the -% same data, but different shape: [0 x 0] and [0 x 1], [1,2] and [1;2], -% DOUBLE(0) and SINGLE([0,0]): -% < v016: [class, size, data]. BUG! 0 and zeros(1,1,0) had the same hash! -% >= v016: [class, ndims, size, data] -Engine.update([uint8(class(Data)), ... 
- typecast(uint64([ndims(Data), size(Data)]), 'uint8')]); - -if issparse(Data) % Sparse arrays to struct: - [S.Index1, S.Index2, S.Value] = find(Data); - Engine = CoreHash(S, Engine); -elseif isstruct(Data) % Hash for all array elements and fields: - F = sort(fieldnames(Data)); % Ignore order of fields - for iField = 1:length(F) % Loop over fields - aField = F{iField}; - Engine.update(uint8(aField)); - for iS = 1:numel(Data) % Loop over elements of struct array - Engine = CoreHash(Data(iS).(aField), Engine); + end + % ****************************************************************************** + function [Method, OutFormat, isFile, isBin, Data] = ParseInput(Data, varargin) + % Default options: ------------------------------------------------------------- + Method = 'MD5'; + OutFormat = 'hex'; + isFile = false; + isBin = false; + % Check number and type of inputs: --------------------------------------------- + nOpt = nargin - 1; + Opt = varargin; + if nOpt == 1 && isa(Opt{1}, 'struct') % Old style Options as struct: + Opt = struct2cell(Opt{1}); + nOpt = numel(Opt); + end + % Loop over strings in the input: ---------------------------------------------- + for iOpt = 1:nOpt + aOpt = Opt{iOpt}; + if ~ischar(aOpt) + Error_L('BadInputType', '[Opt] must be a struct or chars.'); + end + + switch lower(aOpt) + case 'file' % Data contains the file name: + isFile = true; + case {'bin', 'binary'} % Just the contents of the data: + if (isnumeric(Data) || ischar(Data) || islogical(Data) || ... + myIsString(Data)) == 0 || issparse(Data) + Error_L('BadDataType', ['[Bin] input needs data type: ', ... + 'numeric, CHAR, LOGICAL, STRING.']); + end + isBin = true; + case 'array' + isBin = false; % Is the default already + case {'asc', 'ascii'} % 8-bit part of MATLAB CHAR or STRING: + isBin = true; + if ischar(Data) + Data = uint8(Data); + elseif myIsString(Data) && numel(Data) == 1 + Data = uint8(char(Data)); + else + Error_L('BadDataType', ... 
+ 'ASCII method: Data must be a CHAR or scalar STRING.'); + end + case 'hex' + if aOpt(1) == 'H' + OutFormat = 'HEX'; + else + OutFormat = 'hex'; + end + case {'double', 'uint8', 'short', 'base64'} + OutFormat = lower(aOpt); + otherwise % Guess that this is the method: + Method = upper(aOpt); end end -elseif iscell(Data) % Get hash for all cell elements: - for iS = 1:numel(Data) - Engine = CoreHash(Data{iS}, Engine); end -elseif isempty(Data) % Nothing to do -elseif isnumeric(Data) - if isreal(Data) - Engine.update(typecast(Data(:), 'uint8')); - else - Engine.update(typecast(real(Data(:)), 'uint8')); - Engine.update(typecast(imag(Data(:)), 'uint8')); + % ****************************************************************************** + function FuncKey = ConvertFuncHandle(FuncH) + % The subfunction ConvertFuncHandle converts function_handles to a struct + % using the Matlab function FUNCTIONS. The output of this function changes + % with the Matlab version, such that DataHash(@sin) replies different hashes + % under Matlab 6.5 and 2009a. + % An alternative is using the function name and name of the file for + % function_handles, but this is not unique for nested or anonymous functions. + % If the MATLABROOT is removed from the file's path, at least the hash of + % Matlab's toolbox functions is (usually!) not influenced by the version. + % Finally I'm in doubt if there is a unique method to hash function handles. + % Please adjust the subfunction ConvertFuncHandles to your needs. + % The Matlab version influences the conversion by FUNCTIONS: + % 1. The format of the struct replied FUNCTIONS is not fixed, + % 2. The full paths of toolbox function e.g. for @mean differ. + FuncKey = functions(FuncH); + % Include modification file time and file size. Suggested by Aslak Grinsted: + if ~isempty(FuncKey.file) + d = dir(FuncKey.file); + if ~isempty(d) + FuncKey.filebytes = d.bytes; + FuncKey.filedate = d.datenum; + end + end + % ALTERNATIVE: Use name and path. 
The part of the toolbox functions + % is replaced such that the hash for @mean does not depend on the Matlab + % version. + % Drawbacks: Anonymous functions, nested functions... + % funcStruct = functions(FuncH); + % funcfile = strrep(funcStruct.file, matlabroot, ''); + % FuncKey = uint8([funcStruct.function, ' ', funcfile]); + % Finally I'm afraid there is no unique method to get a hash for a function + % handle. Please adjust this conversion to your needs. + end + % ****************************************************************************** + function DataBin = ConvertObject(DataObj) + % Convert a user-defined object to a binary stream. There cannot be a unique + % solution, so this part is left for the user... + try % Perhaps a direct conversion is implemented: + DataBin = uint8(DataObj); + + % Matt Raum had this excellent idea - unfortunately this function is + % undocumented and might not be supported in te future: + % DataBin = getByteStreamFromArray(DataObj); + + catch % Or perhaps this is better: + WarnS = warning('off', 'MATLAB:structOnObject'); + DataBin = struct(DataObj); + warning(WarnS); end -elseif islogical(Data) % TYPECAST cannot handle LOGICAL - Engine.update(typecast(uint8(Data(:)), 'uint8')); -elseif ischar(Data) % TYPECAST cannot handle CHAR - Engine.update(typecast(uint16(Data(:)), 'uint8')); -elseif isa(Data, 'function_handle') - Engine = CoreHash(ConvertFuncHandle(Data), Engine); -elseif (isobject(Data) || isjava(Data)) && ismethod(Data, 'hashCode') - Engine = CoreHash(char(Data.hashCode), Engine); -else % Most likely a user-defined object: - try - BasicData = ConvertObject(Data); - catch ME - error(['JSimon:', mfilename, ':BadDataType'], ... - '%s: Cannot create elementary array for type: %s\n %s', ... - mfilename, class(Data), ME.message); end - + % ****************************************************************************** + function Out = fBase64_enc(In, doPad) + % Encode numeric vector of UINT8 values to base64 string. 
+ B64 = org.apache.commons.codec.binary.Base64; + Out = char(B64.encode(In)).'; + if ~doPad + Out(Out == '=') = []; + end + % Matlab method: + % Pool = [65:90, 97:122, 48:57, 43, 47]; % [0:9, a:z, A:Z, +, /] + % v8 = [128; 64; 32; 16; 8; 4; 2; 1]; + % v6 = [32, 16, 8, 4, 2, 1]; + % + % In = reshape(In, 1, []); + % X = rem(floor(bsxfun(@rdivide, In, v8)), 2); + % d6 = rem(numel(X), 6); + % if d6 ~= 0 + % X = [X(:); zeros(6 - d6, 1)]; + % end + % Out = char(Pool(1 + v6 * reshape(X, 6, []))); + % + % p = 3 - rem(numel(Out) - 1, 4); + % if doPad && p ~= 0 % Standard base64 string with trailing padding: + % Out = [Out, repmat('=', 1, p)]; + % end + end + % ****************************************************************************** + function T = myIsString(S) + % isstring was introduced in R2016: + persistent hasString + if isempty(hasString) + matlabVer = [100, 1] * sscanf(version, '%d.', 2); + hasString = (matlabVer >= 901); % isstring existing since R2016b + end + T = hasString && isstring(S); % Short-circuting + end + % ****************************************************************************** + function R = Version_L() + % The output differs between versions of this function. So give the user a + % chance to recognize the version: + % 1: 01-May-2011, Initial version + % 2: 15-Feb-2015, The number of dimensions is considered in addition. + % In version 1 these variables had the same hash: + % zeros(1,1) and zeros(1,1,0), complex(0) and zeros(1,1,0,0) + % 3: 29-Jun-2015, Struct arrays are processed field by field and not element + % by element, because this is much faster. In consequence the hash value + % differs, if the input contains a struct. + % 4: 28-Feb-2016 15:20, same output as GetMD5 for MD5 sums. Therefore the + % dimensions are casted to UINT64 at first. + % 19-May-2018 01:13, STRING type considered. 
+ R.HashVersion = 4; + R.Date = [2018, 5, 19]; + R.HashMethod = {}; try - Engine = CoreHash(BasicData, Engine); - catch ME - if strcmpi(ME.identifier, 'MATLAB:recursionLimit') - ME = MException(['JSimon:', mfilename, ':RecursiveType'], ... - '%s: Cannot create hash for recursive data type: %s', ... - mfilename, class(Data)); + Provider = java.security.Security.getProviders; + for iProvider = 1:numel(Provider) + S = char(Provider(iProvider).getServices); + Index = strfind(S, 'MessageDigest.'); + for iDigest = 1:length(Index) + Digest = strtok(S(Index(iDigest):end)); + Digest = strrep(Digest, 'MessageDigest.', ''); + R.HashMethod = cat(2, R.HashMethod, {Digest}); + end end - throw(ME); + catch ME + fprintf(2, '%s\n', ME.message); + R.HashMethod = 'error'; end -end - -% return; - -% ****************************************************************************** -function FuncKey = ConvertFuncHandle(FuncH) -% The subfunction ConvertFuncHandle converts function_handles to a struct -% using the Matlab function FUNCTIONS. The output of this function changes -% with the Matlab version, such that DataHash(@sin) replies different hashes -% under Matlab 6.5 and 2009a. -% An alternative is using the function name and name of the file for -% function_handles, but this is not unique for nested or anonymous functions. -% If the MATLABROOT is removed from the file's path, at least the hash of -% Matlab's toolbox functions is (usually!) not influenced by the version. -% Finally I'm in doubt if there is a unique method to hash function handles. -% Please adjust the subfunction ConvertFuncHandles to your needs. - -% The Matlab version influences the conversion by FUNCTIONS: -% 1. The format of the struct replied FUNCTIONS is not fixed, -% 2. The full paths of toolbox function e.g. for @mean differ. -FuncKey = functions(FuncH); - -% Include modification file time and file size. 
Suggested by Aslak Grinsted: -if ~isempty(FuncKey.file) - d = dir(FuncKey.file); - if ~isempty(d) - FuncKey.filebytes = d.bytes; - FuncKey.filedate = d.datenum; - end -end - -% ALTERNATIVE: Use name and path. The part of the toolbox functions -% is replaced such that the hash for @mean does not depend on the Matlab -% version. -% Drawbacks: Anonymous functions, nested functions... -% funcStruct = functions(FuncH); -% funcfile = strrep(funcStruct.file, matlabroot, ''); -% FuncKey = uint8([funcStruct.function, ' ', funcfile]); - -% Finally I'm afraid there is no unique method to get a hash for a function -% handle. Please adjust this conversion to your needs. - -% return; - -% ****************************************************************************** -function DataBin = ConvertObject(DataObj) -% Convert a user-defined object to a binary stream. There cannot be a unique -% solution, so this part is left for the user... - -try % Perhaps a direct conversion is implemented: - DataBin = uint8(DataObj); - - % Matt Raum had this excellent idea - unfortunately this function is - % undocumented and might not be supported in te future: - % DataBin = getByteStreamFromArray(DataObj); - -catch % Or perhaps this is better: - WarnS = warning('off', 'MATLAB:structOnObject'); - DataBin = struct(DataObj); - warning(WarnS); -end - -% return; - -% ****************************************************************************** -function Out = fBase64_enc(In) -% Encode numeric vector of UINT8 values to base64 string. -% The intention of this is to create a shorter hash than the HEX format. -% Therefore a padding with '=' characters is omitted on purpose. 
- -Pool = [65:90, 97:122, 48:57, 43, 47]; % [0:9, a:z, A:Z, +, /] -v8 = [128; 64; 32; 16; 8; 4; 2; 1]; -v6 = [32, 16, 8, 4, 2, 1]; - -In = reshape(In, 1, []); -X = rem(floor(In(ones(8, 1), :) ./ v8(:, ones(length(In), 1))), 2); -Y = reshape([X(:); zeros(6 - rem(numel(X), 6), 1)], 6, []); -Out = char(Pool(1 + v6 * Y)); - -% return; - -% ****************************************************************************** -function Ex = FileExist_L(FileName) -% A more reliable version of EXIST(FileName, 'file'): -dirFile = dir(FileName); -if length(dirFile) == 1 - Ex = ~(dirFile.isdir); -else - Ex = false; -end - -% return; - -% ****************************************************************************** -function R = Version_L() -% The output differs between versions of this function. So give the user a -% chance to recognize the version: -% 1: 01-May-2011, Initial version -% 2: 15-Feb-2015, The number of dimensions is considered in addition. -% In version 1 these variables had the same hash: -% zeros(1,1) and zeros(1,1,0), complex(0) and zeros(1,1,0,0) -% 3: 29-Jun-2015, Struct arrays are processed field by field and not element -% by element, because this is much faster. In consequence the hash value -% differs, if the input contains a struct. -% 4: 28-Feb-2016 15:20, same output as GetMD5 for MD5 sums. Therefore the -% dimensions are casted to UINT64 at first. 
-R.HashVersion = 4; -R.Date = [2016, 2, 28]; - -R.HashMethod = {}; -try - Provider = java.security.Security.getProviders; - for iProvider = 1:numel(Provider) - S = char(Provider(iProvider).getServices); - Index = strfind(S, 'MessageDigest.'); - for iDigest = 1:length(Index) - Digest = strtok(S(Index(iDigest):end)); - Digest = strrep(Digest, 'MessageDigest.', ''); - R.HashMethod = cat(2, R.HashMethod, {Digest}); - end end -catch ME - fprintf(2, '%s\n', ME.message); - R.HashMethod = 'error'; -end - -% return; - -% ****************************************************************************** -function Error_L(ID, varargin) - -error(['JSimon:', mfilename, ':', ID], ['*** %s: ', varargin{1}], ... - mfilename, varargin{2:nargin - 1}); - -% return; + % ****************************************************************************** + function Error_L(ID, varargin) + error(['JSimon:', mfilename, ':', ID], ['*** %s: ', varargin{1}], ... + mfilename, varargin{2:nargin - 1}); + end \ No newline at end of file diff --git a/+dj/Relvar.m b/+dj/Relvar.m index 49436c59..2395a203 100755 --- a/+dj/Relvar.m +++ b/+dj/Relvar.m @@ -20,11 +20,15 @@ id = ret.lid; end - function delQuick(self) + function count = delQuick(self, getCount) % DELQUICK - remove all tuples of the relation from its table. % Unlike del, delQuick does not prompt for user % confirmation, nor does it attempt to cascade down to the dependent tables. 
self.schema.conn.query(sprintf('DELETE FROM %s', self.sql)) + count = []; + if nargin > 1 && getCount + count = self.schema.conn.query(sprintf('SELECT count(*) as count FROM %s', self.sql)).count; + end end diff --git a/+tests/TestFetch.m b/+tests/TestFetch.m index 64ecdbbd..bf167379 100644 --- a/+tests/TestFetch.m +++ b/+tests/TestFetch.m @@ -37,8 +37,8 @@ function testDescribe(testCase) q = University.All; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); - assembled_sql = dj.internal.Declare.declare(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare2(q, raw_def); + assembled_sql = dj.internal.Declare.declare2(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end diff --git a/+tests/TestUuid.m b/+tests/TestUuid.m index fa999269..6715c045 100644 --- a/+tests/TestUuid.m +++ b/+tests/TestUuid.m @@ -57,8 +57,8 @@ function testReverseEngineering(testCase) q = University.Message; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); - [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare2(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare2(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end From bde3edd0866f971f9bf296c7a10a68b7793b1d00 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 14:34:27 -0600 Subject: [PATCH 06/21] Add working prototype of file store. 
--- +dj/+internal/Declare.m | 15 +++++++++------ +dj/+internal/Table.m | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index c85c23b5..2977b73e 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -19,7 +19,7 @@ end methods(Static) - function [sql, newattrs] = makeFK2(line, existingFields, inKey, hash) + function [all_attr_sql, fk_sql, newattrs] = makeFK2(line, existingFields, inKey, hash) % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) % Add foreign key to SQL table definition. % sql: Modified in-place SQL to include foreign keys. @@ -28,7 +28,8 @@ % existingFields: Existing field attributes. % inKey: Set as primary key. % hash: Current hash as base. - sql = ''; + fk_sql = ''; + all_attr_sql = ''; pat = ['^(?\([\s\w,]*\))?' ... '\s*->\s*' ... '(?\w+\.[A-Z][A-Za-z0-9]*)' ... @@ -82,15 +83,16 @@ fieldInfo.name = newattrs{i}; fieldInfo.nullabe = ~inKey; % nonprimary references are nullable [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, NaN); - sql = sprintf('%s%s,\n', sql, attr_sql); + all_attr_sql = sprintf('%s%s,\n', all_attr_sql, attr_sql); end + all_attr_sql = all_attr_sql(1:end-2); fkattrs = rel.primaryKey; fkattrs(ismember(fkattrs, attrs))=newattrs; hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); - sql = sprintf(... + fk_sql = sprintf(... ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... - 'ON UPDATE CASCADE ON DELETE RESTRICT'], sql, hash, backquotedList(fkattrs), ... + 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, backquotedList(fkattrs), ... rel.fullTableName, backquotedList(rel.primaryKey)); end function [sql, external_stores] = declare2(table_instance, def) @@ -209,10 +211,11 @@ case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') % dj.internal.shorthash(sprintf('`%s`.`%s`', ... 
% table_instance.schema.dbname, tableName))); % sql = sprintf('%s,\n', sql); - [fk_sql, newFields] = dj.internal.Declare.makeFK2( ... + [fk_attr_sql, fk_sql, newFields] = dj.internal.Declare.makeFK2( ... line, fields, inKey, ... dj.internal.shorthash(sprintf('`%s`.`%s`', ... table_instance.schema.dbname, tableName))); + attributeSql = [attributeSql, fk_attr_sql]; %#ok foreignKeySql = [foreignKeySql, fk_sql]; %#ok fields = [fields, newFields]; %#ok if inKey diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index 9730e53d..96f94786 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -368,9 +368,9 @@ function addForeignKey(self, target) if isa(target, 'dj.Table') target = sprintf('->%s', target.className); end - sql = dj.internal.Declare.makeFK2('', target, self.primaryKey, ... + [attr_sql, fk_sql, ~] = dj.internal.Declare.makeFK2('', target, self.primaryKey, ... true, dj.internal.shorthash(self.fullTableName)); - self.alter(sprintf('ADD %s', sql)) + self.alter(sprintf('ADD %s%s', attr_sql, fk_sql)) end function dropForeignKey(self, target) From f6734fb82a86e440ba93b1031eec13788e635819 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 15:21:25 -0600 Subject: [PATCH 07/21] Clean up declare/makeFK. --- +dj/+internal/Declare.m | 245 ++-------------------------------------- 1 file changed, 8 insertions(+), 237 deletions(-) diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index 2977b73e..6dd5f74b 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -19,82 +19,6 @@ end methods(Static) - function [all_attr_sql, fk_sql, newattrs] = makeFK2(line, existingFields, inKey, hash) - % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) - % Add foreign key to SQL table definition. - % sql: Modified in-place SQL to include foreign keys. - % newattrs: Extracted new field attributes. - % line: DataJoint definition, single line. - % existingFields: Existing field attributes. 
- % inKey: Set as primary key. - % hash: Current hash as base. - fk_sql = ''; - all_attr_sql = ''; - pat = ['^(?\([\s\w,]*\))?' ... - '\s*->\s*' ... - '(?\w+\.[A-Z][A-Za-z0-9]*)' ... - '\w*' ... - '(?\([\s\w,]*\))?' ... - '\s*(#.*)?$']; - fk = regexp(line, pat, 'names'); - if exist(fk.cname, 'class') - rel = feval(fk.cname); - assert(isa(rel, 'dj.Relvar'), 'class %s is not a DataJoint relation', fk.cname) - else - rel = dj.Relvar(fk.cname); - end - - % parse and validate the attribute lists - attrs = strsplit(fk.attrs, {' ',',','(',')'}); - newattrs = strsplit(fk.newattrs, {' ',',','(',')'}); - attrs(cellfun(@isempty, attrs))=[]; - newattrs(cellfun(@isempty, newattrs))=[]; - assert(all(cellfun(@(a) ismember(a, rel.primaryKey), attrs)), ... - 'All attributes in (%s) must be in the primary key of %s', ... - strjoin(attrs, ','), rel.className) - if length(newattrs)==1 - % unambiguous single attribute - if length(rel.primaryKey)==1 - attrs = rel.primaryKey; - elseif isempty(attrs) && length(setdiff(rel.primaryKey, existingFields))==1 - attrs = setdiff(rel.primaryKey, existingFields); - end - end - assert(length(attrs) == length(newattrs) , ... - 'Mapped fields (%s) and (%s) must match in the foreign key.', ... - strjoin(newattrs,','), strjoin(attrs,',')) - - % prepend unspecified primary key attributes that have not yet been - % included - pk = rel.primaryKey; - pk(ismember(pk,attrs) | ismember(pk,existingFields))=[]; - attrs = [pk attrs]; - newattrs = [pk newattrs]; - - % fromFields and toFields are sorted in the same order as - % ref.rel.tableHeader.attributes - [~, ix] = sort(cellfun(@(a) find(strcmp(a, rel.primaryKey)), attrs)); - attrs = attrs(ix); - newattrs = newattrs(ix); - - for i=1:length(attrs) - fieldInfo = rel.tableHeader.attributes(strcmp(attrs{i}, ... 
- rel.tableHeader.names)); - fieldInfo.name = newattrs{i}; - fieldInfo.nullabe = ~inKey; % nonprimary references are nullable - [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, NaN); - all_attr_sql = sprintf('%s%s,\n', all_attr_sql, attr_sql); - end - all_attr_sql = all_attr_sql(1:end-2); - - fkattrs = rel.primaryKey; - fkattrs(ismember(fkattrs, attrs))=newattrs; - hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); - fk_sql = sprintf(... - ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... - 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, backquotedList(fkattrs), ... - rel.fullTableName, backquotedList(rel.primaryKey)); - end function [sql, external_stores] = declare2(table_instance, def) % sql = DECLARE(query, definition) % Parse table declaration and declares the table. @@ -276,162 +200,6 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name % foreign_key_sql => tableInfo.foreign_key_sql % index_sql => tableInfo.index_sql % external_stores => tableInfo.external_stores - function [sql, external_stores] = declare(table_instance, def) - % sql = DECLARE(query, definition) - % Parse table declaration and declares the table. - % sql: Generated SQL to create a table. - % query: DataJoint Table instance. - % definition: DataJoint Table definition. - - def = strrep(def, '%{', ''); - def = strrep(def, '%}', ''); - def = strtrim(regexp(def,'\n','split')'); - - % append the next line to lines that end in a backslash - for i=find(cellfun(@(x) ~isempty(x) && x(end)=='\', def')) - def{i} = [def{i}(1:end-1) ' ' def{i+1}]; - def(i+1) = ''; - end - - % parse table schema, name, type, and comment - switch true - - case {isa(table_instance, 'dj.internal.UserRelation'), isa(table_instance, ... - 'dj.Part'), isa(table_instance, 'dj.Jobs'), ... 
- isa(table_instance, 'dj.internal.ExternalTable')} - % New-style declaration using special classes for each tier - tableInfo = struct; - if isa(table_instance, 'dj.Part') - tableInfo.tier = 'part'; - elseif ~isa(table_instance, 'dj.internal.ExternalTable') - specialClass = find(cellfun(@(c) isa(table_instance, c), ... - dj.Schema.tierClasses)); - assert(length(specialClass)==1, ... - 'Unknown type of UserRelation in %s', class(table_instance)) - tableInfo.tier = dj.Schema.allowedTiers{specialClass}; - end - % remove empty lines - def(cellfun(@(x) isempty(x), def)) = []; - if strncmp(def{1}, '#', 1) - tableInfo.comment = strtrim(def{1}(2:end)); - def = def(2:end); - else - tableInfo.comment = ''; - end - % remove pure comments - def(cellfun(@(x) strncmp('#',strtrim(x),1), def)) = []; - cname = strsplit(table_instance.className, '.'); - tableInfo.package = strjoin(cname(1:end-1), '.'); - tableInfo.className = cname{end}; - if isa(table_instance, 'dj.Part') - tableName = sprintf('%s%s%s', ... - table_instance.schema.prefix, ... - dj.Schema.tierPrefixes{strcmp(tableInfo.tier, ... - dj.Schema.allowedTiers)}, sprintf('%s__%s', ... - table_instance.master.plainTableName, ... - dj.internal.fromCamelCase(table_instance.className(length( ... - table_instance.master.className)+1:end)))); - %#ok - elseif ~isa(table_instance, 'dj.internal.ExternalTable') - tableName = sprintf('%s%s%s', ... - table_instance.schema.prefix, dj.Schema.tierPrefixes{ ... - strcmp(tableInfo.tier, dj.Schema.allowedTiers)}, ... 
- dj.internal.fromCamelCase(tableInfo.className)); - else - tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' table_instance.store]; - end - - otherwise - % Old-style declaration for backward compatibility - - % remove empty lines and pure comment lines - def(cellfun(@(x) isempty(x) || strncmp('#',x,1), def)) = []; - firstLine = strtrim(def{1}); - def = def(2:end); - pat = { - '^(?\w+)\.(?\w+)\s*' % package.TableName - '\(\s*(?\w+)\s*\)\s*' % (tier) - '#\s*(?.*)$' % # comment - }; - tableInfo = regexp(firstLine, cat(2,pat{:}), 'names'); - assert(numel(tableInfo)==1, ... - ['invalidTableDeclaration:Incorrect syntax in table declaration, ' ... - 'line 1: \n %s'], firstLine) - assert(ismember(tableInfo.tier, dj.Schema.allowedTiers),... - 'invalidTableTier:Invalid tier for table ', tableInfo.className) - cname = sprintf('%s.%s', tableInfo.package, tableInfo.className); - assert(strcmp(cname, table_instance.className), ... - 'Table name %s does not match in file %s', cname, ... - table_instance.className) - tableName = sprintf('%s%s%s', table_instance.schema.prefix, ... - dj.Schema.tierPrefixes{strcmp(tableInfo.tier, ... - dj.Schema.allowedTiers)}, dj.internal.fromCamelCase( ... - stableInfo.className)); - end - - sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, ... - tableName); - - % fields and foreign keys - inKey = true; - primaryFields = {}; - external_stores = {}; - fields = {}; - for iLine = 1:length(def) - line = def{iLine}; - switch true - case strncmp(line,'---',3) - inKey = false; - % foreign key - case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') - [sql, newFields] = dj.internal.Declare.makeFK( ... - sql, line, fields, inKey, ... - dj.internal.shorthash(sprintf('`%s`.`%s`', ... 
- table_instance.schema.dbname, tableName))); - sql = sprintf('%s,\n', sql); - fields = [fields, newFields]; %#ok - if inKey - primaryFields = [primaryFields, newFields]; %#ok - end - - % index - case regexpi(line, '^(unique\s+)?index[^:]*$') - sql = sprintf('%s%s,\n', sql, line); % add checks - - % attribute - case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name - '(=\s*\S+(\s+\S+)*\s*)?' ... % opt. default - ':\s*\w.*$']) % type, comment - fieldInfo = dj.internal.Declare.parseAttrDef(line); - assert(~inKey || ~fieldInfo.isnullable, ... - 'primary key attributes cannot be nullable') - if inKey - primaryFields{end+1} = fieldInfo.name; %#ok - end - fields{end+1} = fieldInfo.name; %#ok - [attr_sql, store] = dj.internal.Declare.compileAttribute(fieldInfo); - sql = sprintf('%s%s', sql, attr_sql); - if ~isempty(store) - external_stores{end+1} = store; %#ok - end - otherwise - error('Invalid table declaration line "%s"', line) - end - end - - % add primary key declaration - assert(~isempty(primaryFields), 'table must have a primary key') - sql = sprintf('%sPRIMARY KEY (%s),\n' ,sql, backquotedList(primaryFields)); - - % finish the declaration - sql = sprintf('%s\n) ENGINE = InnoDB, COMMENT "%s"', sql(1:end-2), ... - tableInfo.comment); - - % execute declaration - fprintf \n\n - fprintf(sql) - fprintf \n\n\n - end function fieldInfo = parseAttrDef(line) % fieldInfo = PARSEATTRDEF(line) @@ -475,7 +243,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name fieldInfo.isnullable = strcmpi(fieldInfo.default,'null'); end - function [sql, newattrs] = makeFK(sql, line, existingFields, inKey, hash) + function [all_attr_sql, fk_sql, newattrs] = makeFK2(line, existingFields, inKey, hash) % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) % Add foreign key to SQL table definition. % sql: Modified in-place SQL to include foreign keys. @@ -484,6 +252,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name % existingFields: Existing field attributes. 
% inKey: Set as primary key. % hash: Current hash as base. + fk_sql = ''; + all_attr_sql = ''; pat = ['^(?\([\s\w,]*\))?' ... '\s*->\s*' ... '(?\w+\.[A-Z][A-Za-z0-9]*)' ... @@ -536,16 +306,17 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name rel.tableHeader.names)); fieldInfo.name = newattrs{i}; fieldInfo.nullabe = ~inKey; % nonprimary references are nullable - [attr_sql, ~] = dj.internal.Declare.compileAttribute(fieldInfo); - sql = sprintf('%s%s', sql, attr_sql); + [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, NaN); + all_attr_sql = sprintf('%s%s,\n', all_attr_sql, attr_sql); end + all_attr_sql = all_attr_sql(1:end-2); fkattrs = rel.primaryKey; fkattrs(ismember(fkattrs, attrs))=newattrs; hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); - sql = sprintf(... + fk_sql = sprintf(... ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... - 'ON UPDATE CASCADE ON DELETE RESTRICT'], sql, hash, backquotedList(fkattrs), ... + 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, backquotedList(fkattrs), ... rel.fullTableName, backquotedList(rel.primaryKey)); end From 7e00031f98b9d9a7acad7c2eec92418641425bc1 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 15:24:23 -0600 Subject: [PATCH 08/21] Clean up declare/makeFK.2 --- +dj/+internal/Declare.m | 6 +++--- +dj/+internal/ExternalTable.m | 2 +- +dj/+internal/Table.m | 4 ++-- +tests/TestFetch.m | 4 ++-- +tests/TestUuid.m | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index 6dd5f74b..76801839 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -19,7 +19,7 @@ end methods(Static) - function [sql, external_stores] = declare2(table_instance, def) + function [sql, external_stores] = declare(table_instance, def) % sql = DECLARE(query, definition) % Parse table declaration and declares the table. % sql: Generated SQL to create a table. 
@@ -135,7 +135,7 @@ case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') % dj.internal.shorthash(sprintf('`%s`.`%s`', ... % table_instance.schema.dbname, tableName))); % sql = sprintf('%s,\n', sql); - [fk_attr_sql, fk_sql, newFields] = dj.internal.Declare.makeFK2( ... + [fk_attr_sql, fk_sql, newFields] = dj.internal.Declare.makeFK( ... line, fields, inKey, ... dj.internal.shorthash(sprintf('`%s`.`%s`', ... table_instance.schema.dbname, tableName))); @@ -243,7 +243,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name fieldInfo.isnullable = strcmpi(fieldInfo.default,'null'); end - function [all_attr_sql, fk_sql, newattrs] = makeFK2(line, existingFields, inKey, hash) + function [all_attr_sql, fk_sql, newattrs] = makeFK(line, existingFields, inKey, hash) % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) % Add foreign key to SQL table definition. % sql: Modified in-place SQL to include foreign keys. diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index 92fc33f7..8ca60b5c 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -62,7 +62,7 @@ function create(self) }; def = sprintf('%s\n',def{:}); - [sql, ~] = dj.internal.Declare.declare2(self, def); + [sql, ~] = dj.internal.Declare.declare(self, def); self.schema.conn.query(sql); self.schema.reload end diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index 96f94786..bda2bbd7 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -368,7 +368,7 @@ function addForeignKey(self, target) if isa(target, 'dj.Table') target = sprintf('->%s', target.className); end - [attr_sql, fk_sql, ~] = dj.internal.Declare.makeFK2('', target, self.primaryKey, ... + [attr_sql, fk_sql, ~] = dj.internal.Declare.makeFK('', target, self.primaryKey, ... 
true, dj.internal.shorthash(self.fullTableName)); self.alter(sprintf('ADD %s%s', attr_sql, fk_sql)) end @@ -621,7 +621,7 @@ function create(self) end def = dj.internal.Declare.getDefinition(self); - [sql, external_stores] = dj.internal.Declare.declare2(self, def); + [sql, external_stores] = dj.internal.Declare.declare(self, def); sql = strrep(sql, '{database}', self.schema.dbname); for k=1:length(external_stores) table = self.schema.external.table(external_stores{k}); diff --git a/+tests/TestFetch.m b/+tests/TestFetch.m index bf167379..64ecdbbd 100644 --- a/+tests/TestFetch.m +++ b/+tests/TestFetch.m @@ -37,8 +37,8 @@ function testDescribe(testCase) q = University.All; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - [raw_sql, ~] = dj.internal.Declare.declare2(q, raw_def); - assembled_sql = dj.internal.Declare.declare2(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + assembled_sql = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end diff --git a/+tests/TestUuid.m b/+tests/TestUuid.m index 6715c045..fa999269 100644 --- a/+tests/TestUuid.m +++ b/+tests/TestUuid.m @@ -57,8 +57,8 @@ function testReverseEngineering(testCase) q = University.Message; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - [raw_sql, ~] = dj.internal.Declare.declare2(q, raw_def); - [assembled_sql, ~] = dj.internal.Declare.declare2(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end From e299938f982955dc3b352e1910edd64e5a45eb1e Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 15:54:51 -0600 Subject: [PATCH 09/21] Clean up. 
--- +dj/+internal/Declare.m | 52 ++--- +dj/+internal/External.m | 265 -------------------------- +dj/+internal/ExternalTable.m | 49 +++-- +dj/+internal/GeneralRelvar.m | 8 +- +dj/+internal/Header.m | 2 +- +dj/+internal/Table.m | 14 +- +dj/+store_plugins/{@File => }/File.m | 14 -- .gitignore | 3 +- matlab.prf | 12 -- 9 files changed, 69 insertions(+), 350 deletions(-) delete mode 100644 +dj/+internal/External.m rename +dj/+store_plugins/{@File => }/File.m (84%) delete mode 100644 matlab.prf diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index 76801839..fc8ffb17 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -81,7 +81,8 @@ strcmp(tableInfo.tier, dj.Schema.allowedTiers)}, ... dj.internal.fromCamelCase(tableInfo.className)); else - tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' table_instance.store]; + tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' ... + table_instance.store]; end otherwise @@ -112,9 +113,6 @@ stableInfo.className)); end -% sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, ... -% tableName); - % fields and foreign keys inKey = true; primaryFields = {}; @@ -130,11 +128,6 @@ case strncmp(line,'---',3) inKey = false; % foreign key case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') -% [sql, newFields] = dj.internal.Declare.makeFK2( ... -% sql, line, fields, inKey, ... -% dj.internal.shorthash(sprintf('`%s`.`%s`', ... -% table_instance.schema.dbname, tableName))); -% sql = sprintf('%s,\n', sql); [fk_attr_sql, fk_sql, newFields] = dj.internal.Declare.makeFK( ... line, fields, inKey, ... dj.internal.shorthash(sprintf('`%s`.`%s`', ... @@ -148,7 +141,6 @@ case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') % index case regexpi(line, '^(unique\s+)?index[^:]*$') -% sql = sprintf('%s%s,\n', sql, line); % add checks indexSql = [indexSql, line]; %#ok % attribute @@ -162,8 +154,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name primaryFields{end+1} = fieldInfo.name; %#ok end fields{end+1} = fieldInfo.name; %#ok - [attr_sql, store, foreignKeySql] = dj.internal.Declare.compileAttribute(fieldInfo, foreignKeySql); -% sql = sprintf('%s%s', sql, attr_sql); + [attr_sql, store, foreignKeySql] = ... + dj.internal.Declare.compileAttribute(fieldInfo, foreignKeySql); attributeSql = [attributeSql, attr_sql]; %#ok if ~isempty(store) external_stores{end+1} = store; %#ok @@ -173,17 +165,15 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name end end -% % add primary key declaration -% assert(~isempty(primaryFields), 'table must have a primary key') -% sql = sprintf('%sPRIMARY KEY (%s),\n' ,sql, backquotedList(primaryFields)); -% -% % finish the declaration -% sql = sprintf('%s\n) ENGINE = InnoDB, COMMENT "%s"', sql(1:end-2), ... -% tableInfo.comment); - - create_sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, tableName); - table_sql = {attributeSql', {['PRIMARY KEY (`' strjoin(primaryFields, '`,`') '`)']}, foreignKeySql', indexSql'}; + % create declaration + create_sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname,... + tableName); + % add attribute, primary key, foreign key, and index declaration + assert(~isempty(primaryFields), 'table must have a primary key') + table_sql = {attributeSql', {['PRIMARY KEY (`' strjoin(primaryFields, '`,`') ... + '`)']}, foreignKeySql', indexSql'}; table_sql = sprintf([strjoin(cat(1, table_sql{:}), ',\n') '\n']); + % finish the declaration engine_sql = sprintf(') ENGINE = InnoDB, COMMENT "%s"', tableInfo.comment); sql = sprintf('%s%s%s', create_sql, table_sql, engine_sql); @@ -194,12 +184,6 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name fprintf(sql) fprintf \n\n\n end - % table_comment => tableInfo.comment - % primary_key => tableInfo.primary_key - % attribute_sql => tableInfo.attribute_sql - % foreign_key_sql => tableInfo.foreign_key_sql - % index_sql => tableInfo.index_sql - % external_stores => tableInfo.external_stores function fieldInfo = parseAttrDef(line) % fieldInfo = PARSEATTRDEF(line) @@ -316,8 +300,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); fk_sql = sprintf(... ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... - 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, backquotedList(fkattrs), ... - rel.fullTableName, backquotedList(rel.primaryKey)); + 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, ... + backquotedList(fkattrs), rel.fullTableName, backquotedList(rel.primaryKey)); end function [field, foreignKeySql] = substituteSpecialType(field, category, foreignKeySql) @@ -330,7 +314,10 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name elseif any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) field.store = field.type((strfind(field.type,'@')+1):end); field.type = dj.internal.Declare.UUID_DATA_TYPE; - foreignKeySql = [foreignKeySql, sprintf('FOREIGN KEY (`%s`) REFERENCES `{database}`.`%s_%s` (`hash`) ON UPDATE RESTRICT ON DELETE RESTRICT', field.name, dj.internal.Declare.EXTERNAL_TABLE_ROOT, field.store)]; %#ok + foreignKeySql = [foreignKeySql, sprintf( ... + ['FOREIGN KEY (`%s`) REFERENCES `{database}`.`%s_%s` (`hash`) ON ' ... + 'UPDATE RESTRICT ON DELETE RESTRICT'], field.name, ... + dj.internal.Declare.EXTERNAL_TABLE_ROOT, field.store)]; %#ok end end @@ -363,7 +350,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name store = []; if any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES)) field.comment = [':' strip(field.type) ':' field.comment]; - [field, foreignKeySql] = dj.internal.Declare.substituteSpecialType(field, category, foreignKeySql); + [field, foreignKeySql] = dj.internal.Declare.substituteSpecialType(field, ... + category, foreignKeySql); if isfield(field, 'store') store = field.store; end diff --git a/+dj/+internal/External.m b/+dj/+internal/External.m deleted file mode 100644 index 40dced10..00000000 --- a/+dj/+internal/External.m +++ /dev/null @@ -1,265 +0,0 @@ -% dj.internal.External - an external static method class. -classdef External - methods (Static) - function config = buildConfig(config, validation_config, store_name) - function validateInput(address, target) - for k=1:numel(fieldnames(target)) - fn = fieldnames(target); - address{end+1} = '.'; - address{end+1} = fn{k}; - if ~isstruct(target.(fn{k})) - subscript = substruct(address{:}); - try - value = subsref(config, subscript); - vconfig = subsref(validation_config, subscript); - type_check = vconfig.type_check; - if ~type_check(value) - % Throw error for config that fails type validation - error('DataJoint:StoreConfig:WrongType', ... - 'Unexpected type `%s` for config `%s` in store `%s`. Expecting `%s`.', class(value), strjoin(address, ''), store_name, char(type_check)); - end - catch ME - if strcmp(ME.identifier,'MATLAB:nonExistentField') - % Throw error for extra config - error('DataJoint:StoreConfig:ExtraConfig', ... 
- 'Unexpected additional config `%s` specified in store `%s`.', strjoin(address, ''), store_name); - else - rethrow(ME); - end - end - else - validateInput(address, target.(fn{k})); - end - address(end) = []; - address(end) = []; - end - end - function validateConfig(address, target) - for k=1:numel(fieldnames(target)) - fn = fieldnames(target); - address{end+1} = '.'; - address{end+1} = fn{k}; - if any(strcmp('required',fieldnames(target))) - address(end) = []; - address(end) = []; - subscript = substruct(address{:}); - vconfig = subsref(validation_config, subscript); - required = vconfig.required; - try - value = subsref(config, subscript); - catch ME - if required && strcmp(ME.identifier,'MATLAB:nonExistentField') - % Throw error for required config - error('DataJoint:StoreConfig:MissingRequired', ... - 'Missing required config `%s` in store `%s`.', strjoin(address, ''), store_name); - elseif strcmp(ME.identifier,'MATLAB:nonExistentField') - % Set default for optional config - default = vconfig.default; - config = subsasgn(config, subscript, default); - end - end - break; - else - validateConfig(address, target.(fn{k})); - end - address(end) = []; - address(end) = []; - end - end - validateInput({}, config); - validateConfig({}, validation_config); - end - function store_targets = test() - % test correct multi - c = struct('local', ... - struct('store_config', ... - struct(... - 'protocol', 'file', ... - 'location', '/tmp/raphael'... - ), 'type_config', ... - struct(... - 'blob', struct(... - 'subfolding', [3,4], ... - 'cache', '/tmp/cache'... - )... - )... - ), ... - 'remote', ... - struct('store_config', ... - struct(... - 'protocol', 'file', ... - 'location', '/tmp/john'... - )... - )... - ); - -% % test incorrect string used -% c = struct('local', ... -% struct('store_config', ... -% struct(... -% 'protocol', "file", ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4], ... 
-% 'cache', '/tmp/cache'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/john'... -% )... -% )... -% ); - - % % test store not exists -% c = struct('local', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4], ... -% 'cache', '/tmp/cache'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 's3', ... -% 'location', '/tmp/john'... -% )... -% )... -% ); - - % % no required config -% c = struct('local', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4], ... -% 'cache', '/tmp/cache'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file' ... -% )... -% )... -% ); - - - % % primary config invalid type -% c = struct('local', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4], ... -% 'cache', '/tmp/cache'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', 10 ... -% )... -% )... -% ); - - % % secondary config invalid type -% c = struct('local', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4.2], ... -% 'cache', '/tmp/cache'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/john'... -% )... -% )... -% ); - - % % extra config -% c = struct('local', ... 
-% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/raphael'... -% ), 'type_config', ... -% struct(... -% 'blob', struct(... -% 'subfolding', [3,4], ... -% 'cache', '/tmp/cache',... -% 'stage', '/tmp/stage'... -% )... -% )... -% ), ... -% 'remote', ... -% struct('store_config', ... -% struct(... -% 'protocol', 'file', ... -% 'location', '/tmp/john'... -% )... -% )... -% ); - - store_map = fieldnames(c); - store_targets = struct(); - for k=1:numel(store_map) - assert(isstruct(c.(store_map{k})), 'Store `%s` not configured as struct.', store_map{k}); - assert(any(strcmp('store_config', fieldnames(c.(store_map{k})))), 'Store `%s` missing `store_config` key.', store_map{k}); - assert(isstruct(c.(store_map{k}).store_config), 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store_map{k}, class(c.(store_map{k}).store_config)); - assert(any(strcmp('protocol', fieldnames(c.(store_map{k}).store_config))), 'Store `%s` missing `store_config.protocol` key.', store_map{k}); - if isstring(c.(store_map{k}).store_config.protocol) - storePlugin = char(c.(store_map{k}).store_config.protocol); - else - assert(ischar(c.(store_map{k}).store_config.protocol), 'Store `%s` set `store_config.protocol` as `%s` but expecting `char||string`.', store_map{k}, class(c.(store_map{k}).store_config.protocol)); - storePlugin = c.(store_map{k}).store_config.protocol; - end - - storePlugin(1) = upper(storePlugin(1)); - try - config = dj.internal.External.buildConfig(c.(store_map{k}), dj.store_plugins.(storePlugin).validation_config, store_map{k}); - store_targets.(store_map{k}) = dj.store_plugins.(storePlugin)(config); - catch ME - if strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') - % Throw error if plugin not found - error('DataJoint:StorePlugin:Missing', ... 
- 'Missing store plugin `%s`.', storePlugin); - else - rethrow(ME); - end - end - end - end - end -end diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index 8ca60b5c..26106489 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -16,19 +16,27 @@ self.connection = connection; stores = dj.config('stores'); assert(isstruct(stores.(store)), 'Store `%s` not configured as struct.', store); - assert(any(strcmp('store_config', fieldnames(stores.(store)))), 'Store `%s` missing `store_config` key.', store); - assert(isstruct(stores.(store).store_config), 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, class(stores.(store).store_config)); - assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), 'Store `%s` missing `store_config.protocol` key.', store); + assert(any(strcmp('store_config', fieldnames(stores.(store)))), ... + 'Store `%s` missing `store_config` key.', store); + assert(isstruct(stores.(store).store_config), ... + 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, ... + class(stores.(store).store_config)); + assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), ... + 'Store `%s` missing `store_config.protocol` key.', store); if isstring(stores.(store).store_config.protocol) storePlugin = char(stores.(store).store_config.protocol); else - assert(ischar(stores.(store).store_config.protocol), 'Store `%s` set `store_config.protocol` as `%s` but expecting `char||string`.', store, class(stores.(store).store_config.protocol)); + assert(ischar(stores.(store).store_config.protocol), ... + ['Store `%s` set `store_config.protocol` as `%s` but ' ... + 'expecting `char||string`.'], store, ... 
+ class(stores.(store).store_config.protocol)); storePlugin = stores.(store).store_config.protocol; end storePlugin(1) = upper(storePlugin(1)); try - config = buildConfig(stores.(store), dj.store_plugins.(storePlugin).validation_config, store); + config = buildConfig(stores.(store), ... + dj.store_plugins.(storePlugin).validation_config, store); self.spec = dj.store_plugins.(storePlugin)(config); catch ME if strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') @@ -68,7 +76,8 @@ function create(self) end function uuid_path = make_uuid_path(self, uuid, suffix) uuid = strrep(uuid, '-', ''); - uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(subfold(uuid, self.spec.blob_config.subfolding), '/') '/' uuid suffix]); + uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(... + subfold(uuid, self.spec.blob_config.subfolding), '/') '/' uuid suffix]); end function uuid = upload_buffer(self, blob) packed_cell = mym('serialize {M}', blob); @@ -76,11 +85,14 @@ function create(self) uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); self.spec.upload_buffer(packed_cell{1}, self.make_uuid_path(uuid, '')); % insert tracking info - sql = sprintf('INSERT INTO %s (hash, size) VALUES (X''%s'', %i) ON DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP', self.fullTableName, uuid, length(packed_cell{1})); + sql = sprintf(['INSERT INTO %s (hash, size) VALUES (X''%s'', %i) ON ' ... + 'DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP'], self.fullTableName, ... + uuid, length(packed_cell{1})); self.connection.query(sql); end function blob = download_buffer(self, uuid) - blob = mym('deserialize', uint8(self.spec.download_buffer(self.make_uuid_path(uuid, '')))); + blob = mym('deserialize', uint8(self.spec.download_buffer(self.make_uuid_path(... + uuid, '')))); end function refs = references(self) sql = {... 
@@ -93,11 +105,15 @@ function create(self) end function used = used(self) ref = self.references; - used = self & cellfun(@(column, table) sprintf('hex(`hash`) in (select hex(`%s`) from %s)', column, table), ref.column_name, ref.referencing_table, 'UniformOutput', false); + used = self & cellfun(@(column, table) sprintf(... + 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... + ref.column_name, ref.referencing_table, 'UniformOutput', false); end function unused = unused(self) ref = self.references; - unused = self - cellfun(@(column, table) sprintf('hex(`hash`) in (select hex(`%s`) from %s)', column, table), ref.column_name, ref.referencing_table, 'UniformOutput', false); + unused = self - cellfun(@(column, table) sprintf(... + 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... + ref.column_name, ref.referencing_table, 'UniformOutput', false); end function paths = fetch_external_paths(self, varargin) external_content = fetch(self, 'hash', 'attachment_name', 'filepath', varargin{:}); @@ -130,7 +146,8 @@ function delete(self, delete_external_files, limit) end end function folded_array = subfold(name, folds) - folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds, 1:length(folds), cumsum(folds), 'UniformOutput', false); + folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds, 1:length(folds), ... + cumsum(folds), 'UniformOutput', false); end function config = buildConfig(config, validation_config, store_name) function validateInput(address, target) @@ -147,13 +164,16 @@ function validateInput(address, target) if ~type_check(value) % Throw error for config that fails type validation error('DataJoint:StoreConfig:WrongType', ... - 'Unexpected type `%s` for config `%s` in store `%s`. Expecting `%s`.', class(value), strjoin(address, ''), store_name, char(type_check)); + ['Unexpected type `%s` for config `%s` in store `%s`. ' ... + 'Expecting `%s`.'], class(value), strjoin(address, ''), ... 
+ store_name, char(type_check)); end catch ME if strcmp(ME.identifier,'MATLAB:nonExistentField') % Throw error for extra config error('DataJoint:StoreConfig:ExtraConfig', ... - 'Unexpected additional config `%s` specified in store `%s`.', strjoin(address, ''), store_name); + 'Unexpected additional config `%s` specified in store `%s`.', ... + strjoin(address, ''), store_name); else rethrow(ME); end @@ -182,7 +202,8 @@ function validateConfig(address, target) if required && strcmp(ME.identifier,'MATLAB:nonExistentField') % Throw error for required config error('DataJoint:StoreConfig:MissingRequired', ... - 'Missing required config `%s` in store `%s`.', strjoin(address, ''), store_name); + 'Missing required config `%s` in store `%s`.', ... + strjoin(address, ''), store_name); elseif strcmp(ME.identifier,'MATLAB:nonExistentField') % Set default for optional config default = vconfig.default; diff --git a/+dj/+internal/GeneralRelvar.m b/+dj/+internal/GeneralRelvar.m index fab70440..a31a2f0f 100644 --- a/+dj/+internal/GeneralRelvar.m +++ b/+dj/+internal/GeneralRelvar.m @@ -433,10 +433,6 @@ function restrict(self, varargin) function ret = minus(self, arg) % MINUS -- relational antijoin -% if iscell(arg) -% throwAsCaller(MException('DataJoint:invalidOperator',... -% 'Antijoin only accepts single restrictions')) -% end ret = self.copy; ret.restrict('not', arg) end @@ -946,7 +942,9 @@ case isa(cond, 'dj.internal.GeneralRelvar') for j = 1:length(data) if ~isempty(data(j).(attr(i).name)) uuid = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); - data(j).(attr(i).name) = connection.schemas.(attr(i).database).external.tables.(attr(i).store).download_buffer(uuid); + data(j).(attr(i).name) = connection.schemas.(... + attr(i).database).external.tables.(... 
+ attr(i).store).download_buffer(uuid); end end end diff --git a/+dj/+internal/Header.m b/+dj/+internal/Header.m index 09bff794..6faff671 100644 --- a/+dj/+internal/Header.m +++ b/+dj/+internal/Header.m @@ -110,7 +110,7 @@ if any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) attrs.isExternal(i) = true; attrs.store{i} = attrs.type{i}(regexp(attrs.type{i}, '@', 'once')+1:end); - end + end % strip field lengths off integer types attrs.type{i} = regexprep(sprintf('%s',attrs.type{i}), ... '((tiny|small|medium|big)?int)\(\d+\)','$1'); diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index bda2bbd7..ea26d037 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -248,7 +248,8 @@ function erd(self, up, down) % get foreign keys fk = self.schema.conn.foreignKeys; if ~isempty(fk) - fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName) && ~contains(s.ref, '~external'), fk)); + fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName) && ... + ~contains(s.ref, '~external'), fk)); end attributes_thus_far = {}; @@ -335,8 +336,8 @@ function addAttribute(self, definition, after) after = [' ' after]; end - [sql, ~, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - definition), NaN); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(... + dj.internal.Declare.parseAttrDef(definition), NaN); self.alter(sprintf('ADD COLUMN %s%s', sql, after)); end @@ -350,8 +351,8 @@ function alterAttribute(self, attrName, newDefinition) % dj.Table/alterAttribute - Modify the definition of attribute % attrName using its new line from the table definition % "newDefinition" - [sql, ~, ~] = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - newDefinition), NaN); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(... 
+ dj.internal.Declare.parseAttrDef(newDefinition), NaN); self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql)); end @@ -477,7 +478,8 @@ function syncDef(self) fprintf('File %s.m is not found\n', self.className); else if dj.config('safemode') ... - && ~strcmpi('yes', dj.internal.ask(sprintf('Update the table definition and class definition in %s?',path))) + && ~strcmpi('yes', dj.internal.ask(sprintf(... + 'Update the table definition and class definition in %s?',path))) disp 'No? Table definition left untouched.' else % read old file diff --git a/+dj/+store_plugins/@File/File.m b/+dj/+store_plugins/File.m similarity index 84% rename from +dj/+store_plugins/@File/File.m rename to +dj/+store_plugins/File.m index cd08b466..07983a0e 100644 --- a/+dj/+store_plugins/@File/File.m +++ b/+dj/+store_plugins/File.m @@ -63,17 +63,3 @@ function upload_buffer(buffer, external_filepath) end end end - - -%x make_external_filepath -- (validation) (for file use filesystem style -% directly, for s3 convert to posix path) - -%x upload_file -- (for uploading filepath, attach) -%x download_file -- (for downloading filepath, attach) -%x upload_buffer -- (for uploading blob) -%x download_buffer -- (for downloading blob) -%x remove_object -- (for deleting object from storage) -%x exists -- (verify if object exists in storage) - - - diff --git a/.gitignore b/.gitignore index 561b275d..72042dfc 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ mym/ notebook *getSchema.m docker-compose.yml -.vscode \ No newline at end of file +.vscode +matlab.prf \ No newline at end of file diff --git a/matlab.prf b/matlab.prf deleted file mode 100644 index c2801867..00000000 --- a/matlab.prf +++ /dev/null @@ -1,12 +0,0 @@ -#MATLAB Preferences -#Thu Feb 13 17:52:58 UTC 2020 -MatlabExitConfirm=Bfalse -RLOrderB2_WB_2014b_1=S0:1: -RLHiddenB2_WB_2014b_1=I16376 -CommandWindowClearConfirmation=Btrue -CurrentKeyBindingSet=SWindows -RLWidthB2_WB_2014b_1_1=I130 -RLWidthB2_WB_2014b_1_0=I130 
-RLPrevInitB2_WB_2014b_1=Btrue -NV_08002751cda240809722=S1584207724571 -HasAppendedMlxToFileExtensionsKey=Btrue \ No newline at end of file From 87fb0c4379c384ec227d7f53aabfea2f9a07b7f6 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 3 Mar 2020 16:04:21 -0600 Subject: [PATCH 10/21] Clean up2. --- +dj/+internal/Declare.m | 2 +- +dj/+internal/Table.m | 4 ++-- +dj/Relvar.m | 9 ++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index fc8ffb17..98ce7e62 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -290,7 +290,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name rel.tableHeader.names)); fieldInfo.name = newattrs{i}; fieldInfo.nullabe = ~inKey; % nonprimary references are nullable - [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, NaN); + [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, []); all_attr_sql = sprintf('%s%s,\n', all_attr_sql, attr_sql); end all_attr_sql = all_attr_sql(1:end-2); diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index ea26d037..1751bd06 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -337,7 +337,7 @@ function addAttribute(self, definition, after) end [sql, ~, ~] = dj.internal.Declare.compileAttribute(... - dj.internal.Declare.parseAttrDef(definition), NaN); + dj.internal.Declare.parseAttrDef(definition), []); self.alter(sprintf('ADD COLUMN %s%s', sql, after)); end @@ -352,7 +352,7 @@ function alterAttribute(self, attrName, newDefinition) % attrName using its new line from the table definition % "newDefinition" [sql, ~, ~] = dj.internal.Declare.compileAttribute(... 
- dj.internal.Declare.parseAttrDef(newDefinition), NaN); + dj.internal.Declare.parseAttrDef(newDefinition), []); self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql)); end diff --git a/+dj/Relvar.m b/+dj/Relvar.m index 2395a203..050645f1 100755 --- a/+dj/Relvar.m +++ b/+dj/Relvar.m @@ -27,7 +27,8 @@ self.schema.conn.query(sprintf('DELETE FROM %s', self.sql)) count = []; if nargin > 1 && getCount - count = self.schema.conn.query(sprintf('SELECT count(*) as count FROM %s', self.sql)).count; + count = self.schema.conn.query(sprintf('SELECT count(*) as count FROM %s', ... + self.sql)).count; end end @@ -108,7 +109,8 @@ function cleanup(self) rels = rels(counts>0); % confirm and delete - if dj.config('safemode') && ~strcmpi('yes',dj.internal.ask('Proceed to delete?')) + if dj.config('safemode') && ~strcmpi('yes', ... + dj.internal.ask('Proceed to delete?')) disp 'delete canceled' else self.schema.conn.startTransaction @@ -236,7 +238,8 @@ function insert(self, tuples, command) placeholder = '"{M}"'; else placeholder = '"{B}"'; - value = self.schema.external.tables.(header.attributes(attr_idx).store).upload_buffer(value); + value = self.schema.external.tables.(... + header.attributes(attr_idx).store).upload_buffer(value); hexstring = value'; reshapedString = reshape(hexstring,2,16); hexMtx = reshapedString.'; From 7cd9e2df9a1a7d319dc139c36a73eac991d8b8cc Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Wed, 4 Mar 2020 14:28:54 -0600 Subject: [PATCH 11/21] Add discussed config changes to external. 
--- +dj/+internal/ExternalTable.m | 57 ++++++++++++++++-------- +dj/+store_plugins/File.m | 81 +++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 46 deletions(-) diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index 26106489..d37d3ed5 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -16,23 +16,34 @@ self.connection = connection; stores = dj.config('stores'); assert(isstruct(stores.(store)), 'Store `%s` not configured as struct.', store); - assert(any(strcmp('store_config', fieldnames(stores.(store)))), ... - 'Store `%s` missing `store_config` key.', store); - assert(isstruct(stores.(store).store_config), ... - 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, ... - class(stores.(store).store_config)); - assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), ... - 'Store `%s` missing `store_config.protocol` key.', store); - if isstring(stores.(store).store_config.protocol) - storePlugin = char(stores.(store).store_config.protocol); +% assert(any(strcmp('store_config', fieldnames(stores.(store)))), ... +% 'Store `%s` missing `store_config` key.', store); +% assert(isstruct(stores.(store).store_config), ... +% 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, ... +% class(stores.(store).store_config)); +% assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), ... +% 'Store `%s` missing `store_config.protocol` key.', store); + assert(any(strcmp('protocol', fieldnames(stores.(store)))), ... + 'Store `%s` missing `protocol` key.', store); +% if isstring(stores.(store).store_config.protocol) +% storePlugin = char(stores.(store).store_config.protocol); +% else +% assert(ischar(stores.(store).store_config.protocol), ... +% ['Store `%s` set `store_config.protocol` as `%s` but ' ... +% 'expecting `char||string`.'], store, ... 
+% class(stores.(store).store_config.protocol)); +% storePlugin = stores.(store).store_config.protocol; +% end + if isstring(stores.(store).protocol) + storePlugin = char(stores.(store).protocol); else - assert(ischar(stores.(store).store_config.protocol), ... - ['Store `%s` set `store_config.protocol` as `%s` but ' ... + assert(ischar(stores.(store).protocol), ... + ['Store `%s` set `protocol` as `%s` but ' ... 'expecting `char||string`.'], store, ... - class(stores.(store).store_config.protocol)); - storePlugin = stores.(store).store_config.protocol; + class(stores.(store).protocol)); + storePlugin = stores.(store).protocol; end - + storePlugin(1) = upper(storePlugin(1)); try config = buildConfig(stores.(store), ... @@ -77,7 +88,7 @@ function create(self) function uuid_path = make_uuid_path(self, uuid, suffix) uuid = strrep(uuid, '-', ''); uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(... - subfold(uuid, self.spec.blob_config.subfolding), '/') '/' uuid suffix]); + subfold(uuid, self.spec.type_config.subfolding), '/') '/' uuid suffix]); end function uuid = upload_buffer(self, blob) packed_cell = mym('serialize {M}', blob); @@ -190,24 +201,32 @@ function validateConfig(address, target) fn = fieldnames(target); address{end+1} = '.'; address{end+1} = fn{k}; - if any(strcmp('required',fieldnames(target))) + if any(strcmp('mode',fieldnames(target))) address(end) = []; address(end) = []; subscript = substruct(address{:}); vconfig = subsref(validation_config, subscript); - required = vconfig.required; + mode = vconfig.mode; + if mode(config.datajoint_type)==-1 + % Throw error for rejected config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Incompatible additional config `%s` specified in store `%s`.', ... 
+ strjoin(address, ''), store_name); + end try value = subsref(config, subscript); catch ME - if required && strcmp(ME.identifier,'MATLAB:nonExistentField') + if mode(config.datajoint_type)==1 && strcmp(ME.identifier,'MATLAB:nonExistentField') % Throw error for required config error('DataJoint:StoreConfig:MissingRequired', ... 'Missing required config `%s` in store `%s`.', ... strjoin(address, ''), store_name); - elseif strcmp(ME.identifier,'MATLAB:nonExistentField') + elseif mode(config.datajoint_type)==0 && strcmp(ME.identifier,'MATLAB:nonExistentField') % Set default for optional config default = vconfig.default; config = subsasgn(config, subscript, default); + else + rethrow(ME); end end break; diff --git a/+dj/+store_plugins/File.m b/+dj/+store_plugins/File.m index 07983a0e..7da6f65e 100644 --- a/+dj/+store_plugins/File.m +++ b/+dj/+store_plugins/File.m @@ -1,37 +1,58 @@ % dj.internal.File - an external storage class for local file stores. classdef File properties (Hidden, Constant) +% validation_config = struct( ... +% 'store_config', struct( ... +% 'protocol', struct( ... +% 'required', true, ... +% 'type_check', @(x) ischar(x) ... +% ), ... +% 'location', struct( ... +% 'required', true, ... +% 'type_check', @(x) ischar(x) ... +% ) ... +% ), ... +% 'type_config', struct( ... +% 'blob', struct( ... +% 'subfolding', struct( ... +% 'required', false, ... +% 'type_check', @(x) all(floor(x) == x), ... +% 'default', [2, 2] ... +% ), ... +% 'cache', struct( ... +% 'required', false, ... +% 'type_check', @(x) ischar(x), ... +% 'default', [] ... +% ) ... +% ) ... +% ) ... +% ) + % mode = -1(reject), 0(optional), 1(require) validation_config = struct( ... - 'store_config', struct( ... - 'protocol', struct( ... - 'required', true, ... - 'type_check', @(x) ischar(x) ... + 'protocol', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) ... ), ... - 'location', struct( ... - 'required', true, ... - 'type_check', @(x) ischar(x) ... - ) ... 
- ), ... - 'type_config', struct( ... - 'blob', struct( ... - 'subfolding', struct( ... - 'required', false, ... - 'type_check', @(x) all(floor(x) == x), ... - 'default', [2, 2] ... - ), ... - 'cache', struct( ... - 'required', false, ... - 'type_check', @(x) ischar(x), ... - 'default', [] ... - ) ... + 'datajoint_type', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) && any(strcmpi(self, {'blob'}))... + ), ... + 'location', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'subfolding', struct( ... + 'mode', @(datajoint_type) -1 + any(strcmpi(datajoint_type, {'blob'})), ... + 'type_check', @(self) all(floor(self) == self), ... + 'default', [2, 2] ... ) ... - ) ... - ) + ) end properties protocol + datajoint_type location - blob_config + type_config end methods (Static) function result = exists(external_filepath) @@ -54,9 +75,15 @@ function upload_buffer(buffer, external_filepath) end methods function self = File(config) - self.protocol = config.store_config.protocol; - self.location = config.store_config.location; - self.blob_config = config.type_config.blob; +% self.protocol = config.store_config.protocol; +% self.location = config.store_config.location; + self.protocol = config.protocol; + self.datajoint_type = config.datajoint_type; + self.location = config.location; + self.type_config = struct(); + if strcmpi(self.datajoint_type, 'blob') + self.type_config.subfolding = config.subfolding; + end end function external_filepath = make_external_filepath(self, relative_filepath) external_filepath = [self.location '/' relative_filepath]; From 393e3e3dd82c5dd44a05d31462aa3b2f1880a5c0 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Wed, 4 Mar 2020 15:19:25 -0600 Subject: [PATCH 12/21] Add backward compatible configuration to current dj-python. 
--- +dj/+internal/ExternalTable.m | 27 +++++++++++++++++++-------- +dj/+store_plugins/File.m | 32 ++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index d37d3ed5..9dd57155 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -1,6 +1,9 @@ % dj.internal.External - an external static method class. % classdef ExternalTable < dj.internal.Table classdef ExternalTable < dj.Relvar + properties (Hidden, Constant) + BACKWARD_SUPPORT = true + end properties store spec @@ -48,16 +51,19 @@ try config = buildConfig(stores.(store), ... dj.store_plugins.(storePlugin).validation_config, store); - self.spec = dj.store_plugins.(storePlugin)(config); catch ME if strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') % Throw error if plugin not found error('DataJoint:StorePlugin:Missing', ... 'Missing store plugin `%s`.', storePlugin); + elseif dj.internal.ExternalTable.BACKWARD_SUPPORT && contains(ME.identifier,'DataJoint:StoreConfig') + config = buildConfig(stores.(store), ... + dj.store_plugins.(storePlugin).backward_validation_config, store); else rethrow(ME); end end + self.spec = dj.store_plugins.(storePlugin)(config); end function create(self) % parses the table declration and declares the table @@ -207,21 +213,20 @@ function validateConfig(address, target) subscript = substruct(address{:}); vconfig = subsref(validation_config, subscript); mode = vconfig.mode; - if mode(config.datajoint_type)==-1 - % Throw error for rejected config - error('DataJoint:StoreConfig:ExtraConfig', ... - 'Incompatible additional config `%s` specified in store `%s`.', ... 
- strjoin(address, ''), store_name); + if any(strcmp('datajoint_type', fieldnames(config))) + mode_result = mode(config.datajoint_type); + else + mode_result = mode('not_necessary'); end try value = subsref(config, subscript); catch ME - if mode(config.datajoint_type)==1 && strcmp(ME.identifier,'MATLAB:nonExistentField') + if mode_result==1 && strcmp(ME.identifier,'MATLAB:nonExistentField') % Throw error for required config error('DataJoint:StoreConfig:MissingRequired', ... 'Missing required config `%s` in store `%s`.', ... strjoin(address, ''), store_name); - elseif mode(config.datajoint_type)==0 && strcmp(ME.identifier,'MATLAB:nonExistentField') + elseif mode_result==0 && strcmp(ME.identifier,'MATLAB:nonExistentField') % Set default for optional config default = vconfig.default; config = subsasgn(config, subscript, default); @@ -229,6 +234,12 @@ function validateConfig(address, target) rethrow(ME); end end + if mode_result==-1 + % Throw error for rejected config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Incompatible additional config `%s` specified in store `%s`.', ... + strjoin(address, ''), store_name); + end break; else validateConfig(address, target.(fn{k})); diff --git a/+dj/+store_plugins/File.m b/+dj/+store_plugins/File.m index 7da6f65e..df9cbb0b 100644 --- a/+dj/+store_plugins/File.m +++ b/+dj/+store_plugins/File.m @@ -29,13 +29,13 @@ % ) % mode = -1(reject), 0(optional), 1(require) validation_config = struct( ... - 'protocol', struct( ... + 'datajoint_type', struct( ... 'mode', @(datajoint_type) 1, ... - 'type_check', @(self) ischar(self) ... + 'type_check', @(self) ischar(self) && any(strcmpi(self, {'blob', 'filepath'}))... ), ... - 'datajoint_type', struct( ... + 'protocol', struct( ... 'mode', @(datajoint_type) 1, ... - 'type_check', @(self) ischar(self) && any(strcmpi(self, {'blob'}))... + 'type_check', @(self) ischar(self) ... ), ... 'location', struct( ... 'mode', @(datajoint_type) 1, ... @@ -47,6 +47,21 @@ 'default', [2, 2] ... ) ... 
) + backward_validation_config = struct( ... + 'protocol', struct( ... + 'mode', @(unused) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'location', struct( ... + 'mode', @(unused) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'subfolding', struct( ... + 'mode', @(unused) 0, ... + 'type_check', @(self) all(floor(self) == self), ... + 'default', [2, 2] ... + ) ... + ) end properties protocol @@ -78,11 +93,16 @@ function upload_buffer(buffer, external_filepath) % self.protocol = config.store_config.protocol; % self.location = config.store_config.location; self.protocol = config.protocol; - self.datajoint_type = config.datajoint_type; self.location = config.location; self.type_config = struct(); - if strcmpi(self.datajoint_type, 'blob') + + if dj.internal.ExternalTable.BACKWARD_SUPPORT && ~any(strcmp('datajoint_type', fieldnames(config))) self.type_config.subfolding = config.subfolding; + else + self.datajoint_type = config.datajoint_type; + if strcmpi(self.datajoint_type, 'blob') + self.type_config.subfolding = config.subfolding; + end end end function external_filepath = make_external_filepath(self, relative_filepath) From 17cb122f18a1d8de2195832e1ff5668f3f6bece3 Mon Sep 17 00:00:00 2001 From: Raphael Guzman Date: Wed, 4 Mar 2020 16:54:50 -0600 Subject: [PATCH 13/21] Fix WIN path issues. 
--- +dj/+store_plugins/File.m | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/+dj/+store_plugins/File.m b/+dj/+store_plugins/File.m index df9cbb0b..17816497 100644 --- a/+dj/+store_plugins/File.m +++ b/+dj/+store_plugins/File.m @@ -86,6 +86,7 @@ function upload_buffer(buffer, external_filepath) function result = download_buffer(external_filepath) fileID = fopen(external_filepath, 'r'); result = fread(fileID); + fclose(fileID); end end methods @@ -93,7 +94,7 @@ function upload_buffer(buffer, external_filepath) % self.protocol = config.store_config.protocol; % self.location = config.store_config.location; self.protocol = config.protocol; - self.location = config.location; + self.location = strrep(config.location, '\', '/'); self.type_config = struct(); if dj.internal.ExternalTable.BACKWARD_SUPPORT && ~any(strcmp('datajoint_type', fieldnames(config))) From f69a14c06b74ac760954c1d961ab88024fd03bcc Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Thu, 5 Mar 2020 08:41:52 -0600 Subject: [PATCH 14/21] Add blob caching feature with backward compatibility. --- +dj/+internal/ExternalTable.m | 45 +++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index 9dd57155..e27798de 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -108,8 +108,49 @@ function create(self) self.connection.query(sql); end function blob = download_buffer(self, uuid) - blob = mym('deserialize', uint8(self.spec.download_buffer(self.make_uuid_path(... 
- uuid, '')))); + try + cache_folder = strrep(dj.config('blobCache'), '\', '/'); + catch ME + if strcmp(ME.identifier,'DataJoint:Config:InvalidKey') + cache_folder = []; + else + rethrow(ME); + end + end + if dj.internal.ExternalTable.BACKWARD_SUPPORT && isempty(cache_folder) + try + cache_folder = strrep(dj.config('cache'), '\', '/'); + catch ME + if strcmp(ME.identifier,'DataJoint:Config:InvalidKey') + cache_folder = []; + else + rethrow(ME); + end + end + end + blob = []; + if ~isempty(cache_folder) + cache_file = [cache_folder '/' self.schema.dbname '/' strjoin(... + subfold(uuid, self.spec.type_config.subfolding), '/') '/' uuid '']; + try + fileID = fopen(cache_file, 'r'); + result = fread(fileID); + fclose(fileID); + blob = mym('deserialize', uint8(result)); + catch + end + end + if isempty(blob) + blob_binary = uint8(self.spec.download_buffer(self.make_uuid_path(uuid, ''))); + blob = mym('deserialize', blob_binary); + if ~isempty(cache_folder) + [~,start_idx,~] = regexp(cache_file, '/', 'match', 'start', 'end'); + mkdir(cache_file(1:(start_idx(end)-1))); + fileID = fopen(cache_file, 'w'); + fwrite(fileID, blob_binary); + fclose(fileID); + end + end end function refs = references(self) sql = {... From 5ae3b81872a547ed6b6a81d1ee60d0ce86cd03ae Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Thu, 5 Mar 2020 13:24:27 -0600 Subject: [PATCH 15/21] Clean up. --- +dj/+internal/ExternalMapping.m | 6 ++- +dj/+internal/ExternalTable.m | 72 +++++++++++++++++---------------- +dj/+store_plugins/File.m | 42 +++++-------------- +dj/Connection.m | 8 ++-- +dj/Schema.m | 18 ++++++--- 5 files changed, 69 insertions(+), 77 deletions(-) diff --git a/+dj/+internal/ExternalMapping.m b/+dj/+internal/ExternalMapping.m index 05dce9a0..9dd3313d 100644 --- a/+dj/+internal/ExternalMapping.m +++ b/+dj/+internal/ExternalMapping.m @@ -1,4 +1,8 @@ -% dj.internal.External - an external static method class. 
+% dj.internal.ExternalMapping - The external manager contains all the tables for all external +% stores for a given schema. +% :Example: +% e = dj.internal.ExternalMapping(schema) +% external_table = e.table(store) classdef ExternalMapping < handle properties schema diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index e27798de..dcfffde5 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -1,8 +1,8 @@ -% dj.internal.External - an external static method class. -% classdef ExternalTable < dj.internal.Table +% dj.internal.ExternalTable - The table tracking externally stored objects. +% Declare as dj.internal.ExternalTable(connection, store, schema) classdef ExternalTable < dj.Relvar properties (Hidden, Constant) - BACKWARD_SUPPORT = true + BACKWARD_SUPPORT_DJPY012 = true end properties store @@ -13,30 +13,14 @@ end methods function self = ExternalTable(connection, store, schema) -% curr_schema = self.schema; + % construct table using config validation criteria supplied by store plugin self.store = store; self.schema = schema; self.connection = connection; stores = dj.config('stores'); assert(isstruct(stores.(store)), 'Store `%s` not configured as struct.', store); -% assert(any(strcmp('store_config', fieldnames(stores.(store)))), ... -% 'Store `%s` missing `store_config` key.', store); -% assert(isstruct(stores.(store).store_config), ... -% 'Store `%s` set `store_config` as `%s` but expecting `struct`.', store, ... -% class(stores.(store).store_config)); -% assert(any(strcmp('protocol', fieldnames(stores.(store).store_config))), ... -% 'Store `%s` missing `store_config.protocol` key.', store); assert(any(strcmp('protocol', fieldnames(stores.(store)))), ... 'Store `%s` missing `protocol` key.', store); -% if isstring(stores.(store).store_config.protocol) -% storePlugin = char(stores.(store).store_config.protocol); -% else -% assert(ischar(stores.(store).store_config.protocol), ... 
-% ['Store `%s` set `store_config.protocol` as `%s` but ' ... -% 'expecting `char||string`.'], store, ... -% class(stores.(store).store_config.protocol)); -% storePlugin = stores.(store).store_config.protocol; -% end if isstring(stores.(store).protocol) storePlugin = char(stores.(store).protocol); else @@ -56,7 +40,8 @@ % Throw error if plugin not found error('DataJoint:StorePlugin:Missing', ... 'Missing store plugin `%s`.', storePlugin); - elseif dj.internal.ExternalTable.BACKWARD_SUPPORT && contains(ME.identifier,'DataJoint:StoreConfig') + elseif dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && contains(... + ME.identifier,'DataJoint:StoreConfig') config = buildConfig(stores.(store), ... dj.store_plugins.(storePlugin).backward_validation_config, store); else @@ -92,11 +77,14 @@ function create(self) self.schema.reload end function uuid_path = make_uuid_path(self, uuid, suffix) + % create external path based on the uuid hash uuid = strrep(uuid, '-', ''); uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(... 
subfold(uuid, self.spec.type_config.subfolding), '/') '/' uuid suffix]); end + % -- BLOBS -- function uuid = upload_buffer(self, blob) + % put blob packed_cell = mym('serialize {M}', blob); % https://www.mathworks.com/matlabcentral/fileexchange/25921-getmd5 uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); @@ -108,6 +96,7 @@ function create(self) self.connection.query(sql); end function blob = download_buffer(self, uuid) + % get blob via uuid (with caching support) try cache_folder = strrep(dj.config('blobCache'), '\', '/'); catch ME @@ -117,7 +106,7 @@ function create(self) rethrow(ME); end end - if dj.internal.ExternalTable.BACKWARD_SUPPORT && isempty(cache_folder) + if dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && isempty(cache_folder) try cache_folder = strrep(dj.config('cache'), '\', '/'); catch ME @@ -152,7 +141,9 @@ function create(self) end end end + % -- UTILITIES -- function refs = references(self) + % generator of referencing table names and their referencing columns sql = {... 'SELECT concat(''`'', table_schema, ''`.`'', table_name, ''`'') as referencing_table, column_name ' 'FROM information_schema.key_column_usage ' @@ -161,19 +152,9 @@ function create(self) sql = sprintf('%s',sql{:}); refs = self.connection.query(sql, self.plainTableName, self.schema.dbname); end - function used = used(self) - ref = self.references; - used = self & cellfun(@(column, table) sprintf(... - 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... - ref.column_name, ref.referencing_table, 'UniformOutput', false); - end - function unused = unused(self) - ref = self.references; - unused = self - cellfun(@(column, table) sprintf(... - 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... - ref.column_name, ref.referencing_table, 'UniformOutput', false); - end function paths = fetch_external_paths(self, varargin) + % generate complete external filepaths from the query. 
+ % Each element is a cell: {uuid, path} external_content = fetch(self, 'hash', 'attachment_name', 'filepath', varargin{:}); paths = cell(length(external_content),1); for i = 1:length(external_content) @@ -185,7 +166,26 @@ function create(self) paths{i}{1} = external_content(i).hash; end end + function unused = unused(self) + % query expression for unused hashes + ref = self.references; + unused = self - cellfun(@(column, table) sprintf(... + 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... + ref.column_name, ref.referencing_table, 'UniformOutput', false); + end + function used = used(self) + % query expression for used hashes + ref = self.references; + used = self & cellfun(@(column, table) sprintf(... + 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... + ref.column_name, ref.referencing_table, 'UniformOutput', false); + end function delete(self, delete_external_files, limit) + % DELETE(self, delete_external_files, limit) + % Remove external tracking table records and optionally remove from ext storage + % self: Store Table instance. + % delete_external_files: Remove from external storage. + % limit: Limit the number of external objects to remove if ~delete_external_files delQuick(self.unused); else @@ -204,11 +204,14 @@ function delete(self, delete_external_files, limit) end end function folded_array = subfold(name, folds) + % subfolding for external storage: e.g. subfold('aBCdefg', [2, 3]) --> {'ab','cde'} folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds, 1:length(folds), ... 
cumsum(folds), 'UniformOutput', false); end function config = buildConfig(config, validation_config, store_name) + % builds out store config with defaults set function validateInput(address, target) + % validates supplied config for k=1:numel(fieldnames(target)) fn = fieldnames(target); address{end+1} = '.'; @@ -244,6 +247,7 @@ function validateInput(address, target) end end function validateConfig(address, target) + % verifies if input contains all expected config for k=1:numel(fieldnames(target)) fn = fieldnames(target); address{end+1} = '.'; diff --git a/+dj/+store_plugins/File.m b/+dj/+store_plugins/File.m index 17816497..f51c1575 100644 --- a/+dj/+store_plugins/File.m +++ b/+dj/+store_plugins/File.m @@ -1,37 +1,12 @@ % dj.internal.File - an external storage class for local file stores. classdef File properties (Hidden, Constant) -% validation_config = struct( ... -% 'store_config', struct( ... -% 'protocol', struct( ... -% 'required', true, ... -% 'type_check', @(x) ischar(x) ... -% ), ... -% 'location', struct( ... -% 'required', true, ... -% 'type_check', @(x) ischar(x) ... -% ) ... -% ), ... -% 'type_config', struct( ... -% 'blob', struct( ... -% 'subfolding', struct( ... -% 'required', false, ... -% 'type_check', @(x) all(floor(x) == x), ... -% 'default', [2, 2] ... -% ), ... -% 'cache', struct( ... -% 'required', false, ... -% 'type_check', @(x) ischar(x), ... -% 'default', [] ... -% ) ... -% ) ... -% ) ... -% ) % mode = -1(reject), 0(optional), 1(require) validation_config = struct( ... 'datajoint_type', struct( ... 'mode', @(datajoint_type) 1, ... - 'type_check', @(self) ischar(self) && any(strcmpi(self, {'blob', 'filepath'}))... + 'type_check', @(self) ischar(self) && any(strcmpi(... + self, {'blob', 'filepath'}))... ), ... 'protocol', struct( ... 'mode', @(datajoint_type) 1, ... 
@@ -70,13 +45,12 @@ type_config end methods (Static) - function result = exists(external_filepath) - result = isfile(external_filepath); - end function remove_object(external_filepath) + % delete an object from the store delete(external_filepath); end function upload_buffer(buffer, external_filepath) + % put blob [~,start_idx,~] = regexp(external_filepath, '/', 'match', 'start', 'end'); mkdir(external_filepath(1:(start_idx(end)-1))); fileID = fopen(external_filepath, 'w'); @@ -84,6 +58,7 @@ function upload_buffer(buffer, external_filepath) fclose(fileID); end function result = download_buffer(external_filepath) + % get blob fileID = fopen(external_filepath, 'r'); result = fread(fileID); fclose(fileID); @@ -91,13 +66,13 @@ function upload_buffer(buffer, external_filepath) end methods function self = File(config) -% self.protocol = config.store_config.protocol; -% self.location = config.store_config.location; + % initialize store self.protocol = config.protocol; self.location = strrep(config.location, '\', '/'); self.type_config = struct(); - if dj.internal.ExternalTable.BACKWARD_SUPPORT && ~any(strcmp('datajoint_type', fieldnames(config))) + if dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && ~any(strcmp(... 
+ 'datajoint_type', fieldnames(config))) self.type_config.subfolding = config.subfolding; else self.datajoint_type = config.datajoint_type; @@ -107,6 +82,7 @@ function upload_buffer(buffer, external_filepath) end end function external_filepath = make_external_filepath(self, relative_filepath) + % resolve the complete external path based on the relative path external_filepath = [self.location '/' relative_filepath]; end end diff --git a/+dj/Connection.m b/+dj/Connection.m index 05139d2a..27af7324 100644 --- a/+dj/Connection.m +++ b/+dj/Connection.m @@ -11,7 +11,7 @@ schemas % registered schema objects % dependency lookups by table name - foreignKeys % maps table names to their referenced table names (primary foreign key) + foreignKeys % maps table names to their referenced table names (primary foreign key) end properties(Access = private) @@ -70,7 +70,8 @@ function loadDependencies(self, schema) '\((?[`\w, ]+)\)'); for tabName = schema.headers.keys - fk = self.query(sprintf('SHOW CREATE TABLE `%s`.`%s`', schema.dbname, tabName{1})); + fk = self.query(sprintf('SHOW CREATE TABLE `%s`.`%s`', schema.dbname, ... + tabName{1})); fk = strtrim(regexp(fk.('Create Table'){1},'\n','split')'); fk = regexp(fk, pat, 'names'); fk = [fk{:}]; @@ -136,7 +137,8 @@ function loadDependencies(self, schema) s = regexp(fullTableName, '^`(?.+)`.`(?[#~\w\d]+)`$','names'); className = fullTableName; if ~isempty(s) && self.packages.isKey(s.dbname) - className = sprintf('%s.%s',self.packages(s.dbname),dj.internal.toCamelCase(s.tablename)); + className = sprintf('%s.%s',self.packages(s.dbname),dj.internal.toCamelCase(... + s.tablename)); elseif strict error('Unknown package for "%s". Activate its schema first.', fullTableName) end diff --git a/+dj/Schema.m b/+dj/Schema.m index b1aca5a5..0bf3854a 100755 --- a/+dj/Schema.m +++ b/+dj/Schema.m @@ -8,14 +8,17 @@ % to exist in Matlab. Tab completion of table names is possible because the % table names are added as dynamic properties of TableAccessor. 
% -%Complete documentation is available at Datajoint wiki +%Complete documentation is available at +% Datajoint wiki classdef Schema < handle properties(SetAccess = private) - package % the package (directory starting with a +) that stores schema classes, must be on path + package % the package (directory starting with a +) that stores schema classes, + % must be on path dbname % database (schema) name - prefix='' % optional table prefix, allowing multiple schemas per database -- remove this feature if not used + prefix='' % optional table prefix, allowing multiple schemas per database -- remove + % this feature if not used conn % handle to the dj.Connection object loaded = false tableNames % tables indexed by classNames @@ -113,7 +116,8 @@ function makeClass(self, className) else existingTable = []; choice = dj.internal.ask(... - '\nChoose table tier:\n L=lookup\n M=manual\n I=imported\n C=computed\n P=part\n',... + ['\nChoose table tier:\n L=lookup\n M=manual\n I=imported\n ' ... + 'C=computed\n P=part\n'],... {'L','M','I','C','P'}); tierClass = tierClassMap.(choice); isAuto = ismember(tierClass, {'dj.Imported', 'dj.Computed'}); @@ -189,8 +193,9 @@ function reload(self, force) tableInfo = dj.struct.rename(tableInfo,'Name','name','Comment','comment'); % determine table tier (see dj.internal.Table) + % regular expressions to determine table tier re = cellfun(@(x) sprintf('^%s%s[a-z][a-z0-9_]*$',self.prefix,x), ... - dj.Schema.tierPrefixes, 'UniformOutput', false); % regular expressions to determine table tier + dj.Schema.tierPrefixes, 'UniformOutput', false); if strcmpi(dj.config('loglevel'), 'DEBUG') fprintf('%.3g s\nloading field information... 
', toc), tic @@ -199,7 +204,8 @@ function reload(self, force) tierIdx = ~cellfun(@isempty, regexp(info.name, re, 'once')); assert(sum(tierIdx)==1) info.tier = dj.Schema.allowedTiers{tierIdx}; - self.tableNames(sprintf('%s.%s',self.package,dj.internal.toCamelCase(info.name(length(self.prefix)+1:end)))) = info.name; + self.tableNames(sprintf('%s.%s',self.package,dj.internal.toCamelCase(... + info.name(length(self.prefix)+1:end)))) = info.name; self.headers(info.name) = dj.internal.Header.initFromDatabase(self,info); end From 0d08250cf693587bbc6cb9114ba0b2a9e47875d0 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Thu, 5 Mar 2020 13:31:23 -0600 Subject: [PATCH 16/21] Update travis nginx use. --- LNX-docker-compose.yml | 28 ++++++++++++++++++++-------- local-docker-compose.yml | 38 +++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index d0e0dc03..d803e070 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -4,21 +4,38 @@ x-net: &net networks: - main services: + db: + <<: *net + image: datajoint/mysql:${MYSQL_TAG} + environment: + - MYSQL_ROOT_PASSWORD=simple + fakeservices.datajoint.io: + <<: *net + image: raphaelguzman/nginx:v0.0.3 + environment: + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + ports: + - "443:443" + - "3306:3306" + depends_on: + db: + condition: service_healthy app: <<: *net environment: - DISPLAY - MATLAB_LICENSE - MATLAB_USER - - DJ_HOST=mysql + - DJ_HOST=fakeservices.datajoint.io - DJ_USER=root - DJ_PASS=simple - - DJ_TEST_HOST=mysql + - DJ_TEST_HOST=fakeservices.datajoint.io - DJ_TEST_USER=datajoint - DJ_TEST_PASSWORD=datajoint image: raphaelguzman/matlab:${MATLAB_VERSION}-MIN depends_on: - mysql: + fakeservices.datajoint.io: condition: service_healthy user: ${MATLAB_UID}:${MATLAB_GID} working_dir: /src @@ -34,10 +51,5 @@ services: volumes: - .:/src - /tmp/.X11-unix:/tmp/.X11-unix:rw - mysql: - <<: *net - image: 
datajoint/mysql:${MYSQL_TAG} - environment: - - MYSQL_ROOT_PASSWORD=simple networks: main: \ No newline at end of file diff --git a/local-docker-compose.yml b/local-docker-compose.yml index 17543866..6aaec39e 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -4,13 +4,35 @@ x-net: &net networks: - main services: + db: + <<: *net + image: datajoint/mysql:${MYSQL_TAG} + environment: + - MYSQL_ROOT_PASSWORD=simple + ports: + - "3306:3306" + ## To persist MySQL data + # volumes: + # - ./mysql/data:/var/lib/mysql + fakeservices.datajoint.io: + <<: *net + image: raphaelguzman/nginx:v0.0.3 + environment: + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + ports: + - "443:443" + - "3306:3306" + depends_on: + db: + condition: service_healthy app: <<: *net environment: - - DJ_HOST=db + - DJ_HOST=fakeservices.datajoint.io - DJ_USER=root - DJ_PASS=simple - - DJ_TEST_HOST=db + - DJ_TEST_HOST=fakeservices.datajoint.io - DJ_TEST_USER=datajoint - DJ_TEST_PASSWORD=datajoint - MATLAB_USER @@ -19,7 +41,7 @@ services: - DISPLAY image: raphaelguzman/matlab:${MATLAB_VERSION}-GUI depends_on: - db: + fakeservices.datajoint.io: condition: service_healthy ports: - "8888:8888" @@ -44,15 +66,5 @@ services: - /tmp/.X11-unix:/tmp/.X11-unix:rw ## Additional mounts may go here # - ./notebook:/home/muser/notebooks - db: - <<: *net - image: datajoint/mysql:${MYSQL_TAG} - environment: - - MYSQL_ROOT_PASSWORD=simple - ports: - - "3306:3306" - ## To persist MySQL data - # volumes: - # - ./mysql/data:/var/lib/mysql networks: main: \ No newline at end of file From f6d89ca1d5bea7607120d348b33d35c6c62ebcd6 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Fri, 6 Mar 2020 11:03:32 -0600 Subject: [PATCH 17/21] Add tests and fix bugs. 
--- +dj/+internal/Declare.m | 10 +- +dj/+internal/ExternalTable.m | 18 ++-- +dj/Relvar.m | 2 +- +tests/Main.m | 1 + +tests/Prep.m | 8 ++ +tests/TestConfig.m | 38 +++---- +tests/TestConnection.m | 8 +- +tests/TestERD.m | 2 +- +tests/TestExternalFile.m | 118 ++++++++++++++++++++++ +tests/TestFetch.m | 4 +- +tests/TestProjection.m | 2 +- +tests/TestTls.m | 10 +- +tests/TestUuid.m | 6 +- +tests/test_schemas/+External/Dimension.m | 7 ++ +tests/test_schemas/+External/Image.m | 15 +++ +tests/test_schemas/+University/All.m | 8 +- +tests/test_schemas/store_config.json | 45 +++++++++ 17 files changed, 250 insertions(+), 52 deletions(-) create mode 100644 +tests/TestExternalFile.m create mode 100644 +tests/test_schemas/+External/Dimension.m create mode 100644 +tests/test_schemas/+External/Image.m create mode 100644 +tests/test_schemas/store_config.json diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index 98ce7e62..f0ebbc11 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -180,9 +180,11 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name % execute declaration - fprintf \n\n - fprintf(sql) - fprintf \n\n\n + if strcmpi(dj.config('loglevel'), 'DEBUG') + fprintf \n\n + fprintf(sql) + fprintf \n\n\n + end end function fieldInfo = parseAttrDef(line) @@ -312,7 +314,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name if strcmpi(category, 'UUID') field.type = dj.internal.Declare.UUID_DATA_TYPE; elseif any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) - field.store = field.type((strfind(field.type,'@')+1):end); + field.store = strtrim(field.type((strfind(field.type,'@')+1):end)); field.type = dj.internal.Declare.UUID_DATA_TYPE; foreignKeySql = [foreignKeySql, sprintf( ... ['FOREIGN KEY (`%s`) REFERENCES `{database}`.`%s_%s` (`hash`) ON ' ... 
diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m index dcfffde5..8e537071 100644 --- a/+dj/+internal/ExternalTable.m +++ b/+dj/+internal/ExternalTable.m @@ -169,16 +169,18 @@ function create(self) function unused = unused(self) % query expression for unused hashes ref = self.references; - unused = self - cellfun(@(column, table) sprintf(... + query = strjoin(cellfun(@(column, table) sprintf(... 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... - ref.column_name, ref.referencing_table, 'UniformOutput', false); + ref.column_name, ref.referencing_table, 'UniformOutput', false), ' OR '); + if ~isempty(query) + unused = self - query; + else + unused = self; + end end function used = used(self) % query expression for used hashes - ref = self.references; - used = self & cellfun(@(column, table) sprintf(... - 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... - ref.column_name, ref.referencing_table, 'UniformOutput', false); + used = self - self.unused.proj(); end function delete(self, delete_external_files, limit) % DELETE(self, delete_external_files, limit) @@ -205,8 +207,8 @@ function delete(self, delete_external_files, limit) end function folded_array = subfold(name, folds) % subfolding for external storage: e.g. subfold('aBCdefg', [2, 3]) --> {'ab','cde'} - folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds, 1:length(folds), ... - cumsum(folds), 'UniformOutput', false); + folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds', 1:length(folds), ... 
+ cumsum(folds'), 'UniformOutput', false); end function config = buildConfig(config, validation_config, store_name) % builds out store config with defaults set diff --git a/+dj/Relvar.m b/+dj/Relvar.m index 050645f1..9e531753 100755 --- a/+dj/Relvar.m +++ b/+dj/Relvar.m @@ -117,7 +117,7 @@ function cleanup(self) try for rel = fliplr(rels) fprintf('Deleting from %s\n', rel.className) - rel.delQuick + rel.delQuick; end self.schema.conn.commitTransaction disp committed diff --git a/+tests/Main.m b/+tests/Main.m index a0e31b35..5d60821a 100644 --- a/+tests/Main.m +++ b/+tests/Main.m @@ -2,6 +2,7 @@ tests.TestConfig & ... tests.TestConnection & ... tests.TestERD & ... + tests.TestExternalFile & ... tests.TestFetch & ... tests.TestProjection & ... tests.TestTls & ... diff --git a/+tests/Prep.m b/+tests/Prep.m index c2c3b8f5..c5cfe724 100644 --- a/+tests/Prep.m +++ b/+tests/Prep.m @@ -13,6 +13,7 @@ end properties test_root; + external_file_store_root; end methods function obj = Prep() @@ -20,6 +21,11 @@ test_pkg_details = what('tests'); [test_root, ~, ~] = fileparts(test_pkg_details.path); obj.test_root = [test_root '/+tests']; + if ispc + obj.external_file_store_root = '%TEMP%\root'; + else + obj.external_file_store_root = '/tmp/root'; + end end end methods (TestClassSetup) @@ -96,6 +102,7 @@ function dispose(testCase) curr_conn = dj.conn(testCase.CONN_INFO_ROOT.host, ... testCase.CONN_INFO_ROOT.user, testCase.CONN_INFO_ROOT.password, '',true); + % remove databases curr_conn.query('SET FOREIGN_KEY_CHECKS=0;'); res = curr_conn.query(['SHOW DATABASES LIKE "' testCase.PREFIX '_%";']); for i = 1:length(res.(['Database (' testCase.PREFIX '_%)'])) @@ -104,6 +111,7 @@ function dispose(testCase) end curr_conn.query('SET FOREIGN_KEY_CHECKS=1;'); + % remove users cmd = {... 
'DROP USER ''datajoint''@''%%'';' 'DROP USER ''djview''@''%%'';' diff --git a/+tests/TestConfig.m b/+tests/TestConfig.m index 1c4cc29e..7cab67bc 100644 --- a/+tests/TestConfig.m +++ b/+tests/TestConfig.m @@ -1,7 +1,7 @@ classdef TestConfig < tests.Prep % TestConfig tests scenarios related to initializing DJ config. methods (Static) - function obj = configRemoveEnvVars(obj, type) + function obj = TestConfig_configRemoveEnvVars(obj, type) switch type case 'file' if isfield(obj, 'database_host') @@ -31,7 +31,7 @@ end end end - function configSingleFileTest(test_instance, type, fname, base) + function TestConfig_configSingleFileTest(test_instance, type, fname, base) switch type case 'save-local' dj.config.saveLocal(); @@ -46,7 +46,7 @@ function configSingleFileTest(test_instance, type, fname, base) end % load raw read_data = fileread(fname); - obj1 = tests.TestConfig.configRemoveEnvVars(jsondecode(read_data), 'file'); + obj1 = tests.TestConfig.TestConfig_configRemoveEnvVars(jsondecode(read_data), 'file'); % optional merge from base if strcmpi(type, 'load-custom') tmp = rmfield(base, intersect(fieldnames(base), fieldnames(obj1))); @@ -57,7 +57,7 @@ function configSingleFileTest(test_instance, type, fname, base) % stringify file = jsonencode(obj1); % load config - obj2 = tests.TestConfig.configRemoveEnvVars(dj.config(), 'config'); + obj2 = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config(), 'config'); curr = jsonencode(obj2); curr = regexprep(curr,'[a-z0-9][A-Z]','${$0(1)}_${lower($0(2))}'); % checks @@ -70,7 +70,7 @@ function configSingleFileTest(test_instance, type, fname, base) end end methods (Test) - function testGetSet(testCase) + function TestConfig_testGetSet(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); function verifyConfig(new, previous_value, subref, subref_value, subref_prev) @@ -134,7 +134,7 @@ function verifyConfig(new, previous_value, subref, subref_value, subref_prev) 'subfolding', [2,2] ... 
)}}), prev, 'stores{2}.protocol', 'http', 's3'); end - function testConfigChecks(testCase) + function TestConfig_testConfigChecks(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyError(@() dj.config(9), ... @@ -142,33 +142,33 @@ function testConfigChecks(testCase) d = testCase.verifyError(@() dj.config('none'), ... 'DataJoint:Config:InvalidKey'); end - function testRestore(testCase) + function TestConfig_testRestore(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); dj.config.restore; - obj1 = tests.TestConfig.configRemoveEnvVars(dj.config(), 'config'); - obj2 = tests.TestConfig.configRemoveEnvVars( ... + obj1 = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config(), 'config'); + obj2 = tests.TestConfig.TestConfig_configRemoveEnvVars( ... orderfields(dj.internal.Settings.DEFAULTS), 'config'); testCase.verifyEqual(jsonencode(obj1), jsonencode(obj2)); end - function testSave(testCase) + function TestConfig_testSave(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); dj.config.restore; % local dj.config('font', 10); - tests.TestConfig.configSingleFileTest(testCase, 'save-local'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-local'); % global dj.config('font', 12); - tests.TestConfig.configSingleFileTest(testCase, 'save-global'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-global'); % custom dj.config('font', 16); - tests.TestConfig.configSingleFileTest(testCase, 'save-custom', './config.json'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-custom', './config.json'); dj.config.restore; end - function testLoad(testCase) + function TestConfig_testLoad(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); pkg = what('tests'); @@ -176,22 +176,22 @@ function testLoad(testCase) default_file = [pkg.path '/test_schemas/default.json']; dj.config.restore; 
dj.config.save(default_file); - defaults = tests.TestConfig.configRemoveEnvVars( ... + defaults = tests.TestConfig.TestConfig_configRemoveEnvVars( ... jsondecode(fileread(default_file)), 'file'); delete(default_file); % load test config - tests.TestConfig.configSingleFileTest(testCase, 'load-custom', ... + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'load-custom', ... [pkg.path '/test_schemas/config.json'], defaults); % load new config on top of existing - base = tests.TestConfig.configRemoveEnvVars(dj.config, 'config'); + base = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config, 'config'); base = jsonencode(base); base = regexprep(base,'[a-z0-9][A-Z]','${$0(1)}_${lower($0(2))}'); - tests.TestConfig.configSingleFileTest(testCase, 'load-custom', ... + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'load-custom', ... [pkg.path '/test_schemas/config_lite.json'], jsondecode(base)); % cleanup dj.config.restore; end - function testEnv(testCase) + function TestConfig_testEnv(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); function validateEnvVarConfig(type, values) diff --git a/+tests/TestConnection.m b/+tests/TestConnection.m index 61914dc7..92b113a3 100644 --- a/+tests/TestConnection.m +++ b/+tests/TestConnection.m @@ -1,7 +1,7 @@ classdef TestConnection < tests.Prep % TestConnection tests typical connection scenarios. methods (Test) - function testConnection(testCase) + function TestConnection_testConnection(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyTrue(dj.conn(... @@ -9,7 +9,7 @@ function testConnection(testCase) testCase.CONN_INFO.user,... testCase.CONN_INFO.password,'',true).isConnected); end - function testConnectionExists(testCase) + function TestConnection_testConnectionExists(testCase) % testConnectionExists tests that will not fail if connection open % to the same host. 
% Fix https://github.com/datajoint/datajoint-matlab/issues/160 @@ -18,7 +18,7 @@ function testConnectionExists(testCase) dj.conn(testCase.CONN_INFO.host, '', '', '', '', true); dj.conn(testCase.CONN_INFO.host, '', '', '', '', true); end - function testConnectionDiffHost(testCase) + function TestConnection_testConnectionDiffHost(testCase) % testConnectionDiffHost tests that will fail if connection open % to a different host. % Fix https://github.com/datajoint/datajoint-matlab/issues/160 @@ -30,7 +30,7 @@ function testConnectionDiffHost(testCase) 'anything', '', '', '', '', true), ... 'DataJoint:Connection:AlreadyInstantiated'); end - function testPort(testCase) + function TestConnection_testPort(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyError(@() dj.conn(... diff --git a/+tests/TestERD.m b/+tests/TestERD.m index 4f44ca51..c400adf2 100644 --- a/+tests/TestERD.m +++ b/+tests/TestERD.m @@ -1,7 +1,7 @@ classdef TestERD < tests.Prep % TestERD tests unusual ERD scenarios. methods (Test) - function testDraw(testCase) + function TestERD_testDraw(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; diff --git a/+tests/TestExternalFile.m b/+tests/TestExternalFile.m new file mode 100644 index 00000000..52838fe5 --- /dev/null +++ b/+tests/TestExternalFile.m @@ -0,0 +1,118 @@ +classdef TestExternalFile < tests.Prep + % TestExternalFile tests scenarios related to external file store. + methods (Static) + function TestExternalFile_checks(test_instance, store, cache) + % load config + pkg = what('tests'); + dj.config.load([pkg.path '/test_schemas/store_config.json']); + dj.config(['stores.' store '.location'], strrep(dj.config(['stores.' store '.location']), '{{external_file_store_root}}', test_instance.external_file_store_root)); + dj.config('stores.main', dj.config(['stores.' 
store])); + dj.config(cache, [test_instance.external_file_store_root '/cache']); + % create schema + package = 'External'; + dj.createSchema(package,[test_instance.test_root '/test_schemas'], ... + [test_instance.PREFIX '_external']); + schema = External.getSchema; + % test value + rng(5); + test_val1 = floor(rand(1,3)*100); + % insert and fetch + insert(External.Dimension, struct( ... + 'dimension_id', 4, ... + 'dimension', test_val1 ... + )); + q = External.Dimension & 'dimension_id=4'; + res = q.fetch('dimension'); + value_check = res(1).dimension; + test_instance.verifyEqual(value_check, test_val1); + % check subfolding + packed_cell = mym('serialize {M}', test_val1); + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); + uuid_path = schema.external.table('main').make_uuid_path(uuid, ''); + subfold_path = strrep(uuid_path, dj.config('stores.main.location'), ''); + subfold_path = strrep(subfold_path, ['/' schema.dbname '/'], ''); + subfold_path = strrep(subfold_path, ['/' uuid], ''); + test_instance.verifyEqual(cellfun(@(x) length(x), split(subfold_path, '/')), schema.external.table('main').spec.type_config.subfolding); + % delete value to rely on cache + if ispc + [status,cmdout] = system(['rmdir /Q /s "' test_instance.external_file_store_root '\base"']); + else + [status,cmdout] = system(['rm -R ' test_instance.external_file_store_root '/base']); + end + res = q.fetch('dimension'); + value_check = res(1).dimension; + test_instance.verifyEqual(value_check, test_val1); + % populate + populate(External.Image); + q = External.Image & 'dimension_id=4'; + res = q.fetch('img'); + value_check = res(1).img; + test_instance.verifyEqual(size(value_check), test_val1); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==2); + test_instance.verifyTrue(schema.external.table('main').unused.count==0); + % delete from Dimension + del(External.Dimension); + % check children + q = External.Image; + 
test_instance.verifyTrue(q.count==0); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==0); + test_instance.verifyTrue(schema.external.table('main').unused.count==2); + % check delete from external + schema.external.table('main').delete(true, ''); + test_instance.verifyEqual(lastwarn, ['File ''' dj.config('stores.main.location') '/' schema.dbname '/' subfold_path '/' uuid ''' not found.']); + % reverse engineer + q = External.Dimension; + raw_def = dj.internal.Declare.getDefinition(q); + assembled_def = describe(q); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); + test_instance.verifyEqual(assembled_sql, raw_sql); + % drop table + drop(External.Dimension); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==0); + test_instance.verifyTrue(schema.external.table('main').unused.count==0); + % remove external storage content + if ispc + [status,cmdout] = system(['rmdir /Q /s "' test_instance.external_file_store_root '"']); + else + [status,cmdout] = system(['rm -R ' test_instance.external_file_store_root]); + end + % drop database + schema.conn.query(['DROP DATABASE `' test_instance.PREFIX '_external`']); + dj.config.restore; + end + end + methods (Test) + function TestExternalFile_testLocal(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local', 'blobCache'); + end + function TestExternalFile_testLocalDefault(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local_default', 'blobCache'); + end + function TestExternalFile_testBackward(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'local', 'cache'); + end + function 
TestExternalFile_testBackwardDefault(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'local_default', 'cache'); + end + function TestExternalFile_testMD5Hash(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + v = int64([1;2]); + packed_cell = mym('serialize {M}', v); + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); + testCase.verifyEqual(uuid, '1d751e2e1e74faf84ab485fde8ef72be'); + end + end +end \ No newline at end of file diff --git a/+tests/TestFetch.m b/+tests/TestFetch.m index 64ecdbbd..4959e98c 100644 --- a/+tests/TestFetch.m +++ b/+tests/TestFetch.m @@ -1,7 +1,7 @@ classdef TestFetch < tests.Prep % TestFetch tests typical insert/fetch scenarios. methods (Test) - function testVariousDatatypes(testCase) + function TestFetch_testVariousDatatypes(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -31,7 +31,7 @@ function testVariousDatatypes(testCase) testCase.verifyEqual(res(1).number, 3.213); testCase.verifyEqual(res(1).blob, [1, 2; 3, 4]); end - function testDescribe(testCase) + function TestFetch_testDescribe(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); q = University.All; diff --git a/+tests/TestProjection.m b/+tests/TestProjection.m index 48b809ad..0b0b2dac 100644 --- a/+tests/TestProjection.m +++ b/+tests/TestProjection.m @@ -1,7 +1,7 @@ classdef TestProjection < tests.Prep % TestProjection tests use of q.proj(...). 
methods (Test) - function testDateConversion(testCase) + function TestProjection_testDateConversion(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; diff --git a/+tests/TestTls.m b/+tests/TestTls.m index 6433f83e..a340afb6 100644 --- a/+tests/TestTls.m +++ b/+tests/TestTls.m @@ -1,7 +1,7 @@ classdef TestTls < tests.Prep % TestTls tests TLS connection scenarios. methods (Test) - function testSecureConn(testCase) + function TestTls_testSecureConn(testCase) % secure connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -12,7 +12,7 @@ function testSecureConn(testCase) '',true,true).query(... 'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}) > 0); end - function testInsecureConn(testCase) + function TestTls_testInsecureConn(testCase) % insecure connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -24,7 +24,7 @@ function testInsecureConn(testCase) 'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}, ... ''); end - function testPreferredConn(testCase) + function TestTls_testPreferredConn(testCase) % preferred connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -35,7 +35,7 @@ function testPreferredConn(testCase) '',true).query(... 
'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}) > 0); end - function testRejectException(testCase) + function TestTls_testRejectException(testCase) % test exception on require TLS st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -54,7 +54,7 @@ function testRejectException(testCase) ["requires secure connection","Access denied"])); %MySQL8,MySQL5 end end - function testStructException(testCase) + function TestTls_testStructException(testCase) % test exception on TLS struct st = dbstack; disp(['---------------' st(1).name '---------------']); diff --git a/+tests/TestUuid.m b/+tests/TestUuid.m index fa999269..8b589b17 100644 --- a/+tests/TestUuid.m +++ b/+tests/TestUuid.m @@ -1,7 +1,7 @@ classdef TestUuid < tests.Prep % TestUuid tests uuid scenarios. methods (Test) - function testInsertFetch(testCase) + function TestUuid_testInsertFetch(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -32,7 +32,7 @@ function testInsertFetch(testCase) testCase.verifyEqual(value_check, test_val2); end - function testQuery(testCase) + function TestUuid_testQuery(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -51,7 +51,7 @@ function testQuery(testCase) testCase.verifyEqual(value_check, test_val1); end - function testReverseEngineering(testCase) + function TestUuid_testReverseEngineering(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); q = University.Message; diff --git a/+tests/test_schemas/+External/Dimension.m b/+tests/test_schemas/+External/Dimension.m new file mode 100644 index 00000000..9c5b62a6 --- /dev/null +++ b/+tests/test_schemas/+External/Dimension.m @@ -0,0 +1,7 @@ +%{ +dimension_id : int +--- +dimension=null : blob@main +%} +classdef Dimension < dj.Manual +end \ No newline at end of file diff --git a/+tests/test_schemas/+External/Image.m b/+tests/test_schemas/+External/Image.m new file mode 100644 index 
00000000..6d2da9c8 --- /dev/null +++ b/+tests/test_schemas/+External/Image.m @@ -0,0 +1,15 @@ +%{ +-> External.Dimension +--- +img=null : blob@main +%} +classdef Image < dj.Computed + methods(Access=protected) + function makeTuples(self, key) + dim = num2cell(fetch1(External.Dimension & key, 'dimension')); + rng(5); + key.img = rand(dim{:}); + self.insert(key) + end + end +end \ No newline at end of file diff --git a/+tests/test_schemas/+University/All.m b/+tests/test_schemas/+University/All.m index d96bcdb8..16766659 100644 --- a/+tests/test_schemas/+University/All.m +++ b/+tests/test_schemas/+University/All.m @@ -2,10 +2,10 @@ # All id : int --- -string : varchar(30) -date : datetime -number : float -blob : longblob +string=null : varchar(30) +date=null : datetime +number=null : float +blob=null : longblob %} classdef All < dj.Manual end \ No newline at end of file diff --git a/+tests/test_schemas/store_config.json b/+tests/test_schemas/store_config.json new file mode 100644 index 00000000..0b3cb172 --- /dev/null +++ b/+tests/test_schemas/store_config.json @@ -0,0 +1,45 @@ +{ + "database.host": "env", + "database.password": "var", + "database.user": "override", + "database.port": 3306, + "database.reconnect": true, + "connection.init_function": null, + "connection.charset": "", + "loglevel": "DEBUG", + "safemode": false, + "fetch_format": "array", + "display.limit": 12, + "display.width": 14, + "display.show_tuple_count": true, + "database.use_tls": null, + "enable_python_native_blobs": false, + "stores": { + "local": { + "protocol": "file", + "location": "{{external_file_store_root}}/base", + "subfolding": [ + 3, + 4 + ] + }, + "new_local": { + "datajoint_type": "blob", + "protocol": "file", + "location": "{{external_file_store_root}}/base", + "subfolding": [ + 3, + 4 + ] + }, + "local_default": { + "protocol": "file", + "location": "{{external_file_store_root}}/base" + }, + "new_local_default": { + "datajoint_type": "blob", + "protocol": "file", + "location": 
"{{external_file_store_root}}/base" + } + } +} \ No newline at end of file From 001f1f41ffc5b22142adacee0cb4e768f20cbfa0 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Fri, 6 Mar 2020 11:13:14 -0600 Subject: [PATCH 18/21] Clean up linting. --- +tests/TestExternalFile.m | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/+tests/TestExternalFile.m b/+tests/TestExternalFile.m index 52838fe5..3ba9d357 100644 --- a/+tests/TestExternalFile.m +++ b/+tests/TestExternalFile.m @@ -5,7 +5,9 @@ function TestExternalFile_checks(test_instance, store, cache) % load config pkg = what('tests'); dj.config.load([pkg.path '/test_schemas/store_config.json']); - dj.config(['stores.' store '.location'], strrep(dj.config(['stores.' store '.location']), '{{external_file_store_root}}', test_instance.external_file_store_root)); + dj.config(['stores.' store '.location'], strrep(dj.config(... + ['stores.' store '.location']), '{{external_file_store_root}}', ... + test_instance.external_file_store_root)); dj.config('stores.main', dj.config(['stores.' store])); dj.config(cache, [test_instance.external_file_store_root '/cache']); % create schema @@ -32,12 +34,15 @@ function TestExternalFile_checks(test_instance, store, cache) subfold_path = strrep(uuid_path, dj.config('stores.main.location'), ''); subfold_path = strrep(subfold_path, ['/' schema.dbname '/'], ''); subfold_path = strrep(subfold_path, ['/' uuid], ''); - test_instance.verifyEqual(cellfun(@(x) length(x), split(subfold_path, '/')), schema.external.table('main').spec.type_config.subfolding); + test_instance.verifyEqual(cellfun(@(x) length(x), split(subfold_path, '/')), ... + schema.external.table('main').spec.type_config.subfolding); % delete value to rely on cache if ispc - [status,cmdout] = system(['rmdir /Q /s "' test_instance.external_file_store_root '\base"']); + [status,cmdout] = system(['rmdir /Q /s "' ... 
+ test_instance.external_file_store_root '\base"']); else - [status,cmdout] = system(['rm -R ' test_instance.external_file_store_root '/base']); + [status,cmdout] = system(['rm -R ' ... + test_instance.external_file_store_root '/base']); end res = q.fetch('dimension'); value_check = res(1).dimension; @@ -61,7 +66,9 @@ function TestExternalFile_checks(test_instance, store, cache) test_instance.verifyTrue(schema.external.table('main').unused.count==2); % check delete from external schema.external.table('main').delete(true, ''); - test_instance.verifyEqual(lastwarn, ['File ''' dj.config('stores.main.location') '/' schema.dbname '/' subfold_path '/' uuid ''' not found.']); + test_instance.verifyEqual(lastwarn, ['File ''' ... + dj.config('stores.main.location') '/' schema.dbname '/' subfold_path '/' ... + uuid ''' not found.']); % reverse engineer q = External.Dimension; raw_def = dj.internal.Declare.getDefinition(q); @@ -76,9 +83,11 @@ function TestExternalFile_checks(test_instance, store, cache) test_instance.verifyTrue(schema.external.table('main').unused.count==0); % remove external storage content if ispc - [status,cmdout] = system(['rmdir /Q /s "' test_instance.external_file_store_root '"']); + [status,cmdout] = system(['rmdir /Q /s "' ... + test_instance.external_file_store_root '"']); else - [status,cmdout] = system(['rm -R ' test_instance.external_file_store_root]); + [status,cmdout] = system(['rm -R ' ... + test_instance.external_file_store_root]); end % drop database schema.conn.query(['DROP DATABASE `' test_instance.PREFIX '_external`']); @@ -94,7 +103,8 @@ function TestExternalFile_testLocal(testCase) function TestExternalFile_testLocalDefault(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); - tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local_default', 'blobCache'); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local_default', ... 
+ 'blobCache'); end function TestExternalFile_testBackward(testCase) st = dbstack; From b362a115c1e71c5fb63554fc86f8acb503c253ed Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Fri, 6 Mar 2020 11:16:10 -0600 Subject: [PATCH 19/21] Update port mapping bug. --- LNX-docker-compose.yml | 3 --- local-docker-compose.yml | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index d803e070..6d293123 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -15,9 +15,6 @@ services: environment: - ADD_db_TYPE=DATABASE - ADD_db_ENDPOINT=db:3306 - ports: - - "443:443" - - "3306:3306" depends_on: db: condition: service_healthy diff --git a/local-docker-compose.yml b/local-docker-compose.yml index 6aaec39e..e4262456 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -9,8 +9,8 @@ services: image: datajoint/mysql:${MYSQL_TAG} environment: - MYSQL_ROOT_PASSWORD=simple - ports: - - "3306:3306" + # ports: + # - "3306:3306" ## To persist MySQL data # volumes: # - ./mysql/data:/var/lib/mysql From 70543bf2c3ef3fbc80658c04c0fdf33e3a617378 Mon Sep 17 00:00:00 2001 From: Raphael Guzman Date: Fri, 6 Mar 2020 12:11:18 -0600 Subject: [PATCH 20/21] Update tests to work with WIN64. 
--- +dj/+internal/Settings.m | 6 +++++- +tests/Prep.m | 2 +- +tests/TestConfig.m | 3 +++ +tests/TestExternalFile.m | 7 ++++--- .gitignore | 3 ++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/+dj/+internal/Settings.m b/+dj/+internal/Settings.m index 4e98b442..4989ba14 100644 --- a/+dj/+internal/Settings.m +++ b/+dj/+internal/Settings.m @@ -78,7 +78,11 @@ function saveLocal() dj.internal.Settings.save(dj.internal.Settings.LOCALFILE); end function saveGlobal() - dj.internal.Settings.save(dj.internal.Settings.GLOBALFILE); + location = dj.internal.Settings.GLOBALFILE; + if ispc + location = strrep(location, '~', strrep(getenv('USERPROFILE'), '\', '/')); + end + dj.internal.Settings.save(location); end end end diff --git a/+tests/Prep.m b/+tests/Prep.m index c5cfe724..63fe6c68 100644 --- a/+tests/Prep.m +++ b/+tests/Prep.m @@ -22,7 +22,7 @@ [test_root, ~, ~] = fileparts(test_pkg_details.path); obj.test_root = [test_root '/+tests']; if ispc - obj.external_file_store_root = '%TEMP%\root'; + obj.external_file_store_root = [getenv('TEMP') '\root']; else obj.external_file_store_root = '/tmp/root'; end diff --git a/+tests/TestConfig.m b/+tests/TestConfig.m index 7cab67bc..e56b13b3 100644 --- a/+tests/TestConfig.m +++ b/+tests/TestConfig.m @@ -39,6 +39,9 @@ function TestConfig_configSingleFileTest(test_instance, type, fname, base) case 'save-global' dj.config.saveGlobal(); fname = dj.internal.Settings.GLOBALFILE; + if ispc + fname = strrep(fname, '~', strrep(getenv('USERPROFILE'), '\', '/')); + end case 'save-custom' dj.config.save(fname); case 'load-custom' diff --git a/+tests/TestExternalFile.m b/+tests/TestExternalFile.m index 3ba9d357..7608b866 100644 --- a/+tests/TestExternalFile.m +++ b/+tests/TestExternalFile.m @@ -4,12 +4,13 @@ function TestExternalFile_checks(test_instance, store, cache) % load config pkg = what('tests'); - dj.config.load([pkg.path '/test_schemas/store_config.json']); + ext_root = strrep(test_instance.external_file_store_root, '\', 
'/'); + dj.config.load([strrep(pkg.path, '\', '/') '/test_schemas/store_config.json']); dj.config(['stores.' store '.location'], strrep(dj.config(... ['stores.' store '.location']), '{{external_file_store_root}}', ... - test_instance.external_file_store_root)); + ext_root)); dj.config('stores.main', dj.config(['stores.' store])); - dj.config(cache, [test_instance.external_file_store_root '/cache']); + dj.config(cache, [ext_root '/cache']); % create schema package = 'External'; dj.createSchema(package,[test_instance.test_root '/test_schemas'], ... diff --git a/.gitignore b/.gitignore index 72042dfc..9832f099 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ notebook *getSchema.m docker-compose.yml .vscode -matlab.prf \ No newline at end of file +matlab.prf +win.* \ No newline at end of file From 06d401c766172d5546112ffc164eb8b15e0b79a5 Mon Sep 17 00:00:00 2001 From: Raphael Guzman Date: Fri, 6 Mar 2020 10:29:28 -0800 Subject: [PATCH 21/21] Add mac ignore rule. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9832f099..c9a8c43f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ notebook docker-compose.yml .vscode matlab.prf -win.* \ No newline at end of file +win.* +macos.* \ No newline at end of file