diff --git a/+dj/+internal/Declare.m b/+dj/+internal/Declare.m index d63cb8ed..f0ebbc11 100644 --- a/+dj/+internal/Declare.m +++ b/+dj/+internal/Declare.m @@ -3,19 +3,23 @@ % table definitions, and to declare the corresponding mysql tables. properties(Constant) + UUID_DATA_TYPE = 'binary(16)' CONSTANT_LITERALS = {'CURRENT_TIMESTAMP'} + EXTERNAL_TABLE_ROOT = '~external' TYPE_PATTERN = struct( ... 'NUMERIC', '^((tiny|small|medium|big)?int|decimal|double|float)', ... 'STRING', '^((var)?char|enum|date|(var)?year|time|timestamp)', ... - 'INTERNAL_BLOB', '^(tiny|medium|long)?blob', ... + 'INTERNAL_BLOB', '^(tiny|medium|long)?blob$', ... + 'EXTERNAL_BLOB', 'blob@(?[a-z]\w*)$', ... 'UUID', 'uuid$' ... ) - UUID_DATA_TYPE = 'binary(16)' - SPECIAL_TYPES = {'UUID'} + SPECIAL_TYPES = {'UUID', 'EXTERNAL_BLOB'} + EXTERNAL_TYPES = {'EXTERNAL_BLOB'} % data referenced by a UUID in external tables + SERIALIZED_TYPES = {'EXTERNAL_BLOB'} % requires packing data end methods(Static) - function sql = declare(table_instance, def) + function [sql, external_stores] = declare(table_instance, def) % sql = DECLARE(query, definition) % Parse table declaration and declares the table. % sql: Generated SQL to create a table. @@ -36,12 +40,13 @@ switch true case {isa(table_instance, 'dj.internal.UserRelation'), isa(table_instance, ... - 'dj.Part'), isa(table_instance, 'dj.Jobs')} + 'dj.Part'), isa(table_instance, 'dj.Jobs'), ... + isa(table_instance, 'dj.internal.ExternalTable')} % New-style declaration using special classes for each tier tableInfo = struct; if isa(table_instance, 'dj.Part') tableInfo.tier = 'part'; - else + elseif ~isa(table_instance, 'dj.internal.ExternalTable') specialClass = find(cellfun(@(c) isa(table_instance, c), ... dj.Schema.tierClasses)); assert(length(specialClass)==1, ... @@ -70,11 +75,14 @@ dj.internal.fromCamelCase(table_instance.className(length( ... table_instance.master.className)+1:end)))); %#ok - else + elseif ~isa(table_instance, 'dj.internal.ExternalTable') tableName = sprintf('%s%s%s', ... table_instance.schema.prefix, dj.Schema.tierPrefixes{ ... strcmp(tableInfo.tier, dj.Schema.allowedTiers)}, ... dj.internal.fromCamelCase(tableInfo.className)); + else + tableName = [dj.internal.Declare.EXTERNAL_TABLE_ROOT '_' ... + table_instance.store]; end otherwise @@ -105,12 +113,13 @@ stableInfo.className)); end - sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname, ... - tableName); - % fields and foreign keys inKey = true; primaryFields = {}; + foreignKeySql = {}; + indexSql = {}; + attributeSql = {}; + external_stores = {}; fields = {}; for iLine = 1:length(def) line = def{iLine}; @@ -119,11 +128,12 @@ case strncmp(line,'---',3) inKey = false; % foreign key case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') - [sql, newFields] = dj.internal.Declare.makeFK( ... - sql, line, fields, inKey, ... + [fk_attr_sql, fk_sql, newFields] = dj.internal.Declare.makeFK( ... + line, fields, inKey, ... dj.internal.shorthash(sprintf('`%s`.`%s`', ... table_instance.schema.dbname, tableName))); - sql = sprintf('%s,\n', sql); + attributeSql = [attributeSql, fk_attr_sql]; %#ok + foreignKeySql = [foreignKeySql, fk_sql]; %#ok fields = [fields, newFields]; %#ok if inKey primaryFields = [primaryFields, newFields]; %#ok @@ -131,7 +141,7 @@ case regexp(line, '^(\s*\([^)]+\)\s*)?->.+$') % index case regexpi(line, '^(unique\s+)?index[^:]*$') - sql = sprintf('%s%s,\n', sql, line); % add checks + indexSql = [indexSql, line]; %#ok % attribute case regexp(line, ['^[a-z][a-z\d_]*\s*' ... 
% name @@ -144,26 +154,37 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name primaryFields{end+1} = fieldInfo.name; %#ok end fields{end+1} = fieldInfo.name; %#ok - sql = sprintf('%s%s', sql, ... - dj.internal.Declare.compileAttribute(fieldInfo)); - + [attr_sql, store, foreignKeySql] = ... + dj.internal.Declare.compileAttribute(fieldInfo, foreignKeySql); + attributeSql = [attributeSql, attr_sql]; %#ok + if ~isempty(store) + external_stores{end+1} = store; %#ok + end otherwise error('Invalid table declaration line "%s"', line) end end - % add primary key declaration + % create declaration + create_sql = sprintf('CREATE TABLE `%s`.`%s` (\n', table_instance.schema.dbname,... + tableName); + % add attribute, primary key, foreign key, and index declaration assert(~isempty(primaryFields), 'table must have a primary key') - sql = sprintf('%sPRIMARY KEY (%s),\n' ,sql, backquotedList(primaryFields)); - + table_sql = {attributeSql', {['PRIMARY KEY (`' strjoin(primaryFields, '`,`') ... + '`)']}, foreignKeySql', indexSql'}; + table_sql = sprintf([strjoin(cat(1, table_sql{:}), ',\n') '\n']); % finish the declaration - sql = sprintf('%s\n) ENGINE = InnoDB, COMMENT "%s"', sql(1:end-2), ... - tableInfo.comment); + engine_sql = sprintf(') ENGINE = InnoDB, COMMENT "%s"', tableInfo.comment); + + sql = sprintf('%s%s%s', create_sql, table_sql, engine_sql); + % execute declaration - fprintf \n\n - fprintf(sql) - fprintf \n\n\n + if strcmpi(dj.config('loglevel'), 'DEBUG') + fprintf \n\n + fprintf(sql) + fprintf \n\n\n + end end function fieldInfo = parseAttrDef(line) @@ -178,7 +199,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name '^(?[a-z][a-z\d_]*)\s*' % field name ['=\s*(?".*"|''.*''|\w+|[-+]?[0-9]*\.?[0-9]+([eE][-+]?' ... '[0-9]+)?)\s*'] % default value - [':\s*(?\w[\w\s]+(\(.*\))?(\s*[aA][uU][tT][oO]_[iI][nN]' ... + [':\s*(?\w[@\w\s]+(\(.*\))?(\s*[aA][uU][tT][oO]_[iI][nN]' ... '[cC][rR][eE][mM][eE][nN][tT])?)\s*'] % datatype '#(?.*)' % comment '$' % end of line @@ -208,7 +229,7 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name fieldInfo.isnullable = strcmpi(fieldInfo.default,'null'); end - function [sql, newattrs] = makeFK(sql, line, existingFields, inKey, hash) + function [all_attr_sql, fk_sql, newattrs] = makeFK(line, existingFields, inKey, hash) % [sql, newattrs] = MAKEFK(sql, line, existingFields, inKey, hash) % Add foreign key to SQL table definition. % sql: Modified in-place SQL to include foreign keys. @@ -217,6 +238,8 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name % existingFields: Existing field attributes. % inKey: Set as primary key. % hash: Current hash as base. + fk_sql = ''; + all_attr_sql = ''; pat = ['^(?\([\s\w,]*\))?' ... '\s*->\s*' ... '(?\w+\.[A-Z][A-Za-z0-9]*)' ... @@ -269,29 +292,38 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name rel.tableHeader.names)); fieldInfo.name = newattrs{i}; fieldInfo.nullabe = ~inKey; % nonprimary references are nullable - sql = sprintf('%s%s', sql, dj.internal.Declare.compileAttribute(fieldInfo)); + [attr_sql, ~, ~] = dj.internal.Declare.compileAttribute(fieldInfo, []); + all_attr_sql = sprintf('%s%s,\n', all_attr_sql, attr_sql); end + all_attr_sql = all_attr_sql(1:end-2); fkattrs = rel.primaryKey; fkattrs(ismember(fkattrs, attrs))=newattrs; hash = dj.internal.shorthash([{hash rel.fullTableName} newattrs]); - sql = sprintf(... + fk_sql = sprintf(... ['%sCONSTRAINT `%s` FOREIGN KEY (%s) REFERENCES %s (%s) ' ... - 'ON UPDATE CASCADE ON DELETE RESTRICT'], sql, hash, backquotedList(fkattrs), ... 
- rel.fullTableName, backquotedList(rel.primaryKey)); + 'ON UPDATE CASCADE ON DELETE RESTRICT'], fk_sql, hash, ... + backquotedList(fkattrs), rel.fullTableName, backquotedList(rel.primaryKey)); end - function field = substituteSpecialType(field, category) + function [field, foreignKeySql] = substituteSpecialType(field, category, foreignKeySql) % field = SUBSTITUTESPECIALTYPE(field, category) % Substitute DataJoint type with sql type. % field: Modified in-place field attributes. % category: DataJoint type match based on TYPE_PATTERN. if strcmpi(category, 'UUID') field.type = dj.internal.Declare.UUID_DATA_TYPE; + elseif any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) + field.store = strtrim(field.type((strfind(field.type,'@')+1):end)); + field.type = dj.internal.Declare.UUID_DATA_TYPE; + foreignKeySql = [foreignKeySql, sprintf( ... + ['FOREIGN KEY (`%s`) REFERENCES `{database}`.`%s_%s` (`hash`) ON ' ... + 'UPDATE RESTRICT ON DELETE RESTRICT'], field.name, ... + dj.internal.Declare.EXTERNAL_TABLE_ROOT, field.store)]; %#ok end end - function sql = compileAttribute(field) + function [sql, store, foreignKeySql] = compileAttribute(field, foreignKeySql) % sql = COMPILEATTRIBUTE(field) % Convert the structure field with header {'name' 'type' 'default' 'comment'} % to the SQL column declaration. @@ -317,11 +349,16 @@ case regexp(line, ['^[a-z][a-z\d_]*\s*' ... % name 'illegal characters in attribute comment "%s"', field.comment) category = dj.internal.Declare.matchType(field.type); + store = []; if any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES)) field.comment = [':' strip(field.type) ':' field.comment]; - field = dj.internal.Declare.substituteSpecialType(field, category); + [field, foreignKeySql] = dj.internal.Declare.substituteSpecialType(field, ... + category, foreignKeySql); + if isfield(field, 'store') + store = field.store; + end end - sql = sprintf('`%s` %s %s COMMENT "%s",\n', ... + sql = sprintf('`%s` %s %s COMMENT "%s"', ... field.name, strtrim(field.type), default, field.comment); end diff --git a/+dj/+internal/ExternalMapping.m b/+dj/+internal/ExternalMapping.m new file mode 100644 index 00000000..9dd3313d --- /dev/null +++ b/+dj/+internal/ExternalMapping.m @@ -0,0 +1,25 @@ +% dj.internal.ExternalMapping - The external manager contains all the tables for all external +% stores for a given schema. +% :Example: +% e = dj.internal.ExternalMapping(schema) +% external_table = e.table(store) +classdef ExternalMapping < handle + properties + schema + tables + end + methods + function self = ExternalMapping(schema) + self.schema = schema; + self.tables = struct(); + end + function store_table = table(self, store) + keys = fieldnames(self.tables); + if all(~strcmp(store, keys)) + self.tables.(store) = dj.internal.ExternalTable(... + self.schema.conn, store, self.schema); + end + store_table = self.tables.(store); + end + end +end diff --git a/+dj/+internal/ExternalTable.m b/+dj/+internal/ExternalTable.m new file mode 100644 index 00000000..8e537071 --- /dev/null +++ b/+dj/+internal/ExternalTable.m @@ -0,0 +1,300 @@ +% dj.internal.ExternalTable - The table tracking externally stored objects. 
+% Declare as dj.internal.ExternalTable(connection, store, schema) +classdef ExternalTable < dj.Relvar + properties (Hidden, Constant) + BACKWARD_SUPPORT_DJPY012 = true + end + properties + store + spec + end + properties (Hidden) + connection + end + methods + function self = ExternalTable(connection, store, schema) + % construct table using config validation criteria supplied by store plugin + self.store = store; + self.schema = schema; + self.connection = connection; + stores = dj.config('stores'); + assert(isstruct(stores.(store)), 'Store `%s` not configured as struct.', store); + assert(any(strcmp('protocol', fieldnames(stores.(store)))), ... + 'Store `%s` missing `protocol` key.', store); + if isstring(stores.(store).protocol) + storePlugin = char(stores.(store).protocol); + else + assert(ischar(stores.(store).protocol), ... + ['Store `%s` set `protocol` as `%s` but ' ... + 'expecting `char||string`.'], store, ... + class(stores.(store).protocol)); + storePlugin = stores.(store).protocol; + end + + storePlugin(1) = upper(storePlugin(1)); + try + config = buildConfig(stores.(store), ... + dj.store_plugins.(storePlugin).validation_config, store); + catch ME + if strcmp(ME.identifier,'MATLAB:undefinedVarOrClass') + % Throw error if plugin not found + error('DataJoint:StorePlugin:Missing', ... + 'Missing store plugin `%s`.', storePlugin); + elseif dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && contains(... + ME.identifier,'DataJoint:StoreConfig') + config = buildConfig(stores.(store), ... + dj.store_plugins.(storePlugin).backward_validation_config, store); + else + rethrow(ME); + end + end + self.spec = dj.store_plugins.(storePlugin)(config); + end + function create(self) + % parses the table declration and declares the table + + if self.isCreated + return + end + self.schema.reload % ensure that the table does not already exist + if self.isCreated + return + end + def = {... + '# external storage tracking' + 'hash : uuid # hash of contents (blob), of filename + contents (attach), or relative filepath (filepath)' + '---' + 'size :bigint unsigned # size of object in bytes' + 'attachment_name=null : varchar(255) # the filename of an attachment' + 'filepath=null : varchar(1000) # relative filepath or attachment filename' + 'contents_hash=null : uuid # used for the filepath datatype' + 'timestamp=CURRENT_TIMESTAMP :timestamp # automatic timestamp' + }; + def = sprintf('%s\n',def{:}); + + [sql, ~] = dj.internal.Declare.declare(self, def); + self.schema.conn.query(sql); + self.schema.reload + end + function uuid_path = make_uuid_path(self, uuid, suffix) + % create external path based on the uuid hash + uuid = strrep(uuid, '-', ''); + uuid_path = self.spec.make_external_filepath([self.schema.dbname '/' strjoin(... + subfold(uuid, self.spec.type_config.subfolding), '/') '/' uuid suffix]); + end + % -- BLOBS -- + function uuid = upload_buffer(self, blob) + % put blob + packed_cell = mym('serialize {M}', blob); + % https://www.mathworks.com/matlabcentral/fileexchange/25921-getmd5 + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); + self.spec.upload_buffer(packed_cell{1}, self.make_uuid_path(uuid, '')); + % insert tracking info + sql = sprintf(['INSERT INTO %s (hash, size) VALUES (X''%s'', %i) ON ' ... + 'DUPLICATE KEY UPDATE timestamp=CURRENT_TIMESTAMP'], self.fullTableName, ... 
+ uuid, length(packed_cell{1})); + self.connection.query(sql); + end + function blob = download_buffer(self, uuid) + % get blob via uuid (with caching support) + try + cache_folder = strrep(dj.config('blobCache'), '\', '/'); + catch ME + if strcmp(ME.identifier,'DataJoint:Config:InvalidKey') + cache_folder = []; + else + rethrow(ME); + end + end + if dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && isempty(cache_folder) + try + cache_folder = strrep(dj.config('cache'), '\', '/'); + catch ME + if strcmp(ME.identifier,'DataJoint:Config:InvalidKey') + cache_folder = []; + else + rethrow(ME); + end + end + end + blob = []; + if ~isempty(cache_folder) + cache_file = [cache_folder '/' self.schema.dbname '/' strjoin(... + subfold(uuid, self.spec.type_config.subfolding), '/') '/' uuid '']; + try + fileID = fopen(cache_file, 'r'); + result = fread(fileID); + fclose(fileID); + blob = mym('deserialize', uint8(result)); + catch + end + end + if isempty(blob) + blob_binary = uint8(self.spec.download_buffer(self.make_uuid_path(uuid, ''))); + blob = mym('deserialize', blob_binary); + if ~isempty(cache_folder) + [~,start_idx,~] = regexp(cache_file, '/', 'match', 'start', 'end'); + mkdir(cache_file(1:(start_idx(end)-1))); + fileID = fopen(cache_file, 'w'); + fwrite(fileID, blob_binary); + fclose(fileID); + end + end + end + % -- UTILITIES -- + function refs = references(self) + % generator of referencing table names and their referencing columns + sql = {... + 'SELECT concat(''`'', table_schema, ''`.`'', table_name, ''`'') as referencing_table, column_name ' + 'FROM information_schema.key_column_usage ' + 'WHERE referenced_table_name="{S}" and referenced_table_schema="{S}"' + }; + sql = sprintf('%s',sql{:}); + refs = self.connection.query(sql, self.plainTableName, self.schema.dbname); + end + function paths = fetch_external_paths(self, varargin) + % generate complete external filepaths from the query. + % Each element is a cell: {uuid, path} + external_content = fetch(self, 'hash', 'attachment_name', 'filepath', varargin{:}); + paths = cell(length(external_content),1); + for i = 1:length(external_content) + if ~isempty(external_content(i).attachment_name) + elseif ~isempty(external_content(i).filepath) + else + paths{i}{2} = self.make_uuid_path(external_content(i).hash, ''); + end + paths{i}{1} = external_content(i).hash; + end + end + function unused = unused(self) + % query expression for unused hashes + ref = self.references; + query = strjoin(cellfun(@(column, table) sprintf(... + 'hex(`hash`) in (select hex(`%s`) from %s)', column, table), ... + ref.column_name, ref.referencing_table, 'UniformOutput', false), ' OR '); + if ~isempty(query) + unused = self - query; + else + unused = self; + end + end + function used = used(self) + % query expression for used hashes + used = self - self.unused.proj(); + end + function delete(self, delete_external_files, limit) + % DELETE(self, delete_external_files, limit) + % Remove external tracking table records and optionally remove from ext storage + % self: Store Table instance. + % delete_external_files: Remove from external storage. 
+ % limit: Limit the number of external objects to remove + if ~delete_external_files + delQuick(self.unused); + else + if ~isempty(limit) + items = fetch_external_paths(self.unused, sprintf('LIMIT %i', limit)); + else + items = fetch_external_paths(self.unused); + end + for i = 1:length(items) + count = delQuick(self & struct('hash',items{i}{1}), true); + assert(count == 0); + self.spec.remove_object(items{i}{2}); + end + end + end + end +end +function folded_array = subfold(name, folds) + % subfolding for external storage: e.g. subfold('aBCdefg', [2, 3]) --> {'ab','cde'} + folded_array = arrayfun(@(len,idx,s) name(s-len+1:s), folds', 1:length(folds), ... + cumsum(folds'), 'UniformOutput', false); +end +function config = buildConfig(config, validation_config, store_name) + % builds out store config with defaults set + function validateInput(address, target) + % validates supplied config + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if ~isstruct(target.(fn{k})) + subscript = substruct(address{:}); + try + value = subsref(config, subscript); + vconfig = subsref(validation_config, subscript); + type_check = vconfig.type_check; + if ~type_check(value) + % Throw error for config that fails type validation + error('DataJoint:StoreConfig:WrongType', ... + ['Unexpected type `%s` for config `%s` in store `%s`. ' ... + 'Expecting `%s`.'], class(value), strjoin(address, ''), ... + store_name, char(type_check)); + end + catch ME + if strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for extra config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Unexpected additional config `%s` specified in store `%s`.', ... + strjoin(address, ''), store_name); + else + rethrow(ME); + end + end + else + validateInput(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + function validateConfig(address, target) + % verifies if input contains all expected config + for k=1:numel(fieldnames(target)) + fn = fieldnames(target); + address{end+1} = '.'; + address{end+1} = fn{k}; + if any(strcmp('mode',fieldnames(target))) + address(end) = []; + address(end) = []; + subscript = substruct(address{:}); + vconfig = subsref(validation_config, subscript); + mode = vconfig.mode; + if any(strcmp('datajoint_type', fieldnames(config))) + mode_result = mode(config.datajoint_type); + else + mode_result = mode('not_necessary'); + end + try + value = subsref(config, subscript); + catch ME + if mode_result==1 && strcmp(ME.identifier,'MATLAB:nonExistentField') + % Throw error for required config + error('DataJoint:StoreConfig:MissingRequired', ... + 'Missing required config `%s` in store `%s`.', ... + strjoin(address, ''), store_name); + elseif mode_result==0 && strcmp(ME.identifier,'MATLAB:nonExistentField') + % Set default for optional config + default = vconfig.default; + config = subsasgn(config, subscript, default); + else + rethrow(ME); + end + end + if mode_result==-1 + % Throw error for rejected config + error('DataJoint:StoreConfig:ExtraConfig', ... + 'Incompatible additional config `%s` specified in store `%s`.', ... 
+ strjoin(address, ''), store_name); + end + break; + else + validateConfig(address, target.(fn{k})); + end + address(end) = []; + address(end) = []; + end + end + validateInput({}, config); + validateConfig({}, validation_config); +end \ No newline at end of file diff --git a/+dj/+internal/GeneralRelvar.m b/+dj/+internal/GeneralRelvar.m index 7c4c2caa..a31a2f0f 100644 --- a/+dj/+internal/GeneralRelvar.m +++ b/+dj/+internal/GeneralRelvar.m @@ -210,7 +210,7 @@ function clip(self) ret = self.conn.query(sprintf('SELECT %s FROM %s%s', ... hdr.sql, sql_, limit)); ret = dj.struct.fromFields(ret); - ret = get(self.header.attributes, ret); + ret = get(self.conn, self.header.attributes, ret); if nargout>1 % return primary key structure array @@ -433,10 +433,6 @@ function restrict(self, varargin) function ret = minus(self, arg) % MINUS -- relational antijoin - if iscell(arg) - throwAsCaller(MException('DataJoint:invalidOperator',... - 'Antijoin only accepts single restrictions')) - end ret = self.copy; ret.restrict('not', arg) end @@ -924,7 +920,7 @@ case isa(cond, 'dj.internal.GeneralRelvar') str = strrep(str, '\', '\\'); end -function data = get(attr, data) +function data = get(connection, attr, data) % data = GET(attr, data) % Process in place fetched data. % data: Fetched records. @@ -932,13 +928,24 @@ case isa(cond, 'dj.internal.GeneralRelvar') for i = 1:length(attr) if attr(i).isUuid for j = 1:length(data) - new_value = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); - new_value = [new_value(1:8) '-' ... - new_value(9:12) '-' ... - new_value(13:16) '-' ... - new_value(17:20) '-' ... - new_value(21:end)]; - data(j).(attr(i).name) = new_value; + if ~isempty(data(j).(attr(i).name)) + new_value = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); + new_value = [new_value(1:8) '-' ... + new_value(9:12) '-' ... + new_value(13:16) '-' ... + new_value(17:20) '-' ... + new_value(21:end)]; + data(j).(attr(i).name) = new_value; + end + end + elseif attr(i).isBlob && attr(i).isExternal + for j = 1:length(data) + if ~isempty(data(j).(attr(i).name)) + uuid = reshape(lower(dec2hex(data(j).(attr(i).name))).',1,[]); + data(j).(attr(i).name) = connection.schemas.(... + attr(i).database).external.tables.(... 
+ attr(i).store).download_buffer(uuid); + end end end end diff --git a/+dj/+internal/Header.m b/+dj/+internal/Header.m index 9d62e18b..6faff671 100644 --- a/+dj/+internal/Header.m +++ b/+dj/+internal/Header.m @@ -78,31 +78,39 @@ attrs.isautoincrement = false(length(attrs.isnullable), 1); attrs.isNumeric = false(length(attrs.isnullable), 1); attrs.isString = false(length(attrs.isnullable), 1); - attrs.isBlob = false(length(attrs.isnullable), 1); attrs.isUuid = false(length(attrs.isnullable), 1); + attrs.isBlob = false(length(attrs.isnullable), 1); + attrs.isExternal = false(length(attrs.isnullable), 1); + attrs.database = cell(length(attrs.isnullable),1); + attrs.store = cell(length(attrs.isnullable),1); attrs.alias = cell(length(attrs.isnullable),1); attrs.sqlType = cell(length(attrs.isnullable),1); attrs.sqlComment = cell(length(attrs.isnullable),1); for i = 1:length(attrs.isnullable) + attrs.database{i} = schema.dbname; attrs.sqlType{i} = attrs.type{i}; attrs.sqlComment{i} = attrs.comment{i}; special = regexp(attrs.comment{i}, ':([^:]+):(.*)', 'tokens'); if ~isempty(special) attrs.type{i} = special{1}{1}; attrs.comment{i} = special{1}{2}; + category = dj.internal.Declare.matchType(attrs.type{i}); + assert(any(strcmpi(category, dj.internal.Declare.SPECIAL_TYPES))); + else + category = dj.internal.Declare.matchType(attrs.sqlType{i}); end attrs.isnullable{i} = strcmpi(attrs.isnullable{i}, 'YES'); attrs.iskey{i} = strcmpi(char(attrs.iskey{i}), 'PRI'); attrs.isautoincrement(i) = ~isempty(regexpi(attrs.Extra{i}, ... 'auto_increment', 'once')); - attrs.isNumeric(i) = any(strcmpi( ... - dj.internal.Declare.matchType(attrs.type{i}), {'NUMERIC'})); - attrs.isString(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'STRING'); - attrs.isBlob(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'INTERNAL_BLOB'); - attrs.isUuid(i) = strcmpi(dj.internal.Declare.matchType(attrs.type{i}), ... - 'UUID'); + attrs.isNumeric(i) = any(strcmpi(category, {'NUMERIC'})); + attrs.isString(i) = strcmpi(category, 'STRING'); + attrs.isUuid(i) = strcmpi(category, 'UUID'); + attrs.isBlob(i) = any(strcmpi(category, {'INTERNAL_BLOB', 'EXTERNAL_BLOB'})); + if any(strcmpi(category, dj.internal.Declare.EXTERNAL_TYPES)) + attrs.isExternal(i) = true; + attrs.store{i} = attrs.type{i}(regexp(attrs.type{i}, '@', 'once')+1:end); + end % strip field lengths off integer types attrs.type{i} = regexprep(sprintf('%s',attrs.type{i}), ... '((tiny|small|medium|big)?int)\(\d+\)','$1'); @@ -170,6 +178,9 @@ function project(self, params) 'isString', false, ... 'isBlob', false, ... 'isUuid', false, ... + 'isExternal', false, ... + 'store', [], ... + 'database', [], ... 'alias', toks{1}{1}, ... 'sqlType', self.computedTypeString, ... 'sqlComment', '' ... 
diff --git a/+dj/+internal/Settings.m b/+dj/+internal/Settings.m index 4e98b442..4989ba14 100644 --- a/+dj/+internal/Settings.m +++ b/+dj/+internal/Settings.m @@ -78,7 +78,11 @@ function saveLocal() dj.internal.Settings.save(dj.internal.Settings.LOCALFILE); end function saveGlobal() - dj.internal.Settings.save(dj.internal.Settings.GLOBALFILE); + location = dj.internal.Settings.GLOBALFILE; + if ispc + location = strrep(location, '~', strrep(getenv('USERPROFILE'), '\', '/')); + end + dj.internal.Settings.save(location); end end end diff --git a/+dj/+internal/Table.m b/+dj/+internal/Table.m index 1a631ecb..1751bd06 100755 --- a/+dj/+internal/Table.m +++ b/+dj/+internal/Table.m @@ -20,10 +20,10 @@ properties(SetAccess = protected) className % the name of the corresponding base dj.Relvar class + schema % handle to a schema object end properties(SetAccess = private) - schema % handle to a schema object plainTableName % just the table name tableHeader % attribute information end @@ -54,6 +54,10 @@ name = class(self); if any(strcmp(name,{'dj.Table','dj.Relvar'})) name = ''; + elseif isa(self,'dj.internal.ExternalTable') + store = self.store; + store(1) = upper(store(1)); + name = [self.schema.package '.External' store]; end end end @@ -244,7 +248,8 @@ function erd(self, up, down) % get foreign keys fk = self.schema.conn.foreignKeys; if ~isempty(fk) - fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName), fk)); + fk = fk(arrayfun(@(s) strcmp(s.from, self.fullTableName) && ... + ~contains(s.ref, '~external'), fk)); end attributes_thus_far = {}; @@ -331,9 +336,9 @@ function addAttribute(self, definition, after) after = [' ' after]; end - sql = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - definition)); - self.alter(sprintf('ADD COLUMN %s%s', sql(1:end-2), after)); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(... + dj.internal.Declare.parseAttrDef(definition), []); + self.alter(sprintf('ADD COLUMN %s%s', sql, after)); end function dropAttribute(self, attrName) @@ -346,9 +351,9 @@ function alterAttribute(self, attrName, newDefinition) % dj.Table/alterAttribute - Modify the definition of attribute % attrName using its new line from the table definition % "newDefinition" - sql = dj.internal.Declare.compileAttribute(dj.internal.Declare.parseAttrDef( ... - newDefinition)); - self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql(1:end-2))); + [sql, ~, ~] = dj.internal.Declare.compileAttribute(... + dj.internal.Declare.parseAttrDef(newDefinition), []); + self.alter(sprintf('CHANGE COLUMN `%s` %s', attrName, sql)); end function addForeignKey(self, target) @@ -364,9 +369,9 @@ function addForeignKey(self, target) if isa(target, 'dj.Table') target = sprintf('->%s', target.className); end - sql = dj.internal.Declare.makeFK('', target, self.primaryKey, ... + [attr_sql, fk_sql, ~] = dj.internal.Declare.makeFK('', target, self.primaryKey, ... true, dj.internal.shorthash(self.fullTableName)); - self.alter(sprintf('ADD %s', sql)) + self.alter(sprintf('ADD %s%s', attr_sql, fk_sql)) end function dropForeignKey(self, target) @@ -473,7 +478,8 @@ function syncDef(self) fprintf('File %s.m is not found\n', self.className); else if dj.config('safemode') ... - && ~strcmpi('yes', dj.internal.ask(sprintf('Update the table definition and class definition in %s?',path))) + && ~strcmpi('yes', dj.internal.ask(sprintf(... + 'Update the table definition and class definition in %s?',path))) disp 'No? Table definition left untouched.' 
else % read old file @@ -582,11 +588,6 @@ function drop(self) methods(Access=private) - function yes = isCreated(self) - yes = self.schema.tableNames.isKey(self.className); - end - - function alter(self, alterStatement) % dj.Table/alter % alter(self, alterStatement) @@ -606,6 +607,10 @@ function alter(self, alterStatement) methods + function yes = isCreated(self) + yes = self.schema.tableNames.isKey(self.className); + end + function create(self) % parses the table declration and declares the table @@ -618,7 +623,12 @@ function create(self) end def = dj.internal.Declare.getDefinition(self); - sql = dj.internal.Declare.declare(self, def); + [sql, external_stores] = dj.internal.Declare.declare(self, def); + sql = strrep(sql, '{database}', self.schema.dbname); + for k=1:length(external_stores) + table = self.schema.external.table(external_stores{k}); + table.create; + end self.schema.conn.query(sql); self.schema.reload end diff --git a/+dj/+lib/DataHash-license.txt b/+dj/+lib/DataHash-license.txt deleted file mode 100644 index 8b137891..00000000 --- a/+dj/+lib/DataHash-license.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/+dj/+lib/DataHash.m b/+dj/+lib/DataHash.m index 6809618d..a47847a2 100644 --- a/+dj/+lib/DataHash.m +++ b/+dj/+lib/DataHash.m @@ -1,484 +1,495 @@ -function Hash = DataHash(Data, Opt) -% DATAHASH - Checksum for Matlab array of any type -% This function creates a hash value for an input of any type. The type and -% dimensions of the input are considered as default, such that UINT8([0,0]) and -% UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed -% recursively. -% -% Hash = DataHash(Data, Opt) -% INPUT: -% Data: Array of these built-in types: -% (U)INT8/16/32/64, SINGLE, DOUBLE, (real/complex, full/sparse) -% CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested), -% function_handle. -% Opt: Struct to specify the hashing algorithm and the output format. -% Opt and all its fields are optional. -% Opt.Method: String, known methods for Java 1.6 (Matlab 2011b): -% 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'. -% Call DataHash without inputs to get a list of available methods. -% Default: 'MD5'. -% Opt.Format: String specifying the output format: -% 'hex', 'HEX': Lower/uppercase hexadecimal string. -% 'double', 'uint8': Numerical vector. -% 'base64': Base64 encoded string, only printable ASCII -% characters, shorter than 'hex', no padding. -% Default: 'hex'. -% Opt.Input: Type of the input as string, not case-sensitive: -% 'array': The contents, type and size of the input [Data] are -% considered for the creation of the hash. Nested CELLs -% and STRUCT arrays are parsed recursively. Empty arrays of -% different type reply different hashs. -% 'file': [Data] is treated as file name and the hash is calculated -% for the files contents. -% 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the -% binary contents of the array is considered, such that -% e.g. empty arrays of different type reply the same hash. -% 'ascii': Same as 'bin', but only the 8-bit ASCII part of the 16-bit -% Matlab CHARs is considered. -% Default: 'array'. -% -% OUTPUT: -% Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing -% method. 
-% -% EXAMPLES: -% % Default: MD5, hex: -% DataHash([]) % 5b302b7b2099a97ba2a276640a192485 -% % MD5, Base64: -% Opt = struct('Format', 'base64', 'Method', 'MD5'); -% DataHash(int32(1:10), Opt) % +tJN9yeF89h3jOFNN55XLg -% % SHA-1, Base64: -% S.a = uint8([]); -% S.b = {{1:10}, struct('q', uint64(415))}; -% Opt.Method = 'SHA-1'; -% Opt.Format = 'HEX'; -% DataHash(S, Opt) % 18672BE876463B25214CA9241B3C79CC926F3093 -% % SHA-1 of binary values: -% Opt = struct('Method', 'SHA-1', 'Input', 'bin'); -% DataHash(1:8, Opt) % 826cf9d3a5d74bbe415e97d4cecf03f445f69225 -% % SHA-256, consider ASCII part only (Matlab's CHAR has 16 bits!): -% Opt.Method = 'SHA-256'; -% Opt.Input = 'ascii'; -% DataHash('abc', Opt) -% % ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad -% % Or equivalently: -% Opt.Input = 'bin'; -% DataHash(uint8('abc'), Opt) -% -% NOTES: -% Function handles and user-defined objects cannot be converted uniquely: -% - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS, -% but the replied struct can depend on the Matlab version. -% - It is tried to convert objects to UINT8 streams in the subfunction -% ConvertObject. A conversion by STRUCT() might be more appropriate. -% Adjust these subfunctions on demand. -% -% MATLAB CHARs have 16 bits! Use Opt.Input='ascii' for comparisons with e.g. -% online hash generators. -% -% Matt Raum suggested this for e.g. user-defined objects: -% DataHash(getByteStreamFromArray(Data) -% This works very well, but unfortunately getByteStreamFromArray is -% undocumented, such that it might vanish in the future or reply different -% output. -% -% For arrays the calculated hash value might be changed in new versions. -% Calling this function without inputs replies the version of the hash. -% -% The C-Mex function GetMD5 is 2 to 100 times faster, but obtains MD5 only: -% http://www.mathworks.com/matlabcentral/fileexchange/25921 -% -% Tested: Matlab 7.7, 7.8, 7.13, 8.6, WinXP/32, Win7/64 -% Author: Jan Simon, Heidelberg, (C) 2011-2016 matlab.2010(a)n(MINUS)simon.de -% -% See also: TYPECAST, CAST. -% -% Michael Kleder, "Compute Hash", no structs and cells: -% http://www.mathworks.com/matlabcentral/fileexchange/8944 -% Tim, "Serialize/Deserialize", converts structs and cells to a byte stream: -% http://www.mathworks.com/matlabcentral/fileexchange/29457 - -% $JRev: R-H V:033 Sum:R+m7rAPNLvlw Date:18-Jun-2016 14:33:17 $ -% $License: BSD (use/copy/change/redistribute on own risk, mention the author) $ -% $File: Tools\GLFile\DataHash.m $ -% History: -% 001: 01-May-2011 21:52, First version. -% 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered. -% 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data. -% 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also. -% Thanks to David (author 243360), who found this bug. -% Jan Achterhold (author 267816) suggested to consider Java objects. -% 016: 01-Feb-2015 20:53, Java heap space exhausted for large files. -% Now files are process in chunks to save memory. -% 017: 15-Feb-2015 19:40, Collsions: Same hash for different data. -% Examples: zeros(1,1) and zeros(1,1,0) -% complex(0) and zeros(1,1,0,0) -% Now the number of dimensions is included, to avoid this. -% 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX. -% Ross found these 2 bugs, which occur when TYPECASTX is not installed. -% If you need the base64 format padded with '=' characters, adjust -% fBase64_enc as you like. -% 026: 29-Jun-2015 00:13, Changed hash for STRUCTs. 
-% Struct arrays are analysed field by field now, which is much faster. -% 027: 13-Sep-2015 19:03, 'ascii' input as abbrev. for Input='bin' and UINT8(). -% 028: 15-Oct-2015 23:11, Example values in help section updated to v022. -% 029: 16-Oct-2015 22:32, Use default options for empty input. -% 031: 28-Feb-2016 15:10, New hash value to get same reply as GetMD5. -% New Matlab version (at least 2015b) use a fast method for TYPECAST, such -% that calling James Tursa's TYPECASTX is not needed anymore. -% Matlab 6.5 not supported anymore: MException for CATCH. -% 033: 18-Jun-2016 14:28, BUGFIX: Failed on empty files. -% Thanks to Christian (AuthorID 2918599). - -% OPEN BUGS: -% Nath wrote: -% function handle refering to struct containing the function will create -% infinite loop. Is there any workaround ? -% Example: -% d= dynamicprops(); -% addprop(d,'f'); -% d.f= @(varargin) struct2cell(d); -% DataHash(d.f) % infinite loop -% This is caught with an error message concerning the recursion limit now. - -% Main function: =============================================================== -% Default options: ------------------------------------------------------------- -Method = 'MD5'; -OutFormat = 'hex'; -isFile = false; -isBin = false; - -% Check number and type of inputs: --------------------------------------------- -nArg = nargin; -if nArg == 2 - if isa(Opt, 'struct') == 0 % Bad type of 2nd input: - Error_L('BadInput2', '2nd input [Opt] must be a struct.'); - end - - % Specify hash algorithm: - if isfield(Opt, 'Method') && ~isempty(Opt.Method) % Short-circuiting - Method = upper(Opt.Method); +function Hash = DataHash(Data, varargin) + % DATAHASH - Checksum for Matlab array of any type + % This function creates a hash value for an input of any type. The type and + % dimensions of the input are considered as default, such that UINT8([0,0]) and + % UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed + % recursively. + % + % Hash = DataHash(Data, Opts...) + % INPUT: + % Data: Array of these built-in types: + % (U)INT8/16/32/64, SINGLE, DOUBLE, (real/complex, full/sparse) + % CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested), + % function_handle, string. + % Opts: Char strings to specify the method, the input and theoutput types: + % Input types: + % 'array': The contents, type and size of the input [Data] are + % considered for the creation of the hash. Nested CELLs + % and STRUCT arrays are parsed recursively. Empty arrays of + % different type reply different hashs. + % 'file': [Data] is treated as file name and the hash is calculated + % for the files contents. + % 'bin': [Data] is a numerical, LOGICAL or CHAR array. Only the + % binary contents of the array is considered, such that + % e.g. empty arrays of different type reply the same hash. + % 'ascii': Same as 'bin', but only the 8-bit ASCII part of the 16-bit + % Matlab CHARs is considered. + % Output types: + % 'hex', 'HEX': Lower/uppercase hexadecimal string. + % 'double', 'uint8': Numerical vector. + % 'base64': Base64. + % 'short': Base64 without padding. + % Hashing method: + % 'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'. + % Call DataHash without inputs to get a list of available methods. + % + % Default: 'MD5', 'hex', 'array' + % + % OUTPUT: + % Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing + % method. + % If DataHash is called without inputs, a struct is replied: + % .HashVersion: Version number of the hashing method of this tool. 
In + % case of bugs or additions, the output can change. + % .Date: Date of release of the current HashVersion. + % .HashMethod: Cell string of the recognized hash methods. + % + % EXAMPLES: + % % Default: MD5, hex: + % DataHash([]) % 5b302b7b2099a97ba2a276640a192485 + % % MD5, Base64: + % DataHash(int32(1:10), 'short', 'MD5') % +tJN9yeF89h3jOFNN55XLg + % % SHA-1, Base64: + % S.a = uint8([]); + % S.b = {{1:10}, struct('q', uint64(415))}; + % DataHash(S, 'SHA-1', 'HEX') % 18672BE876463B25214CA9241B3C79CC926F3093 + % % SHA-1 of binary values: + % DataHash(1:8, 'SHA-1', 'bin') % 826cf9d3a5d74bbe415e97d4cecf03f445f69225 + % % SHA-256, consider ASCII part only (Matlab's CHAR has 16 bits!): + % DataHash('abc', 'SHA-256', 'ascii') + % % ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad + % % Or equivalently by converting the input to UINT8: + % DataHash(uint8('abc'), 'SHA-256', 'bin') + % + % NOTES: + % Function handles and user-defined objects cannot be converted uniquely: + % - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS, + % but the replied struct can depend on the Matlab version. + % - It is tried to convert objects to UINT8 streams in the subfunction + % ConvertObject. A conversion by STRUCT() might be more appropriate. + % Adjust these subfunctions on demand. + % + % MATLAB CHARs have 16 bits! Use Opt.Input='ascii' for comparisons with e.g. + % online hash generators. + % + % Matt Raum suggested this for e.g. user-defined objects: + % DataHash(getByteStreamFromArray(Data)) + % This works very well, but unfortunately getByteStreamFromArray is + % undocumented, such that it might vanish in the future or reply different + % output. + % + % For arrays the calculated hash value might be changed in new versions. + % Calling this function without inputs replies the version of the hash. + % + % The older style for input arguments is accepted also: Struct with fields + % 'Input', 'Method', 'OutFormat'. + % + % The C-Mex function GetMD5 is 2 to 100 times faster, but obtains MD5 only: + % http://www.mathworks.com/matlabcentral/fileexchange/25921 + % + % Tested: Matlab 2009a, 2015b(32/64), 2016b, 2018b, Win7/10 + % Author: Jan Simon, Heidelberg, (C) 2011-2019 matlab.2010(a)n(MINUS)simon.de + % + % See also: TYPECAST, CAST. + % + % Michael Kleder, "Compute Hash", no structs and cells: + % http://www.mathworks.com/matlabcentral/fileexchange/8944 + % Tim, "Serialize/Deserialize", converts structs and cells to a byte stream: + % http://www.mathworks.com/matlabcentral/fileexchange/29457 + % $JRev: R-R V:043 Sum:VbfXFn6217Hp Date:18-Apr-2019 12:11:42 $ + % $License: BSD (use/copy/change/redistribute on own risk, mention the author) $ + % $UnitTest: uTest_DataHash $ + % $File: Tools\GLFile\DataHash.m $ + % History: + % 001: 01-May-2011 21:52, First version. + % 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered. + % 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data. + % 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also. + % Thanks to David (author 243360), who found this bug. + % Jan Achterhold (author 267816) suggested to consider Java objects. + % 016: 01-Feb-2015 20:53, Java heap space exhausted for large files. + % Now files are process in chunks to save memory. + % 017: 15-Feb-2015 19:40, Collsions: Same hash for different data. + % Examples: zeros(1,1) and zeros(1,1,0) + % complex(0) and zeros(1,1,0,0) + % Now the number of dimensions is included, to avoid this. 
+ % 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX. + % Ross found these 2 bugs, which occur when TYPECASTX is not installed. + % If you need the base64 format padded with '=' characters, adjust + % fBase64_enc as you like. + % 026: 29-Jun-2015 00:13, Changed hash for STRUCTs. + % Struct arrays are analysed field by field now, which is much faster. + % 027: 13-Sep-2015 19:03, 'ascii' input as abbrev. for Input='bin' and UINT8(). + % 028: 15-Oct-2015 23:11, Example values in help section updated to v022. + % 029: 16-Oct-2015 22:32, Use default options for empty input. + % 031: 28-Feb-2016 15:10, New hash value to get same reply as GetMD5. + % New Matlab version (at least 2015b) use a fast method for TYPECAST, such + % that calling James Tursa's TYPECASTX is not needed anymore. + % Matlab 6.5 not supported anymore: MException for CATCH. + % 033: 18-Jun-2016 14:28, BUGFIX: Failed on empty files. + % Thanks to Christian (AuthorID 2918599). + % 035: 19-May-2018 01:11, STRING type considered. + % 040: 13-Nov-2018 01:20, Fields of Opt not case-sensitive anymore. + % 041: 09-Feb-2019 18:12, ismethod(class(V),) to support R2018b. + % 042: 02-Mar-2019 18:39, base64: in Java, short: Base64 with padding. + % Unit test. base64->short. + % OPEN BUGS: + % Nath wrote: + % function handle refering to struct containing the function will create + % infinite loop. Is there any workaround ? + % Example: + % d= dynamicprops(); + % addprop(d,'f'); + % d.f= @(varargin) struct2cell(d); + % DataHash(d.f) % infinite loop + % This is caught with an error message concerning the recursion limit now. + %#ok<*CHARTEN> + % Reply current version if called without inputs: ------------------------------ + if nargin == 0 + R = Version_L; + + if nargout == 0 + disp(R); + else + Hash = R; + end + + return; end - - % Specify output format: - if isfield(Opt, 'Format') && ~isempty(Opt.Format) % Short-circuiting - OutFormat = Opt.Format; + % Parse inputs: ---------------------------------------------------------------- + [Method, OutFormat, isFile, isBin, Data] = ParseInput(Data, varargin{:}); + % Create the engine: ----------------------------------------------------------- + try + Engine = java.security.MessageDigest.getInstance(Method); + + catch ME % Handle errors during initializing the engine: + if ~usejava('jvm') + Error_L('needJava', 'DataHash needs Java.'); + end + Error_L('BadInput2', 'Invalid hashing algorithm: [%s]. %s', ... + Method, ME.message); end - - % Check if the Input type is specified - default: 'array': - if isfield(Opt, 'Input') && ~isempty(Opt.Input) % Short-circuiting - if strcmpi(Opt.Input, 'File') - if ischar(Data) == 0 - Error_L('CannotOpen', '1st input FileName must be a string'); - end - isFile = true; - - elseif strncmpi(Opt.Input, 'bin', 3) % Accept 'binary' also - if (isnumeric(Data) || ischar(Data) || islogical(Data)) == 0 || ... - issparse(Data) - Error_L('BadDataType', ... 
- '1st input must be numeric, CHAR or LOGICAL for binary input.'); + % Create the hash value: ------------------------------------------------------- + if isFile + [FID, Msg] = fopen(Data, 'r'); % Open the file + if FID < 0 + Error_L('BadFile', ['Cannot open file: %s', char(10), '%s'], Data, Msg); + end + + % Read file in chunks to save memory and Java heap space: + Chunk = 1e6; % Fastest for 1e6 on Win7/64, HDD + Count = Chunk; % Dummy value to satisfy WHILE condition + while Count == Chunk + [Data, Count] = fread(FID, Chunk, '*uint8'); + if Count ~= 0 % Avoid error for empty file + Engine.update(Data); end - isBin = true; + end + fclose(FID); - elseif strncmpi(Opt.Input, 'asc', 3) % 8-bit ASCII characters - if ~ischar(Data) - Error_L('BadDataType', ... - '1st input must be a CHAR for the input type ASCII.'); + elseif isBin % Contents of an elementary array, type tested already: + if ~isempty(Data) % Engine.update fails for empty input! + if isnumeric(Data) + if isreal(Data) + Engine.update(typecast(Data(:), 'uint8')); + else + Engine.update(typecast(real(Data(:)), 'uint8')); + Engine.update(typecast(imag(Data(:)), 'uint8')); + end + elseif islogical(Data) % TYPECAST cannot handle LOGICAL + Engine.update(typecast(uint8(Data(:)), 'uint8')); + elseif ischar(Data) % TYPECAST cannot handle CHAR + Engine.update(typecast(uint16(Data(:)), 'uint8')); + % Bugfix: Line removed + elseif myIsString(Data) + if isscalar(Data) + Engine.update(typecast(uint16(Data{1}), 'uint8')); + else + Error_L('BadBinData', 'Bin type requires scalar string.'); + end + else % This should have been caught above! + Error_L('BadBinData', 'Data type not handled: %s', class(Data)); end - isBin = true; - Data = uint8(Data); end + else % Array with type: + Engine = CoreHash(Data, Engine); + end + % Calculate the hash: ---------------------------------------------------------- + Hash = typecast(Engine.digest, 'uint8'); + + % Convert hash specific output format: ----------------------------------------- + switch OutFormat + case 'hex' + Hash = sprintf('%.2x', double(Hash)); + case 'HEX' + Hash = sprintf('%.2X', double(Hash)); + case 'double' + Hash = double(reshape(Hash, 1, [])); + case 'uint8' + Hash = reshape(Hash, 1, []); + case 'short' + Hash = fBase64_enc(double(Hash), 0); + case 'base64' + Hash = fBase64_enc(double(Hash), 1); + + otherwise + Error_L('BadOutFormat', ... + '[Opt.Format] must be: HEX, hex, uint8, double, base64.'); end - -elseif nArg == 0 % Reply version of this function: - R = Version_L; - - if nargout == 0 - disp(R); - else - Hash = R; end - - return; - -elseif nArg ~= 1 % Bad number of arguments: - Error_L('BadNInput', '1 or 2 inputs required.'); -end - -% Create the engine: ----------------------------------------------------------- -try - Engine = java.security.MessageDigest.getInstance(Method); -catch - Error_L('BadInput2', 'Invalid algorithm: [%s].', Method); -end - -% Create the hash value: ------------------------------------------------------- -if isFile - % Open the file: - FID = fopen(Data, 'r'); - if FID < 0 - % Check existence of file: - Found = FileExist_L(Data); - if Found - Error_L('CantOpenFile', 'Cannot open file: %s.', Data); + % ****************************************************************************** + function Engine = CoreHash(Data, Engine) + % Consider the type and dimensions of the array to distinguish arrays with the + % same data, but different shape: [0 x 0] and [0 x 1], [1,2] and [1;2], + % DOUBLE(0) and SINGLE([0,0]): + % < v016: [class, size, data]. BUG! 
0 and zeros(1,1,0) had the same hash! + % >= v016: [class, ndims, size, data] + Engine.update([uint8(class(Data)), ... + typecast(uint64([ndims(Data), size(Data)]), 'uint8')]); + + if issparse(Data) % Sparse arrays to struct: + [S.Index1, S.Index2, S.Value] = find(Data); + Engine = CoreHash(S, Engine); + elseif isstruct(Data) % Hash for all array elements and fields: + F = sort(fieldnames(Data)); % Ignore order of fields + for iField = 1:length(F) % Loop over fields + aField = F{iField}; + Engine.update(uint8(aField)); + for iS = 1:numel(Data) % Loop over elements of struct array + Engine = CoreHash(Data(iS).(aField), Engine); + end + end + elseif iscell(Data) % Get hash for all cell elements: + for iS = 1:numel(Data) + Engine = CoreHash(Data{iS}, Engine); + end + elseif isempty(Data) % Nothing to do + elseif isnumeric(Data) + if isreal(Data) + Engine.update(typecast(Data(:), 'uint8')); else - Error_L('FileNotFound', 'File not found: %s.', Data); + Engine.update(typecast(real(Data(:)), 'uint8')); + Engine.update(typecast(imag(Data(:)), 'uint8')); end - end - - % Read file in chunks to save memory and Java heap space: - Chunk = 1e6; % Fastest for 1e6 on Win7/64, HDD - Count = Chunk; % Dummy value to satisfy WHILE condition - while Count == Chunk - [Data, Count] = fread(FID, Chunk, '*uint8'); - if Count ~= 0 % Avoid error for empty file - Engine.update(Data); + elseif islogical(Data) % TYPECAST cannot handle LOGICAL + Engine.update(typecast(uint8(Data(:)), 'uint8')); + elseif ischar(Data) % TYPECAST cannot handle CHAR + Engine.update(typecast(uint16(Data(:)), 'uint8')); + elseif myIsString(Data) % [19-May-2018] String class in >= R2016b + classUint8 = uint8([117, 105, 110, 116, 49, 54]); % 'uint16' + for iS = 1:numel(Data) + % Emulate without recursion: Engine = CoreHash(uint16(Data{iS}), Engine) + aString = uint16(Data{iS}); + Engine.update([classUint8, ... + typecast(uint64([ndims(aString), size(aString)]), 'uint8')]); + if ~isempty(aString) + Engine.update(typecast(uint16(aString), 'uint8')); + end end - end - fclose(FID); - - % Calculate the hash: - Hash = typecast(Engine.digest, 'uint8'); - -elseif isBin % Contents of an elementary array, type tested already: - if isempty(Data) % Nothing to do, Engine.update fails for empty input! - Hash = typecast(Engine.digest, 'uint8'); - else % Matlab's TYPECAST is less elegant: - if isnumeric(Data) - if isreal(Data) - Engine.update(typecast(Data(:), 'uint8')); - else - Engine.update(typecast(real(Data(:)), 'uint8')); - Engine.update(typecast(imag(Data(:)), 'uint8')); + + elseif isa(Data, 'function_handle') + Engine = CoreHash(ConvertFuncHandle(Data), Engine); + elseif (isobject(Data) || isjava(Data)) && ismethod(class(Data), 'hashCode') + Engine = CoreHash(char(Data.hashCode), Engine); + else % Most likely a user-defined object: + try + BasicData = ConvertObject(Data); + catch ME + error(['JSimon:', mfilename, ':BadDataType'], ... + '%s: Cannot create elementary array for type: %s\n %s', ... + mfilename, class(Data), ME.message); + end + + try + Engine = CoreHash(BasicData, Engine); + catch ME + if strcmpi(ME.identifier, 'MATLAB:recursionLimit') + ME = MException(['JSimon:', mfilename, ':RecursiveType'], ... + '%s: Cannot create hash for recursive data type: %s', ... 
+ mfilename, class(Data)); end - elseif islogical(Data) % TYPECAST cannot handle LOGICAL - Engine.update(typecast(uint8(Data(:)), 'uint8')); - elseif ischar(Data) % TYPECAST cannot handle CHAR - Engine.update(typecast(uint16(Data(:)), 'uint8')); - % Bugfix: Line removed + throw(ME); end - Hash = typecast(Engine.digest, 'uint8'); end -else % Array with type: - Engine = CoreHash(Data, Engine); - Hash = typecast(Engine.digest, 'uint8'); -end - -% Convert hash specific output format: ----------------------------------------- -switch OutFormat - case 'hex' - Hash = sprintf('%.2x', double(Hash)); - case 'HEX' - Hash = sprintf('%.2X', double(Hash)); - case 'double' - Hash = double(reshape(Hash, 1, [])); - case 'uint8' - Hash = reshape(Hash, 1, []); - case 'base64' - Hash = fBase64_enc(double(Hash)); - otherwise - Error_L('BadOutFormat', ... - '[Opt.Format] must be: HEX, hex, uint8, double, base64.'); -end - -% return; - -% ****************************************************************************** -function Engine = CoreHash(Data, Engine) -% This methods uses the slower TYPECAST of Matlab - -% Consider the type and dimensions of the array to distinguish arrays with the -% same data, but different shape: [0 x 0] and [0 x 1], [1,2] and [1;2], -% DOUBLE(0) and SINGLE([0,0]): -% < v016: [class, size, data]. BUG! 0 and zeros(1,1,0) had the same hash! -% >= v016: [class, ndims, size, data] -Engine.update([uint8(class(Data)), ... - typecast(uint64([ndims(Data), size(Data)]), 'uint8')]); - -if issparse(Data) % Sparse arrays to struct: - [S.Index1, S.Index2, S.Value] = find(Data); - Engine = CoreHash(S, Engine); -elseif isstruct(Data) % Hash for all array elements and fields: - F = sort(fieldnames(Data)); % Ignore order of fields - for iField = 1:length(F) % Loop over fields - aField = F{iField}; - Engine.update(uint8(aField)); - for iS = 1:numel(Data) % Loop over elements of struct array - Engine = CoreHash(Data(iS).(aField), Engine); + end + % ****************************************************************************** + function [Method, OutFormat, isFile, isBin, Data] = ParseInput(Data, varargin) + % Default options: ------------------------------------------------------------- + Method = 'MD5'; + OutFormat = 'hex'; + isFile = false; + isBin = false; + % Check number and type of inputs: --------------------------------------------- + nOpt = nargin - 1; + Opt = varargin; + if nOpt == 1 && isa(Opt{1}, 'struct') % Old style Options as struct: + Opt = struct2cell(Opt{1}); + nOpt = numel(Opt); + end + % Loop over strings in the input: ---------------------------------------------- + for iOpt = 1:nOpt + aOpt = Opt{iOpt}; + if ~ischar(aOpt) + Error_L('BadInputType', '[Opt] must be a struct or chars.'); + end + + switch lower(aOpt) + case 'file' % Data contains the file name: + isFile = true; + case {'bin', 'binary'} % Just the contents of the data: + if (isnumeric(Data) || ischar(Data) || islogical(Data) || ... + myIsString(Data)) == 0 || issparse(Data) + Error_L('BadDataType', ['[Bin] input needs data type: ', ... + 'numeric, CHAR, LOGICAL, STRING.']); + end + isBin = true; + case 'array' + isBin = false; % Is the default already + case {'asc', 'ascii'} % 8-bit part of MATLAB CHAR or STRING: + isBin = true; + if ischar(Data) + Data = uint8(Data); + elseif myIsString(Data) && numel(Data) == 1 + Data = uint8(char(Data)); + else + Error_L('BadDataType', ... 
+ 'ASCII method: Data must be a CHAR or scalar STRING.'); + end + case 'hex' + if aOpt(1) == 'H' + OutFormat = 'HEX'; + else + OutFormat = 'hex'; + end + case {'double', 'uint8', 'short', 'base64'} + OutFormat = lower(aOpt); + otherwise % Guess that this is the method: + Method = upper(aOpt); end end -elseif iscell(Data) % Get hash for all cell elements: - for iS = 1:numel(Data) - Engine = CoreHash(Data{iS}, Engine); end -elseif isempty(Data) % Nothing to do -elseif isnumeric(Data) - if isreal(Data) - Engine.update(typecast(Data(:), 'uint8')); - else - Engine.update(typecast(real(Data(:)), 'uint8')); - Engine.update(typecast(imag(Data(:)), 'uint8')); + % ****************************************************************************** + function FuncKey = ConvertFuncHandle(FuncH) + % The subfunction ConvertFuncHandle converts function_handles to a struct + % using the Matlab function FUNCTIONS. The output of this function changes + % with the Matlab version, such that DataHash(@sin) replies different hashes + % under Matlab 6.5 and 2009a. + % An alternative is using the function name and name of the file for + % function_handles, but this is not unique for nested or anonymous functions. + % If the MATLABROOT is removed from the file's path, at least the hash of + % Matlab's toolbox functions is (usually!) not influenced by the version. + % Finally I'm in doubt if there is a unique method to hash function handles. + % Please adjust the subfunction ConvertFuncHandles to your needs. + % The Matlab version influences the conversion by FUNCTIONS: + % 1. The format of the struct replied FUNCTIONS is not fixed, + % 2. The full paths of toolbox function e.g. for @mean differ. + FuncKey = functions(FuncH); + % Include modification file time and file size. Suggested by Aslak Grinsted: + if ~isempty(FuncKey.file) + d = dir(FuncKey.file); + if ~isempty(d) + FuncKey.filebytes = d.bytes; + FuncKey.filedate = d.datenum; + end + end + % ALTERNATIVE: Use name and path. The part of the toolbox functions + % is replaced such that the hash for @mean does not depend on the Matlab + % version. + % Drawbacks: Anonymous functions, nested functions... + % funcStruct = functions(FuncH); + % funcfile = strrep(funcStruct.file, matlabroot, ''); + % FuncKey = uint8([funcStruct.function, ' ', funcfile]); + % Finally I'm afraid there is no unique method to get a hash for a function + % handle. Please adjust this conversion to your needs. + end + % ****************************************************************************** + function DataBin = ConvertObject(DataObj) + % Convert a user-defined object to a binary stream. There cannot be a unique + % solution, so this part is left for the user... 
+ try % Perhaps a direct conversion is implemented: + DataBin = uint8(DataObj); + + % Matt Raum had this excellent idea - unfortunately this function is + % undocumented and might not be supported in the future: + % DataBin = getByteStreamFromArray(DataObj); + + catch % Or perhaps this is better: + WarnS = warning('off', 'MATLAB:structOnObject'); + DataBin = struct(DataObj); + warning(WarnS); end -elseif islogical(Data) % TYPECAST cannot handle LOGICAL - Engine.update(typecast(uint8(Data(:)), 'uint8')); -elseif ischar(Data) % TYPECAST cannot handle CHAR - Engine.update(typecast(uint16(Data(:)), 'uint8')); -elseif isa(Data, 'function_handle') - Engine = CoreHash(ConvertFuncHandle(Data), Engine); -elseif (isobject(Data) || isjava(Data)) && ismethod(Data, 'hashCode') - Engine = CoreHash(char(Data.hashCode), Engine); -else % Most likely a user-defined object: - try - BasicData = ConvertObject(Data); - catch ME - error(['JSimon:', mfilename, ':BadDataType'], ... - '%s: Cannot create elementary array for type: %s\n %s', ... - mfilename, class(Data), ME.message); end - + % ****************************************************************************** + function Out = fBase64_enc(In, doPad) + % Encode numeric vector of UINT8 values to base64 string. + B64 = org.apache.commons.codec.binary.Base64; + Out = char(B64.encode(In)).'; + if ~doPad + Out(Out == '=') = []; + end + % Matlab method: + % Pool = [65:90, 97:122, 48:57, 43, 47]; % [0:9, a:z, A:Z, +, /] + % v8 = [128; 64; 32; 16; 8; 4; 2; 1]; + % v6 = [32, 16, 8, 4, 2, 1]; + % + % In = reshape(In, 1, []); + % X = rem(floor(bsxfun(@rdivide, In, v8)), 2); + % d6 = rem(numel(X), 6); + % if d6 ~= 0 + % X = [X(:); zeros(6 - d6, 1)]; + % end + % Out = char(Pool(1 + v6 * reshape(X, 6, []))); + % + % p = 3 - rem(numel(Out) - 1, 4); + % if doPad && p ~= 0 % Standard base64 string with trailing padding: + % Out = [Out, repmat('=', 1, p)]; + % end + end + % ****************************************************************************** + function T = myIsString(S) + % isstring was introduced in R2016b: + persistent hasString + if isempty(hasString) + matlabVer = [100, 1] * sscanf(version, '%d.', 2); + hasString = (matlabVer >= 901); % isstring existing since R2016b + end + T = hasString && isstring(S); % Short-circuiting + end + % ****************************************************************************** + function R = Version_L() + % The output differs between versions of this function. So give the user a + % chance to recognize the version: + % 1: 01-May-2011, Initial version + % 2: 15-Feb-2015, The number of dimensions is considered in addition. + % In version 1 these variables had the same hash: + % zeros(1,1) and zeros(1,1,0), complex(0) and zeros(1,1,0,0) + % 3: 29-Jun-2015, Struct arrays are processed field by field and not element + % by element, because this is much faster. In consequence the hash value + % differs, if the input contains a struct. + % 4: 28-Feb-2016 15:20, same output as GetMD5 for MD5 sums. Therefore the + % dimensions are casted to UINT64 at first. + % 19-May-2018 01:13, STRING type considered. + R.HashVersion = 4; + R.Date = [2018, 5, 19]; + R.HashMethod = {}; try
- mfilename, class(Data)); + Provider = java.security.Security.getProviders; + for iProvider = 1:numel(Provider) + S = char(Provider(iProvider).getServices); + Index = strfind(S, 'MessageDigest.'); + for iDigest = 1:length(Index) + Digest = strtok(S(Index(iDigest):end)); + Digest = strrep(Digest, 'MessageDigest.', ''); + R.HashMethod = cat(2, R.HashMethod, {Digest}); + end end - throw(ME); + catch ME + fprintf(2, '%s\n', ME.message); + R.HashMethod = 'error'; end -end - -% return; - -% ****************************************************************************** -function FuncKey = ConvertFuncHandle(FuncH) -% The subfunction ConvertFuncHandle converts function_handles to a struct -% using the Matlab function FUNCTIONS. The output of this function changes -% with the Matlab version, such that DataHash(@sin) replies different hashes -% under Matlab 6.5 and 2009a. -% An alternative is using the function name and name of the file for -% function_handles, but this is not unique for nested or anonymous functions. -% If the MATLABROOT is removed from the file's path, at least the hash of -% Matlab's toolbox functions is (usually!) not influenced by the version. -% Finally I'm in doubt if there is a unique method to hash function handles. -% Please adjust the subfunction ConvertFuncHandles to your needs. - -% The Matlab version influences the conversion by FUNCTIONS: -% 1. The format of the struct replied FUNCTIONS is not fixed, -% 2. The full paths of toolbox function e.g. for @mean differ. -FuncKey = functions(FuncH); - -% Include modification file time and file size. Suggested by Aslak Grinsted: -if ~isempty(FuncKey.file) - d = dir(FuncKey.file); - if ~isempty(d) - FuncKey.filebytes = d.bytes; - FuncKey.filedate = d.datenum; - end -end - -% ALTERNATIVE: Use name and path. The part of the toolbox functions -% is replaced such that the hash for @mean does not depend on the Matlab -% version. -% Drawbacks: Anonymous functions, nested functions... -% funcStruct = functions(FuncH); -% funcfile = strrep(funcStruct.file, matlabroot, ''); -% FuncKey = uint8([funcStruct.function, ' ', funcfile]); - -% Finally I'm afraid there is no unique method to get a hash for a function -% handle. Please adjust this conversion to your needs. - -% return; - -% ****************************************************************************** -function DataBin = ConvertObject(DataObj) -% Convert a user-defined object to a binary stream. There cannot be a unique -% solution, so this part is left for the user... - -try % Perhaps a direct conversion is implemented: - DataBin = uint8(DataObj); - - % Matt Raum had this excellent idea - unfortunately this function is - % undocumented and might not be supported in te future: - % DataBin = getByteStreamFromArray(DataObj); - -catch % Or perhaps this is better: - WarnS = warning('off', 'MATLAB:structOnObject'); - DataBin = struct(DataObj); - warning(WarnS); -end - -% return; - -% ****************************************************************************** -function Out = fBase64_enc(In) -% Encode numeric vector of UINT8 values to base64 string. -% The intention of this is to create a shorter hash than the HEX format. -% Therefore a padding with '=' characters is omitted on purpose. 
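The replacement fBase64_enc above keeps this no-padding behaviour behind the new doPad flag and delegates the encoding to the Apache Commons Codec class that the new helper already relies on. A small sketch of both paths (the example bytes and outputs are illustrative):

    B64 = org.apache.commons.codec.binary.Base64;   % Java codec used by fBase64_enc
    bytes = uint8([72 105]);                        % example input ("Hi")
    padded = char(B64.encode(bytes)).';             % 'SGk='  (doPad == true path)
    unpadded = padded;
    unpadded(unpadded == '=') = [];                 % 'SGk'   (doPad == false path)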
- -Pool = [65:90, 97:122, 48:57, 43, 47]; % [0:9, a:z, A:Z, +, /] -v8 = [128; 64; 32; 16; 8; 4; 2; 1]; -v6 = [32, 16, 8, 4, 2, 1]; - -In = reshape(In, 1, []); -X = rem(floor(In(ones(8, 1), :) ./ v8(:, ones(length(In), 1))), 2); -Y = reshape([X(:); zeros(6 - rem(numel(X), 6), 1)], 6, []); -Out = char(Pool(1 + v6 * Y)); - -% return; - -% ****************************************************************************** -function Ex = FileExist_L(FileName) -% A more reliable version of EXIST(FileName, 'file'): -dirFile = dir(FileName); -if length(dirFile) == 1 - Ex = ~(dirFile.isdir); -else - Ex = false; -end - -% return; - -% ****************************************************************************** -function R = Version_L() -% The output differs between versions of this function. So give the user a -% chance to recognize the version: -% 1: 01-May-2011, Initial version -% 2: 15-Feb-2015, The number of dimensions is considered in addition. -% In version 1 these variables had the same hash: -% zeros(1,1) and zeros(1,1,0), complex(0) and zeros(1,1,0,0) -% 3: 29-Jun-2015, Struct arrays are processed field by field and not element -% by element, because this is much faster. In consequence the hash value -% differs, if the input contains a struct. -% 4: 28-Feb-2016 15:20, same output as GetMD5 for MD5 sums. Therefore the -% dimensions are casted to UINT64 at first. -R.HashVersion = 4; -R.Date = [2016, 2, 28]; - -R.HashMethod = {}; -try - Provider = java.security.Security.getProviders; - for iProvider = 1:numel(Provider) - S = char(Provider(iProvider).getServices); - Index = strfind(S, 'MessageDigest.'); - for iDigest = 1:length(Index) - Digest = strtok(S(Index(iDigest):end)); - Digest = strrep(Digest, 'MessageDigest.', ''); - R.HashMethod = cat(2, R.HashMethod, {Digest}); - end end -catch ME - fprintf(2, '%s\n', ME.message); - R.HashMethod = 'error'; -end - -% return; - -% ****************************************************************************** -function Error_L(ID, varargin) - -error(['JSimon:', mfilename, ':', ID], ['*** %s: ', varargin{1}], ... - mfilename, varargin{2:nargin - 1}); - -% return; + % ****************************************************************************** + function Error_L(ID, varargin) + error(['JSimon:', mfilename, ':', ID], ['*** %s: ', varargin{1}], ... + mfilename, varargin{2:nargin - 1}); + end \ No newline at end of file diff --git a/+dj/+store_plugins/File.m b/+dj/+store_plugins/File.m new file mode 100644 index 00000000..f51c1575 --- /dev/null +++ b/+dj/+store_plugins/File.m @@ -0,0 +1,89 @@ +% dj.internal.File - an external storage class for local file stores. +classdef File + properties (Hidden, Constant) + % mode = -1(reject), 0(optional), 1(require) + validation_config = struct( ... + 'datajoint_type', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) && any(strcmpi(... + self, {'blob', 'filepath'}))... + ), ... + 'protocol', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'location', struct( ... + 'mode', @(datajoint_type) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'subfolding', struct( ... + 'mode', @(datajoint_type) -1 + any(strcmpi(datajoint_type, {'blob'})), ... + 'type_check', @(self) all(floor(self) == self), ... + 'default', [2, 2] ... + ) ... + ) + backward_validation_config = struct( ... + 'protocol', struct( ... + 'mode', @(unused) 1, ... + 'type_check', @(self) ischar(self) ... + ), ... + 'location', struct( ... + 'mode', @(unused) 1, ... 
+ 'type_check', @(self) ischar(self) ... + ), ... + 'subfolding', struct( ... + 'mode', @(unused) 0, ... + 'type_check', @(self) all(floor(self) == self), ... + 'default', [2, 2] ... + ) ... + ) + end + properties + protocol + datajoint_type + location + type_config + end + methods (Static) + function remove_object(external_filepath) + % delete an object from the store + delete(external_filepath); + end + function upload_buffer(buffer, external_filepath) + % put blob + [~,start_idx,~] = regexp(external_filepath, '/', 'match', 'start', 'end'); + mkdir(external_filepath(1:(start_idx(end)-1))); + fileID = fopen(external_filepath, 'w'); + fwrite(fileID, buffer); + fclose(fileID); + end + function result = download_buffer(external_filepath) + % get blob + fileID = fopen(external_filepath, 'r'); + result = fread(fileID); + fclose(fileID); + end + end + methods + function self = File(config) + % initialize store + self.protocol = config.protocol; + self.location = strrep(config.location, '\', '/'); + self.type_config = struct(); + + if dj.internal.ExternalTable.BACKWARD_SUPPORT_DJPY012 && ~any(strcmp(... + 'datajoint_type', fieldnames(config))) + self.type_config.subfolding = config.subfolding; + else + self.datajoint_type = config.datajoint_type; + if strcmpi(self.datajoint_type, 'blob') + self.type_config.subfolding = config.subfolding; + end + end + end + function external_filepath = make_external_filepath(self, relative_filepath) + % resolve the complete external path based on the relative path + external_filepath = [self.location '/' relative_filepath]; + end + end +end diff --git a/+dj/Connection.m b/+dj/Connection.m index 1d393fa8..27af7324 100644 --- a/+dj/Connection.m +++ b/+dj/Connection.m @@ -8,9 +8,10 @@ inTransaction = false connId % connection handle packages % maps database names to package names + schemas % registered schema objects % dependency lookups by table name - foreignKeys % maps table names to their referenced table names (primary foreign key) + foreignKeys % maps table names to their referenced table names (primary foreign key) end properties(Access = private) @@ -48,9 +49,14 @@ end self.foreignKeys = struct([]); self.packages = containers.Map; + self.schemas = struct(); end + function register(self, schema) + self.schemas.(schema.dbname) = schema; + end + function addPackage(self, dbname, package) self.packages(dbname) = package; end @@ -64,7 +70,8 @@ function loadDependencies(self, schema) '\((?[`\w, ]+)\)'); for tabName = schema.headers.keys - fk = self.query(sprintf('SHOW CREATE TABLE `%s`.`%s`', schema.dbname, tabName{1})); + fk = self.query(sprintf('SHOW CREATE TABLE `%s`.`%s`', schema.dbname, ... + tabName{1})); fk = strtrim(regexp(fk.('Create Table'){1},'\n','split')'); fk = regexp(fk, pat, 'names'); fk = [fk{:}]; @@ -130,7 +137,8 @@ function loadDependencies(self, schema) s = regexp(fullTableName, '^`(?.+)`.`(?[#~\w\d]+)`$','names'); className = fullTableName; if ~isempty(s) && self.packages.isKey(s.dbname) - className = sprintf('%s.%s',self.packages(s.dbname),dj.internal.toCamelCase(s.tablename)); + className = sprintf('%s.%s',self.packages(s.dbname),dj.internal.toCamelCase(... + s.tablename)); elseif strict error('Unknown package for "%s". 
Activate its schema first.', fullTableName) end diff --git a/+dj/Relvar.m b/+dj/Relvar.m index 03769f6f..9e531753 100755 --- a/+dj/Relvar.m +++ b/+dj/Relvar.m @@ -20,11 +20,16 @@ id = ret.lid; end - function delQuick(self) + function count = delQuick(self, getCount) % DELQUICK - remove all tuples of the relation from its table. % Unlike del, delQuick does not prompt for user % confirmation, nor does it attempt to cascade down to the dependent tables. self.schema.conn.query(sprintf('DELETE FROM %s', self.sql)) + count = []; + if nargin > 1 && getCount + count = self.schema.conn.query(sprintf('SELECT count(*) as count FROM %s', ... + self.sql)).count; + end end @@ -104,14 +109,15 @@ function cleanup(self) rels = rels(counts>0); % confirm and delete - if dj.config('safemode') && ~strcmpi('yes',dj.internal.ask('Proceed to delete?')) + if dj.config('safemode') && ~strcmpi('yes', ... + dj.internal.ask('Proceed to delete?')) disp 'delete canceled' else self.schema.conn.startTransaction try for rel = fliplr(rels) fprintf('Deleting from %s\n', rel.className) - rel.delQuick + rel.delQuick; end self.schema.conn.commitTransaction disp committed @@ -228,7 +234,18 @@ function insert(self, tuples, command) decMtx = hex2dec(hexMtx); value = uint8(decMtx); elseif header.attributes(attr_idx).isBlob - placeholder = '"{M}"'; + if ~header.attributes(attr_idx).isExternal + placeholder = '"{M}"'; + else + placeholder = '"{B}"'; + value = self.schema.external.tables.(... + header.attributes(attr_idx).store).upload_buffer(value); + hexstring = value'; + reshapedString = reshape(hexstring,2,16); + hexMtx = reshapedString.'; + decMtx = hex2dec(hexMtx); + value = uint8(decMtx); + end else assert((isnumeric(value) || islogical(value)) && (isscalar( ... value) || isempty(value)),... diff --git a/+dj/Schema.m b/+dj/Schema.m index 759dd81f..0bf3854a 100755 --- a/+dj/Schema.m +++ b/+dj/Schema.m @@ -8,19 +8,23 @@ % to exist in Matlab. Tab completion of table names is possible because the % table names are added as dynamic properties of TableAccessor. % -%Complete documentation is available at Datajoint wiki +%Complete documentation is available at +% Datajoint wiki classdef Schema < handle properties(SetAccess = private) - package % the package (directory starting with a +) that stores schema classes, must be on path + package % the package (directory starting with a +) that stores schema classes, + % must be on path dbname % database (schema) name - prefix='' % optional table prefix, allowing multiple schemas per database -- remove this feature if not used + prefix='' % optional table prefix, allowing multiple schemas per database -- remove + % this feature if not used conn % handle to the dj.Connection object loaded = false tableNames % tables indexed by classNames headers % dj.internal.Header objects indexed by table names v % virtual class generator + external end @@ -63,6 +67,8 @@ self.headers = containers.Map('KeyType','char','ValueType','any'); self.tableNames = containers.Map('KeyType','char','ValueType','char'); self.v = dj.internal.TableAccessor(self); + self.external = dj.internal.ExternalMapping(self); + conn.register(self); end @@ -110,7 +116,8 @@ function makeClass(self, className) else existingTable = []; choice = dj.internal.ask(... - '\nChoose table tier:\n L=lookup\n M=manual\n I=imported\n C=computed\n P=part\n',... + ['\nChoose table tier:\n L=lookup\n M=manual\n I=imported\n ' ... + 'C=computed\n P=part\n'],... 
{'L','M','I','C','P'}); tierClass = tierClassMap.(choice); isAuto = ismember(tierClass, {'dj.Imported', 'dj.Computed'}); @@ -186,8 +193,9 @@ function reload(self, force) tableInfo = dj.struct.rename(tableInfo,'Name','name','Comment','comment'); % determine table tier (see dj.internal.Table) + % regular expressions to determine table tier re = cellfun(@(x) sprintf('^%s%s[a-z][a-z0-9_]*$',self.prefix,x), ... - dj.Schema.tierPrefixes, 'UniformOutput', false); % regular expressions to determine table tier + dj.Schema.tierPrefixes, 'UniformOutput', false); if strcmpi(dj.config('loglevel'), 'DEBUG') fprintf('%.3g s\nloading field information... ', toc), tic @@ -196,7 +204,8 @@ function reload(self, force) tierIdx = ~cellfun(@isempty, regexp(info.name, re, 'once')); assert(sum(tierIdx)==1) info.tier = dj.Schema.allowedTiers{tierIdx}; - self.tableNames(sprintf('%s.%s',self.package,dj.internal.toCamelCase(info.name(length(self.prefix)+1:end)))) = info.name; + self.tableNames(sprintf('%s.%s',self.package,dj.internal.toCamelCase(... + info.name(length(self.prefix)+1:end)))) = info.name; self.headers(info.name) = dj.internal.Header.initFromDatabase(self,info); end diff --git a/+tests/Main.m b/+tests/Main.m index a0e31b35..5d60821a 100644 --- a/+tests/Main.m +++ b/+tests/Main.m @@ -2,6 +2,7 @@ tests.TestConfig & ... tests.TestConnection & ... tests.TestERD & ... + tests.TestExternalFile & ... tests.TestFetch & ... tests.TestProjection & ... tests.TestTls & ... diff --git a/+tests/Prep.m b/+tests/Prep.m index c2c3b8f5..63fe6c68 100644 --- a/+tests/Prep.m +++ b/+tests/Prep.m @@ -13,6 +13,7 @@ end properties test_root; + external_file_store_root; end methods function obj = Prep() @@ -20,6 +21,11 @@ test_pkg_details = what('tests'); [test_root, ~, ~] = fileparts(test_pkg_details.path); obj.test_root = [test_root '/+tests']; + if ispc + obj.external_file_store_root = [getenv('TEMP') '\root']; + else + obj.external_file_store_root = '/tmp/root'; + end end end methods (TestClassSetup) @@ -96,6 +102,7 @@ function dispose(testCase) curr_conn = dj.conn(testCase.CONN_INFO_ROOT.host, ... testCase.CONN_INFO_ROOT.user, testCase.CONN_INFO_ROOT.password, '',true); + % remove databases curr_conn.query('SET FOREIGN_KEY_CHECKS=0;'); res = curr_conn.query(['SHOW DATABASES LIKE "' testCase.PREFIX '_%";']); for i = 1:length(res.(['Database (' testCase.PREFIX '_%)'])) @@ -104,6 +111,7 @@ function dispose(testCase) end curr_conn.query('SET FOREIGN_KEY_CHECKS=1;'); + % remove users cmd = {... 'DROP USER ''datajoint''@''%%'';' 'DROP USER ''djview''@''%%'';' diff --git a/+tests/TestConfig.m b/+tests/TestConfig.m index 1c4cc29e..e56b13b3 100644 --- a/+tests/TestConfig.m +++ b/+tests/TestConfig.m @@ -1,7 +1,7 @@ classdef TestConfig < tests.Prep % TestConfig tests scenarios related to initializing DJ config. 
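Several of the tests below manipulate store configuration through dj.config. For orientation, a minimal sketch of registering a file store; the store name 'main', the paths, and the cache key are illustrative and mirror the entries in test_schemas/store_config.json further down:

    % Register a file-protocol store named 'main' (names and paths are examples):
    dj.config('stores.main', struct( ...
        'protocol',   'file', ...
        'location',   '/tmp/root/base', ...
        'subfolding', [2, 2]));                    % optional; defaults to [2, 2]
    % Attributes declared as "blob@main" are then written under that location.
    dj.config('blobCache', '/tmp/root/cache');     % fetch cache ('cache' is the legacy key)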
methods (Static) - function obj = configRemoveEnvVars(obj, type) + function obj = TestConfig_configRemoveEnvVars(obj, type) switch type case 'file' if isfield(obj, 'database_host') @@ -31,7 +31,7 @@ end end end - function configSingleFileTest(test_instance, type, fname, base) + function TestConfig_configSingleFileTest(test_instance, type, fname, base) switch type case 'save-local' dj.config.saveLocal(); @@ -39,6 +39,9 @@ function configSingleFileTest(test_instance, type, fname, base) case 'save-global' dj.config.saveGlobal(); fname = dj.internal.Settings.GLOBALFILE; + if ispc + fname = strrep(fname, '~', strrep(getenv('USERPROFILE'), '\', '/')); + end case 'save-custom' dj.config.save(fname); case 'load-custom' @@ -46,7 +49,7 @@ function configSingleFileTest(test_instance, type, fname, base) end % load raw read_data = fileread(fname); - obj1 = tests.TestConfig.configRemoveEnvVars(jsondecode(read_data), 'file'); + obj1 = tests.TestConfig.TestConfig_configRemoveEnvVars(jsondecode(read_data), 'file'); % optional merge from base if strcmpi(type, 'load-custom') tmp = rmfield(base, intersect(fieldnames(base), fieldnames(obj1))); @@ -57,7 +60,7 @@ function configSingleFileTest(test_instance, type, fname, base) % stringify file = jsonencode(obj1); % load config - obj2 = tests.TestConfig.configRemoveEnvVars(dj.config(), 'config'); + obj2 = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config(), 'config'); curr = jsonencode(obj2); curr = regexprep(curr,'[a-z0-9][A-Z]','${$0(1)}_${lower($0(2))}'); % checks @@ -70,7 +73,7 @@ function configSingleFileTest(test_instance, type, fname, base) end end methods (Test) - function testGetSet(testCase) + function TestConfig_testGetSet(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); function verifyConfig(new, previous_value, subref, subref_value, subref_prev) @@ -134,7 +137,7 @@ function verifyConfig(new, previous_value, subref, subref_value, subref_prev) 'subfolding', [2,2] ... )}}), prev, 'stores{2}.protocol', 'http', 's3'); end - function testConfigChecks(testCase) + function TestConfig_testConfigChecks(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyError(@() dj.config(9), ... @@ -142,33 +145,33 @@ function testConfigChecks(testCase) d = testCase.verifyError(@() dj.config('none'), ... 'DataJoint:Config:InvalidKey'); end - function testRestore(testCase) + function TestConfig_testRestore(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); dj.config.restore; - obj1 = tests.TestConfig.configRemoveEnvVars(dj.config(), 'config'); - obj2 = tests.TestConfig.configRemoveEnvVars( ... + obj1 = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config(), 'config'); + obj2 = tests.TestConfig.TestConfig_configRemoveEnvVars( ... 
orderfields(dj.internal.Settings.DEFAULTS), 'config'); testCase.verifyEqual(jsonencode(obj1), jsonencode(obj2)); end - function testSave(testCase) + function TestConfig_testSave(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); dj.config.restore; % local dj.config('font', 10); - tests.TestConfig.configSingleFileTest(testCase, 'save-local'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-local'); % global dj.config('font', 12); - tests.TestConfig.configSingleFileTest(testCase, 'save-global'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-global'); % custom dj.config('font', 16); - tests.TestConfig.configSingleFileTest(testCase, 'save-custom', './config.json'); + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'save-custom', './config.json'); dj.config.restore; end - function testLoad(testCase) + function TestConfig_testLoad(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); pkg = what('tests'); @@ -176,22 +179,22 @@ function testLoad(testCase) default_file = [pkg.path '/test_schemas/default.json']; dj.config.restore; dj.config.save(default_file); - defaults = tests.TestConfig.configRemoveEnvVars( ... + defaults = tests.TestConfig.TestConfig_configRemoveEnvVars( ... jsondecode(fileread(default_file)), 'file'); delete(default_file); % load test config - tests.TestConfig.configSingleFileTest(testCase, 'load-custom', ... + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'load-custom', ... [pkg.path '/test_schemas/config.json'], defaults); % load new config on top of existing - base = tests.TestConfig.configRemoveEnvVars(dj.config, 'config'); + base = tests.TestConfig.TestConfig_configRemoveEnvVars(dj.config, 'config'); base = jsonencode(base); base = regexprep(base,'[a-z0-9][A-Z]','${$0(1)}_${lower($0(2))}'); - tests.TestConfig.configSingleFileTest(testCase, 'load-custom', ... + tests.TestConfig.TestConfig_configSingleFileTest(testCase, 'load-custom', ... [pkg.path '/test_schemas/config_lite.json'], jsondecode(base)); % cleanup dj.config.restore; end - function testEnv(testCase) + function TestConfig_testEnv(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); function validateEnvVarConfig(type, values) diff --git a/+tests/TestConnection.m b/+tests/TestConnection.m index 61914dc7..92b113a3 100644 --- a/+tests/TestConnection.m +++ b/+tests/TestConnection.m @@ -1,7 +1,7 @@ classdef TestConnection < tests.Prep % TestConnection tests typical connection scenarios. methods (Test) - function testConnection(testCase) + function TestConnection_testConnection(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyTrue(dj.conn(... @@ -9,7 +9,7 @@ function testConnection(testCase) testCase.CONN_INFO.user,... testCase.CONN_INFO.password,'',true).isConnected); end - function testConnectionExists(testCase) + function TestConnection_testConnectionExists(testCase) % testConnectionExists tests that will not fail if connection open % to the same host. % Fix https://github.com/datajoint/datajoint-matlab/issues/160 @@ -18,7 +18,7 @@ function testConnectionExists(testCase) dj.conn(testCase.CONN_INFO.host, '', '', '', '', true); dj.conn(testCase.CONN_INFO.host, '', '', '', '', true); end - function testConnectionDiffHost(testCase) + function TestConnection_testConnectionDiffHost(testCase) % testConnectionDiffHost tests that will fail if connection open % to a different host. 
% Fix https://github.com/datajoint/datajoint-matlab/issues/160 @@ -30,7 +30,7 @@ function testConnectionDiffHost(testCase) 'anything', '', '', '', '', true), ... 'DataJoint:Connection:AlreadyInstantiated'); end - function testPort(testCase) + function TestConnection_testPort(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); testCase.verifyError(@() dj.conn(... diff --git a/+tests/TestERD.m b/+tests/TestERD.m index 4f44ca51..c400adf2 100644 --- a/+tests/TestERD.m +++ b/+tests/TestERD.m @@ -1,7 +1,7 @@ classdef TestERD < tests.Prep % TestERD tests unusual ERD scenarios. methods (Test) - function testDraw(testCase) + function TestERD_testDraw(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; diff --git a/+tests/TestExternalFile.m b/+tests/TestExternalFile.m new file mode 100644 index 00000000..7608b866 --- /dev/null +++ b/+tests/TestExternalFile.m @@ -0,0 +1,129 @@ +classdef TestExternalFile < tests.Prep + % TestExternalFile tests scenarios related to external file store. + methods (Static) + function TestExternalFile_checks(test_instance, store, cache) + % load config + pkg = what('tests'); + ext_root = strrep(test_instance.external_file_store_root, '\', '/'); + dj.config.load([strrep(pkg.path, '\', '/') '/test_schemas/store_config.json']); + dj.config(['stores.' store '.location'], strrep(dj.config(... + ['stores.' store '.location']), '{{external_file_store_root}}', ... + ext_root)); + dj.config('stores.main', dj.config(['stores.' store])); + dj.config(cache, [ext_root '/cache']); + % create schema + package = 'External'; + dj.createSchema(package,[test_instance.test_root '/test_schemas'], ... + [test_instance.PREFIX '_external']); + schema = External.getSchema; + % test value + rng(5); + test_val1 = floor(rand(1,3)*100); + % insert and fetch + insert(External.Dimension, struct( ... + 'dimension_id', 4, ... + 'dimension', test_val1 ... + )); + q = External.Dimension & 'dimension_id=4'; + res = q.fetch('dimension'); + value_check = res(1).dimension; + test_instance.verifyEqual(value_check, test_val1); + % check subfolding + packed_cell = mym('serialize {M}', test_val1); + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); + uuid_path = schema.external.table('main').make_uuid_path(uuid, ''); + subfold_path = strrep(uuid_path, dj.config('stores.main.location'), ''); + subfold_path = strrep(subfold_path, ['/' schema.dbname '/'], ''); + subfold_path = strrep(subfold_path, ['/' uuid], ''); + test_instance.verifyEqual(cellfun(@(x) length(x), split(subfold_path, '/')), ... + schema.external.table('main').spec.type_config.subfolding); + % delete value to rely on cache + if ispc + [status,cmdout] = system(['rmdir /Q /s "' ... + test_instance.external_file_store_root '\base"']); + else + [status,cmdout] = system(['rm -R ' ... 
+ test_instance.external_file_store_root '/base']); + end + res = q.fetch('dimension'); + value_check = res(1).dimension; + test_instance.verifyEqual(value_check, test_val1); + % populate + populate(External.Image); + q = External.Image & 'dimension_id=4'; + res = q.fetch('img'); + value_check = res(1).img; + test_instance.verifyEqual(size(value_check), test_val1); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==2); + test_instance.verifyTrue(schema.external.table('main').unused.count==0); + % delete from Dimension + del(External.Dimension); + % check children + q = External.Image; + test_instance.verifyTrue(q.count==0); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==0); + test_instance.verifyTrue(schema.external.table('main').unused.count==2); + % check delete from external + schema.external.table('main').delete(true, ''); + test_instance.verifyEqual(lastwarn, ['File ''' ... + dj.config('stores.main.location') '/' schema.dbname '/' subfold_path '/' ... + uuid ''' not found.']); + % reverse engineer + q = External.Dimension; + raw_def = dj.internal.Declare.getDefinition(q); + assembled_def = describe(q); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); + test_instance.verifyEqual(assembled_sql, raw_sql); + % drop table + drop(External.Dimension); + % check used and unused + test_instance.verifyTrue(schema.external.table('main').used.count==0); + test_instance.verifyTrue(schema.external.table('main').unused.count==0); + % remove external storage content + if ispc + [status,cmdout] = system(['rmdir /Q /s "' ... + test_instance.external_file_store_root '"']); + else + [status,cmdout] = system(['rm -R ' ... + test_instance.external_file_store_root]); + end + % drop database + schema.conn.query(['DROP DATABASE `' test_instance.PREFIX '_external`']); + dj.config.restore; + end + end + methods (Test) + function TestExternalFile_testLocal(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local', 'blobCache'); + end + function TestExternalFile_testLocalDefault(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'new_local_default', ... + 'blobCache'); + end + function TestExternalFile_testBackward(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'local', 'cache'); + end + function TestExternalFile_testBackwardDefault(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + tests.TestExternalFile.TestExternalFile_checks(testCase, 'local_default', 'cache'); + end + function TestExternalFile_testMD5Hash(testCase) + st = dbstack; + disp(['---------------' st(1).name '---------------']); + v = int64([1;2]); + packed_cell = mym('serialize {M}', v); + uuid = dj.lib.DataHash(packed_cell{1}, 'bin', 'hex', 'MD5'); + testCase.verifyEqual(uuid, '1d751e2e1e74faf84ab485fde8ef72be'); + end + end +end \ No newline at end of file diff --git a/+tests/TestFetch.m b/+tests/TestFetch.m index 39120208..4959e98c 100644 --- a/+tests/TestFetch.m +++ b/+tests/TestFetch.m @@ -1,7 +1,7 @@ classdef TestFetch < tests.Prep % TestFetch tests typical insert/fetch scenarios. 
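For orientation, the subfolding check above implies a store layout roughly like the following. This is a sketch based on the test's string handling; the location, schema name, and hash are purely illustrative, and it assumes (the test does not verify this) that the subfolders are taken from the leading characters of the hash:

    % With the default 'subfolding', [2, 2] a blob lands at
    %   <location>/<schema>/<xx>/<yy>/<uuid>
    uuid     = '1d751e2e1e74faf84ab485fde8ef72be';   % MD5 of the packed blob
    location = '/tmp/root/base';                     % stores.main.location
    dbname   = 'djtest_external';                    % example schema (database) name
    external_filepath = strjoin({location, dbname, uuid(1:2), uuid(3:4), uuid}, '/');
    % -> '/tmp/root/base/djtest_external/1d/75/1d751e2e1e74faf84ab485fde8ef72be'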
methods (Test) - function testVariousDatatypes(testCase) + function TestFetch_testVariousDatatypes(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -31,13 +31,13 @@ function testVariousDatatypes(testCase) testCase.verifyEqual(res(1).number, 3.213); testCase.verifyEqual(res(1).blob, [1, 2; 3, 4]); end - function testDescribe(testCase) + function TestFetch_testDescribe(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); q = University.All; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - raw_sql = dj.internal.Declare.declare(q, raw_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); assembled_sql = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end diff --git a/+tests/TestProjection.m b/+tests/TestProjection.m index 48b809ad..0b0b2dac 100644 --- a/+tests/TestProjection.m +++ b/+tests/TestProjection.m @@ -1,7 +1,7 @@ classdef TestProjection < tests.Prep % TestProjection tests use of q.proj(...). methods (Test) - function testDateConversion(testCase) + function TestProjection_testDateConversion(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; diff --git a/+tests/TestTls.m b/+tests/TestTls.m index cdda43d0..a340afb6 100644 --- a/+tests/TestTls.m +++ b/+tests/TestTls.m @@ -1,7 +1,7 @@ classdef TestTls < tests.Prep % TestTls tests TLS connection scenarios. methods (Test) - function testSecureConn(testCase) + function TestTls_testSecureConn(testCase) % secure connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -12,7 +12,7 @@ function testSecureConn(testCase) '',true,true).query(... 'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}) > 0); end - function testInsecureConn(testCase) + function TestTls_testInsecureConn(testCase) % insecure connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -24,7 +24,7 @@ function testInsecureConn(testCase) 'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}, ... ''); end - function testPreferredConn(testCase) + function TestTls_testPreferredConn(testCase) % preferred connection test st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -35,7 +35,7 @@ function testPreferredConn(testCase) '',true).query(... 'SHOW STATUS LIKE ''Ssl_cipher''').Value{1}) > 0); end - function testRejectException(testCase) + function TestTls_testRejectException(testCase) % test exception on require TLS st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -54,7 +54,7 @@ function testRejectException(testCase) ["requires secure connection","Access denied"])); %MySQL8,MySQL5 end end - function testStructException(testCase) + function TestTls_testStructException(testCase) % test exception on TLS struct st = dbstack; disp(['---------------' st(1).name '---------------']); @@ -63,7 +63,7 @@ function testStructException(testCase) testCase.CONN_INFO.user, ... testCase.CONN_INFO.password, ... '',true,struct('ca','fake/path/some/where')), ... - 'DataJoint:TLS:InvalidStruct'); + 'mYm:TLS:InvalidStruct'); end end end \ No newline at end of file diff --git a/+tests/TestUuid.m b/+tests/TestUuid.m index 33b2182d..8b589b17 100644 --- a/+tests/TestUuid.m +++ b/+tests/TestUuid.m @@ -1,7 +1,7 @@ classdef TestUuid < tests.Prep % TestUuid tests uuid scenarios. 
methods (Test) - function testInsertFetch(testCase) + function TestUuid_testInsertFetch(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -32,7 +32,7 @@ function testInsertFetch(testCase) testCase.verifyEqual(value_check, test_val2); end - function testQuery(testCase) + function TestUuid_testQuery(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); package = 'University'; @@ -51,14 +51,14 @@ function testQuery(testCase) testCase.verifyEqual(value_check, test_val1); end - function testReverseEngineering(testCase) + function TestUuid_testReverseEngineering(testCase) st = dbstack; disp(['---------------' st(1).name '---------------']); q = University.Message; raw_def = dj.internal.Declare.getDefinition(q); assembled_def = describe(q); - raw_sql = dj.internal.Declare.declare(q, raw_def); - assembled_sql = dj.internal.Declare.declare(q, assembled_def); + [raw_sql, ~] = dj.internal.Declare.declare(q, raw_def); + [assembled_sql, ~] = dj.internal.Declare.declare(q, assembled_def); testCase.verifyEqual(raw_sql, assembled_sql); end end diff --git a/+tests/test_schemas/+External/Dimension.m b/+tests/test_schemas/+External/Dimension.m new file mode 100644 index 00000000..9c5b62a6 --- /dev/null +++ b/+tests/test_schemas/+External/Dimension.m @@ -0,0 +1,7 @@ +%{ +dimension_id : int +--- +dimension=null : blob@main +%} +classdef Dimension < dj.Manual +end \ No newline at end of file diff --git a/+tests/test_schemas/+External/Image.m b/+tests/test_schemas/+External/Image.m new file mode 100644 index 00000000..6d2da9c8 --- /dev/null +++ b/+tests/test_schemas/+External/Image.m @@ -0,0 +1,15 @@ +%{ +-> External.Dimension +--- +img=null : blob@main +%} +classdef Image < dj.Computed + methods(Access=protected) + function makeTuples(self, key) + dim = num2cell(fetch1(External.Dimension & key, 'dimension')); + rng(5); + key.img = rand(dim{:}); + self.insert(key) + end + end +end \ No newline at end of file diff --git a/+tests/test_schemas/+University/All.m b/+tests/test_schemas/+University/All.m index d96bcdb8..16766659 100644 --- a/+tests/test_schemas/+University/All.m +++ b/+tests/test_schemas/+University/All.m @@ -2,10 +2,10 @@ # All id : int --- -string : varchar(30) -date : datetime -number : float -blob : longblob +string=null : varchar(30) +date=null : datetime +number=null : float +blob=null : longblob %} classdef All < dj.Manual end \ No newline at end of file diff --git a/+tests/test_schemas/store_config.json b/+tests/test_schemas/store_config.json new file mode 100644 index 00000000..0b3cb172 --- /dev/null +++ b/+tests/test_schemas/store_config.json @@ -0,0 +1,45 @@ +{ + "database.host": "env", + "database.password": "var", + "database.user": "override", + "database.port": 3306, + "database.reconnect": true, + "connection.init_function": null, + "connection.charset": "", + "loglevel": "DEBUG", + "safemode": false, + "fetch_format": "array", + "display.limit": 12, + "display.width": 14, + "display.show_tuple_count": true, + "database.use_tls": null, + "enable_python_native_blobs": false, + "stores": { + "local": { + "protocol": "file", + "location": "{{external_file_store_root}}/base", + "subfolding": [ + 3, + 4 + ] + }, + "new_local": { + "datajoint_type": "blob", + "protocol": "file", + "location": "{{external_file_store_root}}/base", + "subfolding": [ + 3, + 4 + ] + }, + "local_default": { + "protocol": "file", + "location": "{{external_file_store_root}}/base" + }, + "new_local_default": { + "datajoint_type": 
"blob", + "protocol": "file", + "location": "{{external_file_store_root}}/base" + } + } +} \ No newline at end of file diff --git a/.gitignore b/.gitignore index b48a4ccf..c9a8c43f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,8 @@ mym/ *.env notebook *getSchema.m -docker-compose.yml \ No newline at end of file +docker-compose.yml +.vscode +matlab.prf +win.* +macos.* \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 09778d2b..9535ef6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ branches: except: - - master-stage - - stage + - /^stage.*$/ sudo: required services: - docker diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index d0e0dc03..6d293123 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -4,21 +4,35 @@ x-net: &net networks: - main services: + db: + <<: *net + image: datajoint/mysql:${MYSQL_TAG} + environment: + - MYSQL_ROOT_PASSWORD=simple + fakeservices.datajoint.io: + <<: *net + image: raphaelguzman/nginx:v0.0.3 + environment: + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + depends_on: + db: + condition: service_healthy app: <<: *net environment: - DISPLAY - MATLAB_LICENSE - MATLAB_USER - - DJ_HOST=mysql + - DJ_HOST=fakeservices.datajoint.io - DJ_USER=root - DJ_PASS=simple - - DJ_TEST_HOST=mysql + - DJ_TEST_HOST=fakeservices.datajoint.io - DJ_TEST_USER=datajoint - DJ_TEST_PASSWORD=datajoint image: raphaelguzman/matlab:${MATLAB_VERSION}-MIN depends_on: - mysql: + fakeservices.datajoint.io: condition: service_healthy user: ${MATLAB_UID}:${MATLAB_GID} working_dir: /src @@ -34,10 +48,5 @@ services: volumes: - .:/src - /tmp/.X11-unix:/tmp/.X11-unix:rw - mysql: - <<: *net - image: datajoint/mysql:${MYSQL_TAG} - environment: - - MYSQL_ROOT_PASSWORD=simple networks: main: \ No newline at end of file diff --git a/local-docker-compose.yml b/local-docker-compose.yml index 17543866..e4262456 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -4,13 +4,35 @@ x-net: &net networks: - main services: + db: + <<: *net + image: datajoint/mysql:${MYSQL_TAG} + environment: + - MYSQL_ROOT_PASSWORD=simple + # ports: + # - "3306:3306" + ## To persist MySQL data + # volumes: + # - ./mysql/data:/var/lib/mysql + fakeservices.datajoint.io: + <<: *net + image: raphaelguzman/nginx:v0.0.3 + environment: + - ADD_db_TYPE=DATABASE + - ADD_db_ENDPOINT=db:3306 + ports: + - "443:443" + - "3306:3306" + depends_on: + db: + condition: service_healthy app: <<: *net environment: - - DJ_HOST=db + - DJ_HOST=fakeservices.datajoint.io - DJ_USER=root - DJ_PASS=simple - - DJ_TEST_HOST=db + - DJ_TEST_HOST=fakeservices.datajoint.io - DJ_TEST_USER=datajoint - DJ_TEST_PASSWORD=datajoint - MATLAB_USER @@ -19,7 +41,7 @@ services: - DISPLAY image: raphaelguzman/matlab:${MATLAB_VERSION}-GUI depends_on: - db: + fakeservices.datajoint.io: condition: service_healthy ports: - "8888:8888" @@ -44,15 +66,5 @@ services: - /tmp/.X11-unix:/tmp/.X11-unix:rw ## Additional mounts may go here # - ./notebook:/home/muser/notebooks - db: - <<: *net - image: datajoint/mysql:${MYSQL_TAG} - environment: - - MYSQL_ROOT_PASSWORD=simple - ports: - - "3306:3306" - ## To persist MySQL data - # volumes: - # - ./mysql/data:/var/lib/mysql networks: main: \ No newline at end of file diff --git a/setupDJ.m b/setupDJ.m index 5fc13e22..1d721ffe 100644 --- a/setupDJ.m +++ b/setupDJ.m @@ -27,7 +27,7 @@ function setupDJ(skipPathAddition, force) fprintf('mym missing. 
Downloading...\n') target = fullfile(base, 'mym.zip'); % mymURL = 'https://github.com/datajoint/mym/archive/master.zip'; - mymURL = 'https://github.com/datajoint/mym/archive/external-storage.zip'; + mymURL = 'https://github.com/guzman-raphael/mym/archive/ext-serialize.zip'; target = websave(target, mymURL); if isunix && ~ismac % on Linux Matlab unzip doesn't work properly so use system unzip @@ -37,7 +37,7 @@ function setupDJ(skipPathAddition, force) end % rename extracted mym-master directory to mym % movefile(fullfile(base, 'mym-master'), mymdir) - movefile(fullfile(base, 'mym-external-storage'), mymdir) + movefile(fullfile(base, 'mym-ext-serialize'), mymdir) delete(target) end
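Taken together, these changes support the following workflow; a sketch assuming the External test schema above and a configured file store named 'main':

    % Table definition (see +tests/test_schemas/+External/Dimension.m):
    %   dimension_id    : int
    %   ---
    %   dimension=null  : blob@main    % blob kept in the 'main' external store
    % INSERT serializes the value, hashes it (MD5 -> UUID), uploads the buffer via the
    % store plugin, and records the UUID in the schema's ~external_main tracking table:
    insert(External.Dimension, struct('dimension_id', 4, 'dimension', rand(1, 3)));
    % FETCH resolves the UUID back through the store (or the local blob cache):
    q = External.Dimension & 'dimension_id=4';
    res = q.fetch('dimension');
    % Unused external objects can later be cleaned up per store:
    schema = External.getSchema;
    schema.external.table('main').delete(true, '');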