Skip to content

Commit

Permalink
Merge pull request #353 from datavisyn/derive-set-column-categories
Browse files Browse the repository at this point in the history
Derive set column categories automatically
  • Loading branch information
thinkh authored Jul 14, 2020
2 parents deb6208 + 70900ff commit 6e50dab
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions tdp_core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def derive_columns(table_name, engine, columns=None):
# derive the missing domains and categories
number_columns = [k for k, col in columns.items() if
col['type'] == 'number' and ('min' not in col or 'max' not in col)]
categorical_columns = [k for k, col in columns.items() if col['type'] == 'categorical' and 'categories' not in col]
categorical_columns = [k for k, col in columns.items() if (col['type'] == 'categorical' or col['type'] == 'set') and 'categories' not in col]
if number_columns or categorical_columns:
with session(engine) as s:
if number_columns:
Expand All @@ -477,7 +477,14 @@ def derive_columns(table_name, engine, columns=None):
template += """ AND {col} <> ''"""
template += """ ORDER BY {col} ASC"""
cats = s.execute(template.format(col=col, table=table_name))
columns[col]['categories'] = [str(r['cat']) for r in cats if r['cat'] is not None]
categories = [str(r['cat']) for r in cats if r['cat'] is not None]
if columns[col]['type'] == 'set':
separator = getattr(columns[col], 'separator', ';')
separated_categories = [category.split(separator) for category in categories]
# flatten array
categories = list(set([category for sublist in separated_categories for category in sublist]))
categories.sort() # sort list to avoid random order with each run
columns[col]['categories'] = categories

return columns

Expand Down

0 comments on commit 6e50dab

Please sign in to comment.