From 886a1600841b93cf85255fede6d90358ddbc309e Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Sat, 30 Jul 2016 13:27:40 +0800 Subject: [PATCH 01/14] add redis cache hint to docs --- docs/installation.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index 54b851a879420..71a2807f8cc34 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -180,7 +180,8 @@ complies with the Flask-Cache specifications. Flask-Cache supports multiple caching backends (Redis, Memcached, SimpleCache (in-memory), or the local filesystem). If you are going to use Memcached please use the pylibmc client library as python-memcached does -not handle storing binary data correctly. +not handle storing binary data correctly. If you use Redis, please install +`python-redis <https://pypi.python.org/pypi/redis>`_. For setting your timeouts, this is done in the Caravel metadata and goes up the "timeout searchpath", from your slice configuration, to your From 3a28a5da5c40441de6fb5721ec4f9c81e4e476f0 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Wed, 17 Aug 2016 15:18:52 +0800 Subject: [PATCH 02/14] add synctable cmd and get need_insert tables --- caravel/bin/caravel | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index c53cdc2b338fb..9e74ac6fe949f 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -125,6 +125,21 @@ def refresh_druid(): "[" + cluster.cluster_name + "]") session.commit() +@manager.command +def synctable(): + exists_tables = [] + for row in db.session.query(caravel.models.SqlaTable).all(): + exists_tables += [(row.database, row.name)] + + all_tables = [] + for _db in db.session.query(caravel.models.Database).all(): + for _tablename in _db.get_sqla_engine().table_names(): + all_tables += [(_db.database_name, _tablename)] + all_tables = [_ for _ in all_tables if _[1].startswith('data_')] + + need_insert = list(set(all_tables) - 
set(exists_tables)) + + print(need_insert) if __name__ == "__main__": manager.run() From 18f845c145350243319dd0fc70bc04db1c14e61d Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Wed, 17 Aug 2016 15:42:02 +0800 Subject: [PATCH 03/14] synctable done --- caravel/bin/caravel | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 9e74ac6fe949f..75f609b139854 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -139,7 +139,17 @@ def synctable(): need_insert = list(set(all_tables) - set(exists_tables)) - print(need_insert) + for row in need_insert: + db_name = row[0] + table_name = row[1] + + tbl = caravel.models.SqlaTable(table_name=table_name) + tbl.description = "" + tbl.is_featured = False + tbl.database = db.session.query(caravel.models.Database).filter_by(database_name=db_name).first() + db.session.merge(tbl) + db.session.commit() + tbl.fetch_metadata() if __name__ == "__main__": manager.run() From ee7cd878152cf4ed0d4d9a16e7095d72e5255faa Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Wed, 17 Aug 2016 17:47:06 +0800 Subject: [PATCH 04/14] add synctable desc add synctable prefix option --- caravel/bin/caravel | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 75f609b139854..892a0abe59c6b 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -125,8 +125,11 @@ def refresh_druid(): "[" + cluster.cluster_name + "]") session.commit() -@manager.command -def synctable(): +@manager.option( + '-p', '--prefix', default='data_', + help="Sync Table Prefix") +def synctable(prefix): + ''' Sync DB Table with Caravel Table''' exists_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): exists_tables += [(row.database, row.name)] @@ -135,7 +138,7 @@ def synctable(): for _db in db.session.query(caravel.models.Database).all(): for _tablename in _db.get_sqla_engine().table_names(): all_tables += 
[(_db.database_name, _tablename)] - all_tables = [_ for _ in all_tables if _[1].startswith('data_')] + all_tables = [_ for _ in all_tables if _[1].startswith(prefix)] need_insert = list(set(all_tables) - set(exists_tables)) From 24383340179a240e8b9cf8832304b97826238ac0 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Thu, 18 Aug 2016 12:03:48 +0800 Subject: [PATCH 05/14] fine tune --- caravel/bin/caravel | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 892a0abe59c6b..46d33e1774fb1 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -130,22 +130,20 @@ def refresh_druid(): help="Sync Table Prefix") def synctable(prefix): ''' Sync DB Table with Caravel Table''' - exists_tables = [] + existing_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): - exists_tables += [(row.database, row.name)] + existing_tables += [(row.database, row.name)] all_tables = [] - for _db in db.session.query(caravel.models.Database).all(): - for _tablename in _db.get_sqla_engine().table_names(): - all_tables += [(_db.database_name, _tablename)] - all_tables = [_ for _ in all_tables if _[1].startswith(prefix)] + for caravel_db in db.session.query(caravel.models.Database).all(): + for table_name in caravel_db.get_sqla_engine().table_names(): + all_tables += [(caravel_db.database_name, table_name)] - need_insert = list(set(all_tables) - set(exists_tables)) - - for row in need_insert: - db_name = row[0] - table_name = row[1] + all_tables = [row for row in all_tables if row[1].startswith(prefix)] + + need_insert = list(set(all_tables) - set(existing_tables)) + for db_name, table_name in need_insert: tbl = caravel.models.SqlaTable(table_name=table_name) tbl.description = "" tbl.is_featured = False From 5db3c698c976a09dcb828133829d303f26fa01df Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Thu, 18 Aug 2016 12:07:41 +0800 Subject: [PATCH 06/14] [synctable] fix bug: multi insert table --- 
caravel/bin/caravel | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 46d33e1774fb1..82c1fcd151ce1 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -132,8 +132,7 @@ def synctable(prefix): ''' Sync DB Table with Caravel Table''' existing_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): - existing_tables += [(row.database, row.name)] - + existing_tables += [(row.database.database_name, row.name)] all_tables = [] for caravel_db in db.session.query(caravel.models.Database).all(): for table_name in caravel_db.get_sqla_engine().table_names(): From a165f432709d813e8721e9d72a39af0ccb7d4254 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Thu, 18 Aug 2016 12:10:26 +0800 Subject: [PATCH 07/14] [synctable] add hint console output --- caravel/bin/caravel | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 82c1fcd151ce1..ad99ce36e8a88 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -151,5 +151,9 @@ def synctable(prefix): db.session.commit() tbl.fetch_metadata() + print("[{db}] {table} insert success.".format(db=db_name, table=table_name)) + + print("[{}] Sync table complete.".format(len(need_insert))) + if __name__ == "__main__": manager.run() From eec6c5f67dddedf2e75063ea60168be2b19f3b8c Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 14:55:36 +0800 Subject: [PATCH 08/14] use defaultdict to update code --- caravel/bin/caravel | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index ad99ce36e8a88..5f46bd02e7578 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -7,6 +7,7 @@ from __future__ import unicode_literals import logging from datetime import datetime from subprocess import Popen +from collections import defaultdict from flask_migrate import MigrateCommand from flask_script import 
Manager @@ -133,27 +134,31 @@ def synctable(prefix): existing_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): existing_tables += [(row.database.database_name, row.name)] - all_tables = [] + + insert_dict = defaultdict(list) for caravel_db in db.session.query(caravel.models.Database).all(): for table_name in caravel_db.get_sqla_engine().table_names(): - all_tables += [(caravel_db.database_name, table_name)] - - all_tables = [row for row in all_tables if row[1].startswith(prefix)] - - need_insert = list(set(all_tables) - set(existing_tables)) + table = (caravel_db.database_name, table_name) - for db_name, table_name in need_insert: - tbl = caravel.models.SqlaTable(table_name=table_name) - tbl.description = "" - tbl.is_featured = False - tbl.database = db.session.query(caravel.models.Database).filter_by(database_name=db_name).first() - db.session.merge(tbl) - db.session.commit() - tbl.fetch_metadata() - - print("[{db}] {table} insert success.".format(db=db_name, table=table_name)) - - print("[{}] Sync table complete.".format(len(need_insert))) + if (table not in existing_tables) and table_name.startswith(prefix): + insert_dict[caravel_db].append(table_name) + + count_insert = 0 + for caravel_db, tables in insert_dict.items(): + for table_name in tables: + count_insert += 1 + + tbl = caravel.models.SqlaTable(table_name=table_name) + tbl.description = "" + tbl.is_featured = False + tbl.database = caravel_db + db.session.merge(tbl) + db.session.commit() + tbl.fetch_metadata() + + print("[{db}] {table} insert success.".format(db=caravel_db.database_name, table=table_name)) + + print("[{}] Sync table complete.".format(count_insert)) if __name__ == "__main__": manager.run() From 11c7c0145fab8942478e1df4b91fc242f0c62219 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 15:16:32 +0800 Subject: [PATCH 09/14] [synctable] use add instead of merge --- caravel/bin/caravel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/caravel/bin/caravel b/caravel/bin/caravel index 5f46bd02e7578..208a94595d7ea 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -152,7 +152,7 @@ def synctable(prefix): tbl.description = "" tbl.is_featured = False tbl.database = caravel_db - db.session.merge(tbl) + db.session.add(tbl) db.session.commit() tbl.fetch_metadata() From c57200e808a1bda19a7e13be250bf73e80973c98 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 15:41:23 +0800 Subject: [PATCH 10/14] specifies sync db --- caravel/bin/caravel | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 208a94595d7ea..716465a9c13a3 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -129,20 +129,32 @@ def refresh_druid(): @manager.option( '-p', '--prefix', default='data_', help="Sync Table Prefix") -def synctable(prefix): +@manager.option( + '-d', '--database', + help="Specifies database") +def synctable(prefix, database=None): ''' Sync DB Table with Caravel Table''' existing_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): existing_tables += [(row.database.database_name, row.name)] insert_dict = defaultdict(list) - for caravel_db in db.session.query(caravel.models.Database).all(): + + if database is None: + caravel_dbs = db.session.query(caravel.models.Database).all() + else: + caravel_dbs = (db.session.query(caravel.models.Database) + .filter_by(database_name=database).all()) + + # Get all need insert table + for caravel_db in caravel_dbs: for table_name in caravel_db.get_sqla_engine().table_names(): table = (caravel_db.database_name, table_name) if (table not in existing_tables) and table_name.startswith(prefix): insert_dict[caravel_db].append(table_name) + # Insert to caravel tables count_insert = 0 for caravel_db, tables in insert_dict.items(): for table_name in tables: @@ -156,7 +168,10 @@ def synctable(prefix): db.session.commit() tbl.fetch_metadata() - print("[{db}] 
{table} insert success.".format(db=caravel_db.database_name, table=table_name)) + print("[{db}] {table} insert success.".format( + db=caravel_db.database_name, + table=table_name + )) print("[{}] Sync table complete.".format(count_insert)) From d0cb663b871246d58814d2bc9b04ddc912110819 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 15:42:44 +0800 Subject: [PATCH 11/14] [synctable] add database hint text --- caravel/bin/caravel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 716465a9c13a3..9c77cdabeba75 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -131,7 +131,7 @@ def refresh_druid(): help="Sync Table Prefix") @manager.option( '-d', '--database', - help="Specifies database") + help="Specifies database (use caravel db name, not real db name)") def synctable(prefix, database=None): ''' Sync DB Table with Caravel Table''' existing_tables = [] From 06a65d04d4e805958c534d32fe30683c36b5cbf5 Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 15:56:20 +0800 Subject: [PATCH 12/14] add warning when sync all table and set prefix is empty str --- caravel/bin/caravel | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 9c77cdabeba75..f64afa915bcd9 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -127,19 +127,30 @@ def refresh_druid(): session.commit() @manager.option( - '-p', '--prefix', default='data_', + '-p', '--prefix', default="", help="Sync Table Prefix") @manager.option( - '-d', '--database', + '-d', '--database', default=None, help="Specifies database (use caravel db name, not real db name)") -def synctable(prefix, database=None): +def synctable(prefix, database): ''' Sync DB Table with Caravel Table''' + print("") + + if (prefix == "" and database is None) or (database == 'main' and prefix == ""): + print("If you not set prefix and db name, some system table may be sync 
to caravel.") + + check_status = raw_input("Are you sure do this? (Y/N)").lower().strip() + if check_status not in ['y', 'yes']: + print("Exit sync table") + exit() + existing_tables = [] for row in db.session.query(caravel.models.SqlaTable).all(): existing_tables += [(row.database.database_name, row.name)] insert_dict = defaultdict(list) + # db exists? if database is None: caravel_dbs = db.session.query(caravel.models.Database).all() else: From 6aa9d3eca52c0ee61f264dc828731b6fd277998e Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 16:00:36 +0800 Subject: [PATCH 13/14] check is database exists --- caravel/bin/caravel | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index f64afa915bcd9..1759acef798a9 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -135,7 +135,7 @@ def refresh_druid(): def synctable(prefix, database): ''' Sync DB Table with Caravel Table''' print("") - + if (prefix == "" and database is None) or (database == 'main' and prefix == ""): print("If you not set prefix and db name, some system table may be sync to caravel.") @@ -150,13 +150,16 @@ def synctable(prefix, database): insert_dict = defaultdict(list) - # db exists? 
if database is None: caravel_dbs = db.session.query(caravel.models.Database).all() else: caravel_dbs = (db.session.query(caravel.models.Database) .filter_by(database_name=database).all()) + if caravel_dbs == []: + print("Database not exists, please check database name") + exit() + # Get all need insert table for caravel_db in caravel_dbs: for table_name in caravel_db.get_sqla_engine().table_names(): From e264d201df217ef463e4d901635289344cb2d20d Mon Sep 17 00:00:00 2001 From: Karl Lin Date: Fri, 19 Aug 2016 16:03:13 +0800 Subject: [PATCH 14/14] [synctable] fine tune --- caravel/bin/caravel | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/caravel/bin/caravel b/caravel/bin/caravel index 1759acef798a9..fcf21be0a7cad 100755 --- a/caravel/bin/caravel +++ b/caravel/bin/caravel @@ -143,13 +143,7 @@ def synctable(prefix, database): if check_status not in ['y', 'yes']: print("Exit sync table") exit() - - existing_tables = [] - for row in db.session.query(caravel.models.SqlaTable).all(): - existing_tables += [(row.database.database_name, row.name)] - - insert_dict = defaultdict(list) - + if database is None: caravel_dbs = db.session.query(caravel.models.Database).all() else: @@ -160,7 +154,12 @@ def synctable(prefix, database): print("Database not exists, please check database name") exit() + existing_tables = [] + for row in db.session.query(caravel.models.SqlaTable).all(): + existing_tables += [(row.database.database_name, row.name)] + # Get all need insert table + insert_dict = defaultdict(list) for caravel_db in caravel_dbs: for table_name in caravel_db.get_sqla_engine().table_names(): table = (caravel_db.database_name, table_name)