From b9e4027ac56bbb585a27eaa3c3a52fc40568139c Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 15:24:49 +0200 Subject: [PATCH 01/23] table profile added --- CHANGELOG.md | 1 + doc/_toc.yml | 1 + doc/user-guide/explore-tables.md | 88 ++++++++++++++++++ src/sql/inspect.py | 154 ++++++++++++++++++++++++++++++- src/sql/magic_cmd.py | 28 +++++- src/tests/test_magic_cmd.py | 53 +++++++++++ 6 files changed, 323 insertions(+), 2 deletions(-) create mode 100644 doc/user-guide/explore-tables.md diff --git a/CHANGELOG.md b/CHANGELOG.md index b50b9a050..51ef6da9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # CHANGELOG ## 0.5.7dev +* [Feature] Adds `%sqlcmd profile` (#66) ## 0.5.6 (2023-02-16) diff --git a/doc/_toc.yml b/doc/_toc.yml index 7180cedd4..fcc5ed168 100644 --- a/doc/_toc.yml +++ b/doc/_toc.yml @@ -13,6 +13,7 @@ parts: - file: compose - file: user-guide/tables-columns - file: plot-legacy + - file: user-guide/explore-tables - caption: Integrations chapters: diff --git a/doc/user-guide/explore-tables.md b/doc/user-guide/explore-tables.md new file mode 100644 index 000000000..9d37dd027 --- /dev/null +++ b/doc/user-guide/explore-tables.md @@ -0,0 +1,88 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.14.4 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Explore tables + +When dealing with a new dataset, it's crucial for practitioners to have a comprehensive understanding of the data in a timely manner. This involves exploring and summarizing the dataset efficiently to extract valuable insights. However, this can be a time-consuming process. Fortunately, `%sqlcmd profile` offers an easy way to generate statistics and descriptive information, enabling practitioners to quickly gain a deeper understanding of the dataset. + +Availble statistics: + +* The count of non empty values +* The number of unique values +* The top (most frequent) value +* The frequency of your top value +* The mean, standard deviation, min and max values +* The percentiles of your data: 25%, 50% and 75%. + + +## Examples + +### Simple example with SQLite + +```{code-cell} ipython3 +:tags: [hide-output] + +%load_ext sql +%sql sqlite:// +``` + +Let's create our table + +```{code-cell} ipython3 +:tags: [hide-output] + +%%sql sqlite:// +CREATE TABLE example_table (rating, price, number, symbol); +INSERT INTO example_table VALUES (14.44, 2.48, 82, 'a'); +INSERT INTO example_table VALUES (13.13, 1.50, 93, 'b'); +INSERT INTO example_table VALUES (12.59, 0.20, 98, 'a'); +INSERT INTO example_table VALUES (11.54, 0.41, 89, 'a'); +INSERT INTO example_table VALUES (10.532, 0.1, 88, 'c'); +INSERT INTO example_table VALUES (11.5, 0.2, 84, 'b'); +INSERT INTO example_table VALUES (11.1, 0.3, 90, 'a'); +INSERT INTO example_table VALUES (12.9, 0.31, 86, ''); +INSERT INTO example_table VALUES (12.9, 0.31, 86, ' '); +``` + +```{code-cell} ipython3 +%sqlcmd profile -t example_table +``` + +### Large datasets + +We can easily explore large SQlite database using DuckDB. 
+ +```{code-cell} ipython3 +:tags: [hide-output] + +import urllib.request +from pathlib import Path + +if not Path("example.db").is_file(): + url = "https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite" # noqa + urllib.request.urlretrieve(url, "example.db") +``` + + +```{code-cell} ipython3 +:tags: [hide-output] + +%%sql duckdb:/// +INSTALL 'sqlite_scanner'; +LOAD 'sqlite_scanner'; +CALL sqlite_attach('example.db'); +``` + +```{code-cell} ipython3 +%sqlcmd profile -t track +``` \ No newline at end of file diff --git a/src/sql/inspect.py b/src/sql/inspect.py index 751f86466..d6c16c9ab 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -1,9 +1,10 @@ from sqlalchemy import inspect from prettytable import PrettyTable from ploomber_core.exceptions import modify_exceptions - from sql.connection import Connection from sql.telemetry import telemetry +import sql.run +import math def _get_inspector(conn): @@ -73,6 +74,146 @@ def __init__(self, name, schema, conn=None) -> None: self._table_txt = self._table.get_string() +@modify_exceptions +class TableDescription(DatabaseInspection): + """ + Generates descriptive statistics. + + Descriptive statistics are: + + Count - Number of all non empty values + + Mean - Mean of the values + + Max - Maximum of the values in the object. + + Min - Minimum of the values in the object. + + STD - Standard deviation of the observations + + 25h, 50h and 75h percentiles + + Unique - Number of unique values + + Top - The most frequent value + + Freq - Frequency of the top value + + """ + + def __init__(self, table_name, config=None, user_ns=None) -> None: + result_table_columns = sql.run.run( + Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config, user_ns + ) + + columns = result_table_columns.keys + + table_stats = dict({}) + + for column in columns: + table_stats[column] = dict() + result_col_unique_values = sql.run.run( + Connection.current, + f"SELECT COUNT(DISTINCT {column}) as unique_count FROM {table_name}", + config, + user_ns, + ) + + result_col_freq_values = sql.run.run( + Connection.current, + f"""SELECT {column}, COUNT({column}) as frequency FROM {table_name} + GROUP BY {column} ORDER BY Count({column}) Desc""", + config, + user_ns, + ) + + result_non_empty_values = sql.run.run( + Connection.current, + f"""SELECT {column} FROM {table_name} WHERE {column} + IS NOT NULL AND TRIM({column}) <> '' + ORDER BY {column} ASC + """, + config, + user_ns, + ) + + col_values = result_non_empty_values.dict()[column] + count = len(col_values) + table_stats[column]["count"] = count + table_stats[column]["freq"] = result_col_freq_values.dict()["frequency"][0] + table_stats[column]["unique"] = result_col_unique_values.dict()[ + "unique_count" + ][0] + table_stats[column]["top"] = result_col_freq_values.dict()[column][0] + table_stats[column]["min"] = col_values[0] + table_stats[column]["max"] = col_values[count - 1] + + try: + mean = sum(col_values) / count + table_stats[column]["mean"] = mean + + values_sum = sum([(math.pow((v - mean), 2)) for v in col_values]) + std = math.sqrt(values_sum / (count - 1)) + + table_stats[column]["std"] = std + + table_stats[column]["25%"] = self._get_n_percentile(25, col_values) + table_stats[column]["50%"] = self._get_n_percentile(50, col_values) + table_stats[column]["75%"] = self._get_n_percentile(75, col_values) + + except TypeError: + # for non numeric values + table_stats[column]["mean"] = math.nan + table_stats[column]["std"] = math.nan + 
table_stats[column]["25%"] = math.nan + table_stats[column]["50%"] = math.nan + table_stats[column]["75%"] = math.nan + + self._table = PrettyTable() + self._table.field_names = [" "] + list(table_stats.keys()) + + rows = list(table_stats.items())[0][1].keys() + + for row in rows: + values = [row] + for column in table_stats: + value = table_stats[column][row] + values.append(value) + + self._table.add_row(values) + + self._table_html = self._table.get_html_string() + self._table_txt = self._table.get_string() + + def _get_n_percentile(self, n, list) -> float: + """ + Calculates the nth percentile of the given data. + + Parameters + ---------- + n : int + The Nth percentile to comupte. Must be between 0 and 100 inclusive. + + list : list of numeric values + An ordered list of numeric values + + Returns + ------- + nth percentile of the list + """ + if n < 0 or n > 100: + raise ValueError("N must be between 0 and 100 inclusive") + + count = len(list) + lp = ((count + 1) * n) / 100 + index = math.floor(lp) + if index - 1 >= 0: + diff = list[index] - list[index - 1] + distance = lp - index + + return list[index - 1] + distance * diff + + @telemetry.log_call() def get_table_names(schema=None): """Get table names for a given connection""" @@ -83,3 +224,14 @@ def get_table_names(schema=None): def get_columns(name, schema=None): """Get column names for a given connection""" return Columns(name, schema) + + +@telemetry.log_call() +def get_table_statistics(name, config=None, user_ns=None): + """Get table statistics for a given connection. + + For all data types the results will include `count`, `mean`, `std`, `min` + `max`, `25`, `50` and `75` percentiles. It will also include `unique`, `top` + and `freq` statistics. + """ + return TableDescription(name, config=config, user_ns=user_ns) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index aff07f05c..7b795c9da 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -6,6 +6,7 @@ Magics, line_magic, magics_class, + needs_local_scope, ) from IPython.core.magic_arguments import argument, magic_arguments from IPython.core.error import UsageError @@ -33,6 +34,17 @@ def error(self, message): class SqlCmdMagic(Magics, Configurable): """%sqlcmd magic""" + displaycon = True + autolimit = None + style = "DEFAULT" + short_errors = True + displaylimit = None + autopandas = False + column_local_vars = False + feedback = False + autocommit = False + + @needs_local_scope @line_magic("sqlcmd") @magic_arguments() @argument("line", default="", type=str, help="Command name") @@ -65,8 +77,22 @@ def execute(self, line="", cell="", local_ns=None): args = parser.parse_args(others) return inspect.get_columns(name=args.table, schema=args.schema) + + elif cmd_name == "profile": + parser = CmdParser() + + parser.add_argument( + "-t", "--table", type=str, help="Table name", required=True + ) + + args = parser.parse_args(others) + + user_ns = self.shell.user_ns.copy() + user_ns.update(local_ns) + + return inspect.get_table_statistics(name=args.table, config=self, user_ns=user_ns) else: raise UsageError( f"%sqlcmd has no command: {cmd_name!r}. 
" - "Valid commands are: 'tables', 'columns'" + "Valid commands are: 'tables', 'columns', 'profile' " ) diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index 7fc189d03..6e7a6cb86 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -70,3 +70,56 @@ def test_columns_with_schema(ip, tmp_empty): ).result._repr_html_() assert "some_number" in out + + +def test_table_profile(ip): + ip.run_cell(""" + %%sql sqlite:// + CREATE TABLE numbers (rating, price, number, word); + INSERT INTO numbers VALUES (14.44, 2.48, 82, 'a'); + INSERT INTO numbers VALUES (13.13, 1.50, 93, 'b'); + INSERT INTO numbers VALUES (12.59, 0.20, 98, 'a'); + INSERT INTO numbers VALUES (11.54, 0.41, 89, 'a'); + INSERT INTO numbers VALUES (10.532, 0.1, 88, 'c'); + INSERT INTO numbers VALUES (11.5, 0.2, 84, ' '); + INSERT INTO numbers VALUES (11.1, 0.3, 90, 'a'); + INSERT INTO numbers VALUES (12.9, 0.31, 86, ''); + """) + + expected = { + "count": [8, 8, 8, 6], + "mean": [12.2165, 0.6875, 88.75, float("NaN")], + "min": [10.532, 0.1, 82, float("NaN")], + "max": [14.44, 2.48, 98, float("NaN")], + "std": [1.2784055917989632, 0.8504914545636036, + 5.092010548749033, float("NaN")], + "25%": [11.2, 0.2, 84.5, float("NaN")], + "50%": [12.065, 0.305, 88.5, float("NaN")], + "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], + "unique": [8, 7, 8, 4], + "freq": [1, 2, 1, 4], + "top": [14.44, 0.2, 98, "a"], + + } + + out = ip.run_cell("%sqlcmd profile -t numbers").result + + stats_table = out._table + + for row in stats_table: + criteria = row.get_string( + fields=[" "], border=False).strip() + + rating = row.get_string( + fields=["rating"], border=False, header=False).strip() + + price = row.get_string( + fields=["price"], border=False, header=False).strip() + + number = row.get_string( + fields=["number"], border=False, header=False).strip() + + if criteria in expected: + assert rating == str(expected[criteria][0]) + assert price == str(expected[criteria][1]) + assert number == str(expected[criteria][2]) From 0fa35327168e1c7d05374b2f73df04d9b866f174 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 15:29:36 +0200 Subject: [PATCH 02/23] lint --- src/sql/magic_cmd.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index 7b795c9da..2697f9cd6 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -90,7 +90,9 @@ def execute(self, line="", cell="", local_ns=None): user_ns = self.shell.user_ns.copy() user_ns.update(local_ns) - return inspect.get_table_statistics(name=args.table, config=self, user_ns=user_ns) + return inspect.get_table_statistics( + name=args.table, config=self, user_ns=user_ns + ) else: raise UsageError( f"%sqlcmd has no command: {cmd_name!r}. " From eca6957cd2a58bde7b9130faadac68ca6ce6afa1 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 15:43:22 +0200 Subject: [PATCH 03/23] test fixed --- src/sql/magic_cmd.py | 2 +- src/tests/test_magic_cmd.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index 2697f9cd6..e3c0acba2 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -96,5 +96,5 @@ def execute(self, line="", cell="", local_ns=None): else: raise UsageError( f"%sqlcmd has no command: {cmd_name!r}. 
" - "Valid commands are: 'tables', 'columns', 'profile' " + "Valid commands are: 'tables', 'columns', 'profile'" ) diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index 6e7a6cb86..4ab0b72fd 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -10,7 +10,7 @@ [ "%sqlcmd stuff", UsageError, - "%sqlcmd has no command: 'stuff'. Valid commands are: 'tables', 'columns'", + "%sqlcmd has no command: 'stuff'. Valid commands are: 'tables', 'columns', 'profile'", ], [ "%sqlcmd columns", From a400a036fa75b46f7d52d00deebc72320105c2a3 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 15:52:54 +0200 Subject: [PATCH 04/23] lint --- src/tests/test_magic_cmd.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index 4ab0b72fd..e367dd80f 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -10,7 +10,8 @@ [ "%sqlcmd stuff", UsageError, - "%sqlcmd has no command: 'stuff'. Valid commands are: 'tables', 'columns', 'profile'", + "%sqlcmd has no command: 'stuff'. Valid commands are: 'tables', " + "'columns', 'profile'", ], [ "%sqlcmd columns", @@ -73,7 +74,8 @@ def test_columns_with_schema(ip, tmp_empty): def test_table_profile(ip): - ip.run_cell(""" + ip.run_cell( + """ %%sql sqlite:// CREATE TABLE numbers (rating, price, number, word); INSERT INTO numbers VALUES (14.44, 2.48, 82, 'a'); @@ -84,22 +86,26 @@ def test_table_profile(ip): INSERT INTO numbers VALUES (11.5, 0.2, 84, ' '); INSERT INTO numbers VALUES (11.1, 0.3, 90, 'a'); INSERT INTO numbers VALUES (12.9, 0.31, 86, ''); - """) + """ + ) expected = { "count": [8, 8, 8, 6], "mean": [12.2165, 0.6875, 88.75, float("NaN")], "min": [10.532, 0.1, 82, float("NaN")], "max": [14.44, 2.48, 98, float("NaN")], - "std": [1.2784055917989632, 0.8504914545636036, - 5.092010548749033, float("NaN")], + "std": [ + 1.2784055917989632, + 0.8504914545636036, + 5.092010548749033, + float("NaN"), + ], "25%": [11.2, 0.2, 84.5, float("NaN")], "50%": [12.065, 0.305, 88.5, float("NaN")], "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], "unique": [8, 7, 8, 4], "freq": [1, 2, 1, 4], "top": [14.44, 0.2, 98, "a"], - } out = ip.run_cell("%sqlcmd profile -t numbers").result @@ -107,17 +113,13 @@ def test_table_profile(ip): stats_table = out._table for row in stats_table: - criteria = row.get_string( - fields=[" "], border=False).strip() + criteria = row.get_string(fields=[" "], border=False).strip() - rating = row.get_string( - fields=["rating"], border=False, header=False).strip() + rating = row.get_string(fields=["rating"], border=False, header=False).strip() - price = row.get_string( - fields=["price"], border=False, header=False).strip() + price = row.get_string(fields=["price"], border=False, header=False).strip() - number = row.get_string( - fields=["number"], border=False, header=False).strip() + number = row.get_string(fields=["number"], border=False, header=False).strip() if criteria in expected: assert rating == str(expected[criteria][0]) From 704108123d27788caeb983854b786c1c75895803 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 16:14:49 +0200 Subject: [PATCH 05/23] autopolars property added to config --- src/sql/magic_cmd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index e3c0acba2..25a4e684d 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -43,6 +43,7 @@ class SqlCmdMagic(Magics, Configurable): 
column_local_vars = False feedback = False autocommit = False + autopolars = False @needs_local_scope @line_magic("sqlcmd") From 9ccd1cc62fa37e21776c5226378a5800ae4f1eef Mon Sep 17 00:00:00 2001 From: yafimvo Date: Mon, 27 Feb 2023 17:48:10 +0200 Subject: [PATCH 06/23] save report added --- doc/user-guide/explore-tables.md | 10 +++++++++- src/sql/inspect.py | 5 +++-- src/sql/magic_cmd.py | 12 +++++++++++- src/tests/test_magic_cmd.py | 30 ++++++++++++++++++++++++++---- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/doc/user-guide/explore-tables.md b/doc/user-guide/explore-tables.md index 9d37dd027..2a9a82b05 100644 --- a/doc/user-guide/explore-tables.md +++ b/doc/user-guide/explore-tables.md @@ -85,4 +85,12 @@ CALL sqlite_attach('example.db'); ```{code-cell} ipython3 %sqlcmd profile -t track -``` \ No newline at end of file +``` + +### Saving report as HTML + +To save the generated report as an HTML file, use the `--output`/`-o` attribute followed by the desired file name + +``` +%sqlcmd profile -t track --output my-report.html +``` diff --git a/src/sql/inspect.py b/src/sql/inspect.py index d6c16c9ab..d7d2c3836 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -207,11 +207,12 @@ def _get_n_percentile(self, n, list) -> float: count = len(list) lp = ((count + 1) * n) / 100 index = math.floor(lp) - if index - 1 >= 0: + if index - 1 >= 0 and index < len(list): diff = list[index] - list[index - 1] distance = lp - index - return list[index - 1] + distance * diff + else: + return None @telemetry.log_call() diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index 25a4e684d..0ad8249c7 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -86,14 +86,24 @@ def execute(self, line="", cell="", local_ns=None): "-t", "--table", type=str, help="Table name", required=True ) + parser.add_argument( + "-o", "--output", type=str, help="Store report location", required=False + ) + args = parser.parse_args(others) user_ns = self.shell.user_ns.copy() user_ns.update(local_ns) - return inspect.get_table_statistics( + report = inspect.get_table_statistics( name=args.table, config=self, user_ns=user_ns ) + + if args.output: + with open(args.output, "w") as f: + f.write(report._repr_html_()) + + return report else: raise UsageError( f"%sqlcmd has no command: {cmd_name!r}. 
" diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index e367dd80f..cdae9f2f2 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -2,6 +2,7 @@ import pytest from IPython.core.error import UsageError +from pathlib import Path @pytest.mark.parametrize( @@ -73,7 +74,7 @@ def test_columns_with_schema(ip, tmp_empty): assert "some_number" in out -def test_table_profile(ip): +def test_table_profile(ip, tmp_empty): ip.run_cell( """ %%sql sqlite:// @@ -92,8 +93,8 @@ def test_table_profile(ip): expected = { "count": [8, 8, 8, 6], "mean": [12.2165, 0.6875, 88.75, float("NaN")], - "min": [10.532, 0.1, 82, float("NaN")], - "max": [14.44, 2.48, 98, float("NaN")], + "min": [10.532, 0.1, 82, "a"], + "max": [14.44, 2.48, 98, "c"], "std": [ 1.2784055917989632, 0.8504914545636036, @@ -103,7 +104,7 @@ def test_table_profile(ip): "25%": [11.2, 0.2, 84.5, float("NaN")], "50%": [12.065, 0.305, 88.5, float("NaN")], "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], - "unique": [8, 7, 8, 4], + "unique": [8, 7, 8, 5], "freq": [1, 2, 1, 4], "top": [14.44, 0.2, 98, "a"], } @@ -121,7 +122,28 @@ def test_table_profile(ip): number = row.get_string(fields=["number"], border=False, header=False).strip() + word = row.get_string(fields=["word"], border=False, header=False).strip() + if criteria in expected: assert rating == str(expected[criteria][0]) assert price == str(expected[criteria][1]) assert number == str(expected[criteria][2]) + assert word == str(expected[criteria][3]) + + +def test_table_profile_store(ip, tmp_empty): + ip.run_cell( + """ + %%sql sqlite:// + CREATE TABLE test_store (rating, price, number, symbol); + INSERT INTO test_store VALUES (14.44, 2.48, 82, 'a'); + INSERT INTO test_store VALUES (13.13, 1.50, 93, 'b'); + INSERT INTO test_store VALUES (12.59, 0.20, 98, 'a'); + INSERT INTO test_store VALUES (11.54, 0.41, 89, 'a'); + """ + ) + + ip.run_cell("%sqlcmd profile -t test_store --output test_report.html") + + report = Path("test_report.html") + assert report.is_file() From 9a0dc82e11f805a4dbcf0360119e159da6b7a14c Mon Sep 17 00:00:00 2001 From: yafimvo Date: Sun, 5 Mar 2023 18:08:22 +0200 Subject: [PATCH 07/23] percentile_disc added, schema added, docs updated --- doc/_toc.yml | 2 +- doc/user-guide/data-profiling.md | 158 +++++++++++++++++++++++++++++++ doc/user-guide/explore-tables.md | 96 ------------------- src/sql/inspect.py | 68 ++++++++----- src/sql/magic_cmd.py | 6 +- src/sql/util.py | 34 +++++++ src/tests/test_magic_cmd.py | 60 ++++++++++-- 7 files changed, 298 insertions(+), 126 deletions(-) create mode 100644 doc/user-guide/data-profiling.md delete mode 100644 doc/user-guide/explore-tables.md create mode 100644 src/sql/util.py diff --git a/doc/_toc.yml b/doc/_toc.yml index 0909c5175..2275954ac 100644 --- a/doc/_toc.yml +++ b/doc/_toc.yml @@ -14,7 +14,7 @@ parts: - file: user-guide/tables-columns - file: plot-legacy - file: user-guide/template - - file: user-guide/explore-tables + - file: user-guide/data-profiling - caption: Integrations chapters: diff --git a/doc/user-guide/data-profiling.md b/doc/user-guide/data-profiling.md new file mode 100644 index 000000000..c9ffa8289 --- /dev/null +++ b/doc/user-guide/data-profiling.md @@ -0,0 +1,158 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.14.4 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Data profiling + +When dealing with a new dataset, it's crucial for practitioners 
to have a comprehensive understanding of the data in a timely manner. This involves exploring and summarizing the dataset efficiently to extract valuable insights. However, this can be a time-consuming process. Fortunately, `%sqlcmd profile` offers an easy way to generate statistics and descriptive information, enabling practitioners to quickly gain a deeper understanding of the dataset.

Available statistics:

* The count of non-empty values
* The number of unique values
* The top (most frequent) value
* The frequency of your top value
* The mean, standard deviation, min and max values
* The percentiles of your data: 25%, 50% and 75%.


## Examples

### DuckDB

In this example we'll demonstrate the process of profiling a sample dataset that contains historical taxi data from NYC, using DuckDB. However, the code used here is compatible with all major databases.

Download the data:

```{code-cell} ipython3
from pathlib import Path
from urllib.request import urlretrieve

url = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet"

if not Path("yellow_tripdata_2021-01.parquet").is_file():
    urlretrieve(url, "yellow_tripdata_2021-01.parquet")
```

Setup

```{note}
this example requires duckdb-engine: `pip install duckdb-engine`
```

Load the extension and connect to an in-memory DuckDB database:

```{code-cell} ipython3
%load_ext sql
```

```{code-cell} ipython3
%sql duckdb://
```

Profile table

```{code-cell} ipython3
%sqlcmd profile --table "yellow_tripdata_2021-01.parquet"
```

### SQLite

We can easily explore a large SQLite database using DuckDB.

```{code-cell} ipython3
:tags: [hide-output]

import urllib.request
from pathlib import Path

if not Path("example.db").is_file():
    url = "https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite" # noqa
    urllib.request.urlretrieve(url, "example.db")
```


```{code-cell} ipython3
:tags: [hide-output]

%%sql duckdb:///
INSTALL 'sqlite_scanner';
LOAD 'sqlite_scanner';
CALL sqlite_attach('example.db');
```

```{code-cell} ipython3
%sqlcmd profile -t track
```

### Saving report as HTML

To save the generated report as an HTML file, use the `--output`/`-o` option followed by the desired file name:

```{code-cell} ipython3
:tags: [hide-output]

%sqlcmd profile -t track --output my-report.html
```

```{code-cell} ipython3
from IPython.display import HTML
HTML("my-report.html")
```

### Use schemas

To profile a table that lives in a specific schema, we can use the `--schema`/`-s` option.
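
Under the hood, passing `--schema` simply qualifies the table name before the statistics queries run. A minimal sketch of the logic this PR adds in `sql/inspect.py`:

```python
# sketch: how the profiled table is resolved when a schema is given
if schema:
    table_name = f"{schema}.{table_name}"  # queries then target schema.table
```
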
+ +```{code-cell} ipython3 +:tags: [hide-output] + +import sqlite3 + +with sqlite3.connect("a.db") as conn: + conn.execute("CREATE TABLE my_numbers (number FLOAT)") + conn.execute("INSERT INTO my_numbers VALUES (1)") + conn.execute("INSERT INTO my_numbers VALUES (2)") + conn.execute("INSERT INTO my_numbers VALUES (3)") +``` + +```{code-cell} ipython3 +:tags: [hide-output] + +%%sql +ATTACH DATABASE 'a.db' AS a_schema +``` + +```{code-cell} ipython3 +:tags: [hide-output] + +import sqlite3 + +with sqlite3.connect("b.db") as conn: + conn.execute("CREATE TABLE my_numbers (number FLOAT)") + conn.execute("INSERT INTO my_numbers VALUES (11)") + conn.execute("INSERT INTO my_numbers VALUES (22)") + conn.execute("INSERT INTO my_numbers VALUES (33)") +``` + +```{code-cell} ipython3 +:tags: [hide-output] + +%%sql +ATTACH DATABASE 'b.db' AS b_schema +``` + +Let's profile `my_numbers` of `b_schema` + +```{code-cell} ipython3 +%sqlcmd profile --table my_numbers --schema b_schema +``` diff --git a/doc/user-guide/explore-tables.md b/doc/user-guide/explore-tables.md deleted file mode 100644 index 2a9a82b05..000000000 --- a/doc/user-guide/explore-tables.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -jupytext: - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.14.4 -kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- - -# Explore tables - -When dealing with a new dataset, it's crucial for practitioners to have a comprehensive understanding of the data in a timely manner. This involves exploring and summarizing the dataset efficiently to extract valuable insights. However, this can be a time-consuming process. Fortunately, `%sqlcmd profile` offers an easy way to generate statistics and descriptive information, enabling practitioners to quickly gain a deeper understanding of the dataset. - -Availble statistics: - -* The count of non empty values -* The number of unique values -* The top (most frequent) value -* The frequency of your top value -* The mean, standard deviation, min and max values -* The percentiles of your data: 25%, 50% and 75%. - - -## Examples - -### Simple example with SQLite - -```{code-cell} ipython3 -:tags: [hide-output] - -%load_ext sql -%sql sqlite:// -``` - -Let's create our table - -```{code-cell} ipython3 -:tags: [hide-output] - -%%sql sqlite:// -CREATE TABLE example_table (rating, price, number, symbol); -INSERT INTO example_table VALUES (14.44, 2.48, 82, 'a'); -INSERT INTO example_table VALUES (13.13, 1.50, 93, 'b'); -INSERT INTO example_table VALUES (12.59, 0.20, 98, 'a'); -INSERT INTO example_table VALUES (11.54, 0.41, 89, 'a'); -INSERT INTO example_table VALUES (10.532, 0.1, 88, 'c'); -INSERT INTO example_table VALUES (11.5, 0.2, 84, 'b'); -INSERT INTO example_table VALUES (11.1, 0.3, 90, 'a'); -INSERT INTO example_table VALUES (12.9, 0.31, 86, ''); -INSERT INTO example_table VALUES (12.9, 0.31, 86, ' '); -``` - -```{code-cell} ipython3 -%sqlcmd profile -t example_table -``` - -### Large datasets - -We can easily explore large SQlite database using DuckDB. 
- -```{code-cell} ipython3 -:tags: [hide-output] - -import urllib.request -from pathlib import Path - -if not Path("example.db").is_file(): - url = "https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite" # noqa - urllib.request.urlretrieve(url, "example.db") -``` - - -```{code-cell} ipython3 -:tags: [hide-output] - -%%sql duckdb:/// -INSTALL 'sqlite_scanner'; -LOAD 'sqlite_scanner'; -CALL sqlite_attach('example.db'); -``` - -```{code-cell} ipython3 -%sqlcmd profile -t track -``` - -### Saving report as HTML - -To save the generated report as an HTML file, use the `--output`/`-o` attribute followed by the desired file name - -``` -%sqlcmd profile -t track --output my-report.html -``` diff --git a/src/sql/inspect.py b/src/sql/inspect.py index d7d2c3836..6ba3370fc 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -5,6 +5,7 @@ from sql.telemetry import telemetry import sql.run import math +from sql.util import convert_to_scientific def _get_inspector(conn): @@ -101,7 +102,11 @@ class TableDescription(DatabaseInspection): """ - def __init__(self, table_name, config=None, user_ns=None) -> None: + def __init__(self, table_name, schema=None, config=None, user_ns=None) -> None: + + if schema: + table_name = f"{schema}.{table_name}" + result_table_columns = sql.run.run( Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config, user_ns ) @@ -157,9 +162,15 @@ def __init__(self, table_name, config=None, user_ns=None) -> None: table_stats[column]["std"] = std - table_stats[column]["25%"] = self._get_n_percentile(25, col_values) - table_stats[column]["50%"] = self._get_n_percentile(50, col_values) - table_stats[column]["75%"] = self._get_n_percentile(75, col_values) + table_stats[column]["25%"] = self._get_n_percentile( + 25, table_name, column, config, user_ns + ) + table_stats[column]["50%"] = self._get_n_percentile( + 50, table_name, column, config, user_ns + ) + table_stats[column]["75%"] = self._get_n_percentile( + 75, table_name, column, config, user_ns + ) except TypeError: # for non numeric values @@ -169,6 +180,11 @@ def __init__(self, table_name, config=None, user_ns=None) -> None: table_stats[column]["50%"] = math.nan table_stats[column]["75%"] = math.nan + except BaseException: + # Failed to run sql command. + # We ignore the cell stats for such case. + pass + self._table = PrettyTable() self._table.field_names = [" "] + list(table_stats.keys()) @@ -178,6 +194,7 @@ def __init__(self, table_name, config=None, user_ns=None) -> None: values = [row] for column in table_stats: value = table_stats[column][row] + value = convert_to_scientific(value) values.append(value) self._table.add_row(values) @@ -185,34 +202,41 @@ def __init__(self, table_name, config=None, user_ns=None) -> None: self._table_html = self._table.get_html_string() self._table_txt = self._table.get_string() - def _get_n_percentile(self, n, list) -> float: + def _get_n_percentile( + self, percentile, table_name, column, config, user_ns + ) -> float: """ - Calculates the nth percentile of the given data. + Uses percentile_disc SQL query to compute the nth percentile of a + specified column in a specified table. Parameters ---------- n : int The Nth percentile to comupte. Must be between 0 and 100 inclusive. 
- list : list of numeric values - An ordered list of numeric values + table_name : str + Name of SQL table + + column : str + Name of the column in table Returns ------- - nth percentile of the list + Nth percentile of the list """ - if n < 0 or n > 100: - raise ValueError("N must be between 0 and 100 inclusive") + percentile = percentile / 100 + + percentile = sql.run.run( + Connection.current, + f""" + SELECT percentile_disc({percentile}) WITHIN GROUP (ORDER BY {column}) + as percentile, FROM {table_name} + """, + config, + user_ns, + ) - count = len(list) - lp = ((count + 1) * n) / 100 - index = math.floor(lp) - if index - 1 >= 0 and index < len(list): - diff = list[index] - list[index - 1] - distance = lp - index - return list[index - 1] + distance * diff - else: - return None + return percentile.dict()["percentile"][0] @telemetry.log_call() @@ -228,11 +252,11 @@ def get_columns(name, schema=None): @telemetry.log_call() -def get_table_statistics(name, config=None, user_ns=None): +def get_table_statistics(name, schema=None, config=None, user_ns=None): """Get table statistics for a given connection. For all data types the results will include `count`, `mean`, `std`, `min` `max`, `25`, `50` and `75` percentiles. It will also include `unique`, `top` and `freq` statistics. """ - return TableDescription(name, config=config, user_ns=user_ns) + return TableDescription(name, schema=schema, config=config, user_ns=user_ns) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index 0ad8249c7..4471b8ddd 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -86,6 +86,10 @@ def execute(self, line="", cell="", local_ns=None): "-t", "--table", type=str, help="Table name", required=True ) + parser.add_argument( + "-s", "--schema", type=str, help="Schema name", required=False + ) + parser.add_argument( "-o", "--output", type=str, help="Store report location", required=False ) @@ -96,7 +100,7 @@ def execute(self, line="", cell="", local_ns=None): user_ns.update(local_ns) report = inspect.get_table_statistics( - name=args.table, config=self, user_ns=user_ns + schema=args.schema, name=args.table, config=self, user_ns=user_ns ) if args.output: diff --git a/src/sql/util.py b/src/sql/util.py new file mode 100644 index 000000000..347f302e4 --- /dev/null +++ b/src/sql/util.py @@ -0,0 +1,34 @@ +import numpy as np + + +def convert_to_scientific(value): + """ + Converts value to scientific notation if necessary + + Parameters + ---------- + value : any + Value to format. + """ + if ( + isinstance(value, (int, float)) + and not isinstance(value, bool) + and _is_long_number(value) + ): + new_value = np.format_float_scientific(value, exp_digits=2, precision=3) + + else: + new_value = value + + return new_value + + +def _is_long_number(num) -> bool: + """ + Checks if num's digits > 10 + """ + if "." 
in str(num): + split_by_decimal = str(num).split(".") + if len(split_by_decimal[0]) > 10 or len(split_by_decimal[1]) > 10: + return True + return False diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index cdae9f2f2..c9e3a9bd6 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -96,19 +96,21 @@ def test_table_profile(ip, tmp_empty): "min": [10.532, 0.1, 82, "a"], "max": [14.44, 2.48, 98, "c"], "std": [ - 1.2784055917989632, - 0.8504914545636036, - 5.092010548749033, + "1.278e+00", + "8.505e-01", + "5.092e+00", float("NaN"), ], - "25%": [11.2, 0.2, 84.5, float("NaN")], - "50%": [12.065, 0.305, 88.5, float("NaN")], - "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], + # "25%": [11.2, 0.2, 84.5, float("NaN")], + # "50%": [12.065, 0.305, 88.5, float("NaN")], + # "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], "unique": [8, 7, 8, 5], "freq": [1, 2, 1, 4], "top": [14.44, 0.2, 98, "a"], } + # note : We ignote Nth percentile since sqlite doesn't support `percentile_disc` + out = ip.run_cell("%sqlcmd profile -t numbers").result stats_table = out._table @@ -131,6 +133,52 @@ def test_table_profile(ip, tmp_empty): assert word == str(expected[criteria][3]) +def test_table_schema_profile(ip, tmp_empty): + + with sqlite3.connect("a.db") as conn: + conn.execute("CREATE TABLE t (n FLOAT)") + conn.execute("INSERT INTO t VALUES (1)") + conn.execute("INSERT INTO t VALUES (2)") + conn.execute("INSERT INTO t VALUES (3)") + + with sqlite3.connect("b.db") as conn: + conn.execute("CREATE TABLE t (n FLOAT)") + conn.execute("INSERT INTO t VALUES (11)") + conn.execute("INSERT INTO t VALUES (22)") + conn.execute("INSERT INTO t VALUES (33)") + + ip.run_cell( + """ + %%sql sqlite:// + ATTACH DATABASE 'a.db' AS a_schema; + ATTACH DATABASE 'b.db' AS b_schema; + """ + ) + + expected = { + "count": [3], + "mean": [22.0], + "min": [11.0], + "max": [33.0], + "std": [11.0], + "unique": [3], + "freq": [1], + "top": [33.0], + } + + out = ip.run_cell("%sqlcmd profile -t t --schema b_schema").result + + stats_table = out._table + + for row in stats_table: + criteria = row.get_string(fields=[" "], border=False).strip() + + cell = row.get_string(fields=["n"], border=False, header=False).strip() + + if criteria in expected: + assert cell == str(expected[criteria][0]) + + def test_table_profile_store(ip, tmp_empty): ip.run_cell( """ From 56e3d2eb80e68105d076c1630cebef2f0656913d Mon Sep 17 00:00:00 2001 From: yafimvo Date: Sun, 5 Mar 2023 18:14:16 +0200 Subject: [PATCH 08/23] numpy added to setup --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1234d0991..af595b434 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ "jinja2", "ploomber-core>=0.2.4", 'importlib-metadata;python_version<"3.8"', + "numpy" ] DEV = [ From 431d2fb7c1c5b298dd634a28ffd58d86501a7865 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Tue, 7 Mar 2023 19:29:00 +0200 Subject: [PATCH 09/23] np removed, run_raw added, queries updated, test fixed --- doc/user-guide/data-profiling.md | 3 +- setup.py | 3 +- src/sql/inspect.py | 157 +++++++++++++------------------ src/sql/magic_cmd.py | 22 +---- src/sql/run.py | 17 +++- src/sql/util.py | 5 +- src/tests/test_magic_cmd.py | 29 ++---- 7 files changed, 96 insertions(+), 140 deletions(-) diff --git a/doc/user-guide/data-profiling.md b/doc/user-guide/data-profiling.md index c9ffa8289..b36359038 100644 --- a/doc/user-guide/data-profiling.md +++ b/doc/user-guide/data-profiling.md @@ -24,7 +24,6 @@ Availble statistics: * The 
mean, standard deviation, min and max values * The percentiles of your data: 25%, 50% and 75%. - ## Examples ### DuckDB @@ -46,7 +45,7 @@ if not Path("yellow_tripdata_2021-01.parquet").is_file(): Setup ```{note} -this example requires duckdb-engine: `pip install duckdb-engine` +This example requires duckdb-engine: `pip install duckdb-engine` ``` Load the extension and connect to an in-memory DuckDB database: diff --git a/setup.py b/setup.py index af595b434..51849f922 100644 --- a/setup.py +++ b/setup.py @@ -23,8 +23,7 @@ "ipython-genutils>=0.1.0", "jinja2", "ploomber-core>=0.2.4", - 'importlib-metadata;python_version<"3.8"', - "numpy" + 'importlib-metadata;python_version<"3.8"' ] DEV = [ diff --git a/src/sql/inspect.py b/src/sql/inspect.py index 6ba3370fc..cb5ffe5cb 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -82,7 +82,7 @@ class TableDescription(DatabaseInspection): Descriptive statistics are: - Count - Number of all non empty values + Count - Number of all not None values Mean - Mean of the values @@ -94,7 +94,7 @@ class TableDescription(DatabaseInspection): 25h, 50h and 75h percentiles - Unique - Number of unique values + Unique - Number of not None unique values Top - The most frequent value @@ -102,75 +102,83 @@ class TableDescription(DatabaseInspection): """ - def __init__(self, table_name, schema=None, config=None, user_ns=None) -> None: - + def __init__(self, table_name, schema=None, config=None) -> None: if schema: table_name = f"{schema}.{table_name}" - result_table_columns = sql.run.run( - Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config, user_ns - ) - - columns = result_table_columns.keys + columns = sql.run.run_raw( + Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config + ).keys table_stats = dict({}) for column in columns: table_stats[column] = dict() - result_col_unique_values = sql.run.run( - Connection.current, - f"SELECT COUNT(DISTINCT {column}) as unique_count FROM {table_name}", - config, - user_ns, - ) - - result_col_freq_values = sql.run.run( + result_col_freq_values = sql.run.run_raw( Connection.current, - f"""SELECT {column}, COUNT({column}) as frequency FROM {table_name} + f"""SELECT {column} as top, + COUNT({column}) as frequency FROM {table_name} GROUP BY {column} ORDER BY Count({column}) Desc""", config, - user_ns, - ) + ).dict() - result_non_empty_values = sql.run.run( + # get all non None values, min, max and avg. 
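+            # note: TRIM({column}) <> '' below also filters out blank strings,
+            # so empty values are excluded from count, unique, min and max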
+ result_value_values = sql.run.run_raw( Connection.current, - f"""SELECT {column} FROM {table_name} WHERE {column} - IS NOT NULL AND TRIM({column}) <> '' - ORDER BY {column} ASC + f""" + SELECT MIN({column}) AS min, + MAX({column}) AS max, + COUNT(DISTINCT {column}) AS unique_count, + COUNT({column}) AS total + FROM {table_name} + WHERE {column} IS NOT NULL AND TRIM({column}) <> '' """, config, - user_ns, - ) - - col_values = result_non_empty_values.dict()[column] - count = len(col_values) - table_stats[column]["count"] = count - table_stats[column]["freq"] = result_col_freq_values.dict()["frequency"][0] - table_stats[column]["unique"] = result_col_unique_values.dict()[ - "unique_count" - ][0] - table_stats[column]["top"] = result_col_freq_values.dict()[column][0] - table_stats[column]["min"] = col_values[0] - table_stats[column]["max"] = col_values[count - 1] + ).dict() - try: - mean = sum(col_values) / count - table_stats[column]["mean"] = mean + table_stats[column]["freq"] = result_col_freq_values["frequency"][0] + table_stats[column]["top"] = result_col_freq_values["top"][0] + table_stats[column]["count"] = result_value_values["total"][0] + table_stats[column]["unique"] = result_value_values["unique_count"][0] + table_stats[column]["min"] = result_value_values["min"][0] + table_stats[column]["max"] = result_value_values["max"][0] - values_sum = sum([(math.pow((v - mean), 2)) for v in col_values]) - std = math.sqrt(values_sum / (count - 1)) + avg = None + try: + results_avg = sql.run.run_raw( + Connection.current, + f""" + SELECT AVG({column}) AS avg + FROM {table_name} + WHERE {column} IS NOT NULL AND TRIM({column}) <> '' + """, + config, + ).dict() + avg = results_avg["avg"][0] + except BaseException: + avg = math.nan - table_stats[column]["std"] = std + table_stats[column]["mean"] = avg - table_stats[column]["25%"] = self._get_n_percentile( - 25, table_name, column, config, user_ns - ) - table_stats[column]["50%"] = self._get_n_percentile( - 50, table_name, column, config, user_ns - ) - table_stats[column]["75%"] = self._get_n_percentile( - 75, table_name, column, config, user_ns - ) + try: + # Note: This STDEV and PERCENTILE_DISC will work only on DuckDB + result = sql.run.run_raw( + Connection.current, + f""" + SELECT + stddev_pop({column}) as std, + percentile_disc(0.25) WITHIN GROUP (ORDER BY {column}) as p25, + percentile_disc(0.50) WITHIN GROUP (ORDER BY {column}) as p50, + percentile_disc(0.75) WITHIN GROUP (ORDER BY {column}) as p75 + FROM {table_name} + """, + config, + ).dict() + + table_stats[column]["std"] = result["std"][0] + table_stats[column]["25%"] = result["p25"][0] + table_stats[column]["50%"] = result["p50"][0] + table_stats[column]["75%"] = result["p75"][0] except TypeError: # for non numeric values @@ -193,7 +201,10 @@ def __init__(self, table_name, schema=None, config=None, user_ns=None) -> None: for row in rows: values = [row] for column in table_stats: - value = table_stats[column][row] + if row in table_stats[column]: + value = table_stats[column][row] + else: + value = "" value = convert_to_scientific(value) values.append(value) @@ -202,42 +213,6 @@ def __init__(self, table_name, schema=None, config=None, user_ns=None) -> None: self._table_html = self._table.get_html_string() self._table_txt = self._table.get_string() - def _get_n_percentile( - self, percentile, table_name, column, config, user_ns - ) -> float: - """ - Uses percentile_disc SQL query to compute the nth percentile of a - specified column in a specified table. 
- - Parameters - ---------- - n : int - The Nth percentile to comupte. Must be between 0 and 100 inclusive. - - table_name : str - Name of SQL table - - column : str - Name of the column in table - - Returns - ------- - Nth percentile of the list - """ - percentile = percentile / 100 - - percentile = sql.run.run( - Connection.current, - f""" - SELECT percentile_disc({percentile}) WITHIN GROUP (ORDER BY {column}) - as percentile, FROM {table_name} - """, - config, - user_ns, - ) - - return percentile.dict()["percentile"][0] - @telemetry.log_call() def get_table_names(schema=None): @@ -252,11 +227,11 @@ def get_columns(name, schema=None): @telemetry.log_call() -def get_table_statistics(name, schema=None, config=None, user_ns=None): +def get_table_statistics(name, schema=None, config=None): """Get table statistics for a given connection. For all data types the results will include `count`, `mean`, `std`, `min` `max`, `25`, `50` and `75` percentiles. It will also include `unique`, `top` and `freq` statistics. """ - return TableDescription(name, schema=schema, config=config, user_ns=user_ns) + return TableDescription(name, schema=schema, config=config) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index 4471b8ddd..bb9d1a079 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -5,8 +5,7 @@ from IPython.core.magic import ( Magics, line_magic, - magics_class, - needs_local_scope, + magics_class ) from IPython.core.magic_arguments import argument, magic_arguments from IPython.core.error import UsageError @@ -34,22 +33,10 @@ def error(self, message): class SqlCmdMagic(Magics, Configurable): """%sqlcmd magic""" - displaycon = True - autolimit = None - style = "DEFAULT" - short_errors = True - displaylimit = None - autopandas = False - column_local_vars = False - feedback = False - autocommit = False - autopolars = False - - @needs_local_scope @line_magic("sqlcmd") @magic_arguments() @argument("line", default="", type=str, help="Command name") - def execute(self, line="", cell="", local_ns=None): + def execute(self, line="", cell=""): """ Command """ @@ -96,11 +83,8 @@ def execute(self, line="", cell="", local_ns=None): args = parser.parse_args(others) - user_ns = self.shell.user_ns.copy() - user_ns.update(local_ns) - report = inspect.get_table_statistics( - schema=args.schema, name=args.table, config=self, user_ns=user_ns + schema=args.schema, name=args.table, config=self.config ) if args.output: diff --git a/src/sql/run.py b/src/sql/run.py index 77969db01..7fad057ae 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -106,13 +106,18 @@ def __init__(self, sqlaproxy, config): self.keys = {} if sqlaproxy.returns_rows: self.keys = sqlaproxy.keys() - if config.autolimit: + if isinstance(config.autolimit, bool): list.__init__(self, sqlaproxy.fetchmany(size=config.autolimit)) else: list.__init__(self, sqlaproxy.fetchall()) self.field_names = unduplicate_field_names(self.keys) + + _style = None + if isinstance(config.style, str): + _style = prettytable.__dict__[config.style.upper()] + self.pretty = PrettyTable( - self.field_names, style=prettytable.__dict__[config.style.upper()] + self.field_names, style=_style ) else: list.__init__(self, []) @@ -347,7 +352,7 @@ def from_list(self, source_list): def fetchmany(size): pos = 0 while pos < len(source_list): - yield source_list[pos : pos + size] + yield source_list[pos: pos + size] pos += size self.fetchmany = fetchmany @@ -415,6 +420,12 @@ def run(conn, sql, config, user_namespace): return "Connected: %s" % conn.name +def run_raw(conn, 
sql, config): + result = conn.session.execute(sql) + resultset = ResultSet(result, config) + return resultset + + class PrettyTable(prettytable.PrettyTable): def __init__(self, *args, **kwargs): self.row_count = 0 diff --git a/src/sql/util.py b/src/sql/util.py index 347f302e4..f606391ca 100644 --- a/src/sql/util.py +++ b/src/sql/util.py @@ -1,6 +1,3 @@ -import numpy as np - - def convert_to_scientific(value): """ Converts value to scientific notation if necessary @@ -15,7 +12,7 @@ def convert_to_scientific(value): and not isinstance(value, bool) and _is_long_number(value) ): - new_value = np.format_float_scientific(value, exp_digits=2, precision=3) + new_value = "{:,.3e}".format(value) else: new_value = value diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py index c9e3a9bd6..91ea7ce9c 100644 --- a/src/tests/test_magic_cmd.py +++ b/src/tests/test_magic_cmd.py @@ -78,7 +78,7 @@ def test_table_profile(ip, tmp_empty): ip.run_cell( """ %%sql sqlite:// - CREATE TABLE numbers (rating, price, number, word); + CREATE TABLE numbers (rating float, price float, number int, word varchar(50)); INSERT INTO numbers VALUES (14.44, 2.48, 82, 'a'); INSERT INTO numbers VALUES (13.13, 1.50, 93, 'b'); INSERT INTO numbers VALUES (12.59, 0.20, 98, 'a'); @@ -92,29 +92,20 @@ def test_table_profile(ip, tmp_empty): expected = { "count": [8, 8, 8, 6], - "mean": [12.2165, 0.6875, 88.75, float("NaN")], + "mean": [12.2165, "6.875e-01", 88.75, 0.0], "min": [10.532, 0.1, 82, "a"], "max": [14.44, 2.48, 98, "c"], - "std": [ - "1.278e+00", - "8.505e-01", - "5.092e+00", - float("NaN"), - ], - # "25%": [11.2, 0.2, 84.5, float("NaN")], - # "50%": [12.065, 0.305, 88.5, float("NaN")], - # "75%": [13.072500000000002, 1.2275, 92.25, float("NaN")], - "unique": [8, 7, 8, 5], + "unique": [8, 7, 8, 3], "freq": [1, 2, 1, 4], "top": [14.44, 0.2, 98, "a"], } - # note : We ignote Nth percentile since sqlite doesn't support `percentile_disc` - out = ip.run_cell("%sqlcmd profile -t numbers").result stats_table = out._table + assert len(stats_table.rows) == len(expected) + for row in stats_table: criteria = row.get_string(fields=[" "], border=False).strip() @@ -126,11 +117,11 @@ def test_table_profile(ip, tmp_empty): word = row.get_string(fields=["word"], border=False, header=False).strip() - if criteria in expected: - assert rating == str(expected[criteria][0]) - assert price == str(expected[criteria][1]) - assert number == str(expected[criteria][2]) - assert word == str(expected[criteria][3]) + assert criteria in expected + assert rating == str(expected[criteria][0]) + assert price == str(expected[criteria][1]) + assert number == str(expected[criteria][2]) + assert word == str(expected[criteria][3]) def test_table_schema_profile(ip, tmp_empty): From fafa53343324330b56b6d1bcda2f687f9330509d Mon Sep 17 00:00:00 2001 From: yafimvo Date: Tue, 7 Mar 2023 20:02:46 +0200 Subject: [PATCH 10/23] test fixed --- src/sql/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sql/run.py b/src/sql/run.py index 7fad057ae..ff45543f3 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -106,7 +106,7 @@ def __init__(self, sqlaproxy, config): self.keys = {} if sqlaproxy.returns_rows: self.keys = sqlaproxy.keys() - if isinstance(config.autolimit, bool): + if isinstance(config.autolimit, int): list.__init__(self, sqlaproxy.fetchmany(size=config.autolimit)) else: list.__init__(self, sqlaproxy.fetchall()) From 122f1067c9f8c6951d95b5778c6b823597a80e83 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 10:07:44 
+0200 Subject: [PATCH 11/23] config.autolimit check fixed --- src/sql/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sql/run.py b/src/sql/run.py index ff45543f3..462492140 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -106,7 +106,7 @@ def __init__(self, sqlaproxy, config): self.keys = {} if sqlaproxy.returns_rows: self.keys = sqlaproxy.keys() - if isinstance(config.autolimit, int): + if isinstance(config.autolimit, int) and config.autolimit > 0: list.__init__(self, sqlaproxy.fetchmany(size=config.autolimit)) else: list.__init__(self, sqlaproxy.fetchall()) From 83b9dd39430905d5a057acd76cfa2087a0261cf4 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 20:35:54 +0200 Subject: [PATCH 12/23] integration tests added --- src/sql/inspect.py | 137 +++++++++++------- .../integration/test_generic_db_opeations.py | 99 +++++++++++++ 2 files changed, 183 insertions(+), 53 deletions(-) diff --git a/src/sql/inspect.py b/src/sql/inspect.py index cb5ffe5cb..18e0c1e8a 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -111,84 +111,115 @@ def __init__(self, table_name, schema=None, config=None) -> None: ).keys table_stats = dict({}) + columns_to_include_in_report = set() for column in columns: table_stats[column] = dict() - result_col_freq_values = sql.run.run_raw( - Connection.current, - f"""SELECT {column} as top, - COUNT({column}) as frequency FROM {table_name} - GROUP BY {column} ORDER BY Count({column}) Desc""", - config, - ).dict() - - # get all non None values, min, max and avg. - result_value_values = sql.run.run_raw( - Connection.current, - f""" - SELECT MIN({column}) AS min, - MAX({column}) AS max, - COUNT(DISTINCT {column}) AS unique_count, - COUNT({column}) AS total - FROM {table_name} - WHERE {column} IS NOT NULL AND TRIM({column}) <> '' - """, - config, - ).dict() - - table_stats[column]["freq"] = result_col_freq_values["frequency"][0] - table_stats[column]["top"] = result_col_freq_values["top"][0] - table_stats[column]["count"] = result_value_values["total"][0] - table_stats[column]["unique"] = result_value_values["unique_count"][0] - table_stats[column]["min"] = result_value_values["min"][0] - table_stats[column]["max"] = result_value_values["max"][0] - - avg = None + + # index is reserved word in sqlite so we use + # brackets to make it work. + if column == "index": + _column = "[index]" + else: + _column = column + + try: + result_col_freq_values = sql.run.run_raw( + Connection.current, + f"""SELECT {_column} as top, + COUNT({_column}) as frequency FROM {table_name} + GROUP BY {_column} ORDER BY Count({_column}) Desc""", + config, + ).dict() + + table_stats[column]["freq"] = result_col_freq_values["frequency"][0] + table_stats[column]["top"] = result_col_freq_values["top"][0] + + columns_to_include_in_report.update(["freq", "top"]) + + except Exception: + pass + + try: + # get all non None values, min, max and avg. 
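+                # if this query fails for a column (e.g., TRIM on an
+                # incompatible type), the except below skips these stats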
+ result_value_values = sql.run.run_raw( + Connection.current, + f""" + SELECT MIN({_column}) AS min, + MAX({_column}) AS max, + COUNT(DISTINCT {_column}) AS unique_count, + COUNT({_column}) AS count + FROM {table_name} + WHERE {_column} IS NOT NULL AND TRIM({_column}) <> '' + """, + config, + ).dict() + + table_stats[column]["count"] = result_value_values["count"][0] + table_stats[column]["unique"] = result_value_values["unique_count"][0] + table_stats[column]["min"] = result_value_values["min"][0] + table_stats[column]["max"] = result_value_values["max"][0] + + columns_to_include_in_report.update(["count", "unique", "min", "max"]) + + except Exception: + pass + try: results_avg = sql.run.run_raw( Connection.current, f""" - SELECT AVG({column}) AS avg + SELECT AVG({_column}) AS avg FROM {table_name} - WHERE {column} IS NOT NULL AND TRIM({column}) <> '' + WHERE {_column} IS NOT NULL AND TRIM({_column}) <> '' """, config, ).dict() - avg = results_avg["avg"][0] - except BaseException: - avg = math.nan + table_stats[column]["mean"] = results_avg["avg"][0] + columns_to_include_in_report.update(["mean"]) + + except Exception: + table_stats[column]["mean"] = math.nan - table_stats[column]["mean"] = avg + # These keys are numeric and work only on duckdb + special_numeric_keys = ["std", "25%", "50%", "75%"] try: - # Note: This STDEV and PERCENTILE_DISC will work only on DuckDB + # Note: stddev_pop and PERCENTILE_DISC will work only on DuckDB result = sql.run.run_raw( Connection.current, f""" SELECT - stddev_pop({column}) as std, - percentile_disc(0.25) WITHIN GROUP (ORDER BY {column}) as p25, - percentile_disc(0.50) WITHIN GROUP (ORDER BY {column}) as p50, - percentile_disc(0.75) WITHIN GROUP (ORDER BY {column}) as p75 + stddev_pop({_column}) as key_std, + percentile_disc(0.25) WITHIN GROUP + (ORDER BY {_column}) as key_25, + percentile_disc(0.50) WITHIN GROUP + (ORDER BY {_column}) as key_50, + percentile_disc(0.75) WITHIN GROUP + (ORDER BY {_column}) as key_75 FROM {table_name} """, config, ).dict() - table_stats[column]["std"] = result["std"][0] - table_stats[column]["25%"] = result["p25"][0] - table_stats[column]["50%"] = result["p50"][0] - table_stats[column]["75%"] = result["p75"][0] + for key in special_numeric_keys: + r_key = f'key_{key.replace("%", "")}' + table_stats[column][key] = result[r_key][0] + + columns_to_include_in_report.update(special_numeric_keys) except TypeError: # for non numeric values - table_stats[column]["mean"] = math.nan - table_stats[column]["std"] = math.nan - table_stats[column]["25%"] = math.nan - table_stats[column]["50%"] = math.nan - table_stats[column]["75%"] = math.nan + for key in special_numeric_keys: + table_stats[column][key] = math.nan + + except Exception as e: + # We tried to apply numeric function on + # non numeric value, i.e: DateTime + if "duckdb.BinderException" in str(e): + for key in special_numeric_keys: + table_stats[column][key] = math.nan - except BaseException: # Failed to run sql command. # We ignore the cell stats for such case. 
pass @@ -196,8 +227,8 @@ def __init__(self, table_name, schema=None, config=None) -> None: self._table = PrettyTable() self._table.field_names = [" "] + list(table_stats.keys()) - rows = list(table_stats.items())[0][1].keys() - + rows = list(columns_to_include_in_report) + rows.sort(reverse=True) for row in rows: values = [row] for column in table_stats: diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index 53f37e676..e7bbdbe1c 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -3,6 +3,7 @@ import warnings from sql.telemetry import telemetry from unittest.mock import ANY, Mock +import math @pytest.fixture(autouse=True) @@ -144,3 +145,101 @@ def test_telemetry_execute_command_has_connection_info( }, }, ) + + +@pytest.mark.parametrize( + "ip_with_dynamic_db, table, table_columns, expected", + [ + # ("ip_with_postgreSQL", + # "taxi", + # ["index", "taxi_driver_name"], + # { + # "count": [45, 45], + # "mean": [22.0, 0.0], + # "min": [0, "Eric Ken"], + # "max": [44, "Kevin Kelly"], + # "unique": [45, 3], + # "freq": [1, 15], + # "top": [0, "Kevin Kelly"], + # } + # ), + # ("ip_with_mySQL", + # "taxi", + # ["index", "taxi_driver_name"], + # { + # "count": [45, 45], + # "mean": [22.0, 0.0], + # "min": [0, "Eric Ken"], + # "max": [44, "Kevin Kelly"], + # "unique": [45, 3], + # "freq": [1, 15], + # "top": [0, "Kevin Kelly"], + # } + # ), + # ("ip_with_mariaDB", + # "taxi", + # ["index", "taxi_driver_name"], + # { + # "count": [45, 45], + # "mean": [22.0, 0.0], + # "min": [0, "Eric Ken"], + # "max": [44, "Kevin Kelly"], + # "unique": [45, 3], + # "freq": [1, 15], + # "top": [0, "Kevin Kelly"], + # } + # ), + ("ip_with_SQLite", + "taxi", + ["index", "taxi_driver_name"], + { + "count": [45, 45], + "mean": [22.0, 0.0], + "min": [0, "Eric Ken"], + "max": [44, "Kevin Kelly"], + "unique": [45, 3], + "freq": [1, 15], + "top": [0, "Kevin Kelly"], + } + ), + ("ip_with_duckDB", + "yellow_tripdata_2021-01.parquet", + ["VendorID", "tpep_pickup_datetime", "passenger_count"], + { + "count": [1369769, 1369769, 1271417], + "mean": ["1.722e+00", math.nan, "1.412e+00"], + "min": [1, "2008-12-31 23:05:14", 0.0], + "max": [6, "2021-02-22 16:52:16", 8.0], + "unique": [3, 939020, 9], + "freq": [937141, 13, 966236], + "top": [2, "2021-01-14 13:52:00", 1.0], + + "std": ["5.925e-01", math.nan, "1.060e+00"], + "25%": [1, math.nan, 1.0], + "50%": [2, math.nan, 1.0], + "75%": [2, math.nan, 1.0], + }), + ], +) +def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected): + ip_with_dynamic_db = request.getfixturevalue(ip_with_dynamic_db) + + out = ip_with_dynamic_db.run_cell( + f""" + %sqlcmd profile --table "{table}" + """ + ).result + + stats_table = out._table + + assert len(stats_table.rows) == len(expected) + + for row in stats_table: + criteria = row.get_string(fields=[" "], border=False).strip() + + for i, column in enumerate(table_columns): + cell_value = row.get_string( + fields=[column], border=False, header=False).strip() + + assert criteria in expected + assert cell_value == str(expected[criteria][i]) From 4d0f84de5a60fe73f2f9737f8286a3eef8374845 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 21:16:14 +0200 Subject: [PATCH 13/23] integration tests fixed --- src/sql/inspect.py | 36 +++--- .../integration/test_generic_db_opeations.py | 113 +++++++++--------- 2 files changed, 72 insertions(+), 77 deletions(-) diff --git a/src/sql/inspect.py 
b/src/sql/inspect.py index 18e0c1e8a..2a67fcaec 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -116,19 +116,13 @@ def __init__(self, table_name, schema=None, config=None) -> None: for column in columns: table_stats[column] = dict() - # index is reserved word in sqlite so we use - # brackets to make it work. - if column == "index": - _column = "[index]" - else: - _column = column - + # Note: index is reserved word in sqlite try: result_col_freq_values = sql.run.run_raw( Connection.current, - f"""SELECT {_column} as top, - COUNT({_column}) as frequency FROM {table_name} - GROUP BY {_column} ORDER BY Count({_column}) Desc""", + f"""SELECT {column} as top, + COUNT({column}) as frequency FROM {table_name} + GROUP BY {column} ORDER BY Count({column}) Desc""", config, ).dict() @@ -145,12 +139,12 @@ def __init__(self, table_name, schema=None, config=None) -> None: result_value_values = sql.run.run_raw( Connection.current, f""" - SELECT MIN({_column}) AS min, - MAX({_column}) AS max, - COUNT(DISTINCT {_column}) AS unique_count, - COUNT({_column}) AS count + SELECT MIN({column}) AS min, + MAX({column}) AS max, + COUNT(DISTINCT {column}) AS unique_count, + COUNT({column}) AS count FROM {table_name} - WHERE {_column} IS NOT NULL AND TRIM({_column}) <> '' + WHERE {column} IS NOT NULL AND TRIM({column}) <> '' """, config, ).dict() @@ -169,9 +163,9 @@ def __init__(self, table_name, schema=None, config=None) -> None: results_avg = sql.run.run_raw( Connection.current, f""" - SELECT AVG({_column}) AS avg + SELECT AVG({column}) AS avg FROM {table_name} - WHERE {_column} IS NOT NULL AND TRIM({_column}) <> '' + WHERE {column} IS NOT NULL AND TRIM({column}) <> '' """, config, ).dict() @@ -190,13 +184,13 @@ def __init__(self, table_name, schema=None, config=None) -> None: Connection.current, f""" SELECT - stddev_pop({_column}) as key_std, + stddev_pop({column}) as key_std, percentile_disc(0.25) WITHIN GROUP - (ORDER BY {_column}) as key_25, + (ORDER BY {column}) as key_25, percentile_disc(0.50) WITHIN GROUP - (ORDER BY {_column}) as key_50, + (ORDER BY {column}) as key_50, percentile_disc(0.75) WITHIN GROUP - (ORDER BY {_column}) as key_75 + (ORDER BY {column}) as key_75 FROM {table_name} """, config, diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index e7bbdbe1c..e3765ce4c 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -150,46 +150,33 @@ def test_telemetry_execute_command_has_connection_info( @pytest.mark.parametrize( "ip_with_dynamic_db, table, table_columns, expected", [ - # ("ip_with_postgreSQL", - # "taxi", - # ["index", "taxi_driver_name"], - # { - # "count": [45, 45], - # "mean": [22.0, 0.0], - # "min": [0, "Eric Ken"], - # "max": [44, "Kevin Kelly"], - # "unique": [45, 3], - # "freq": [1, 15], - # "top": [0, "Kevin Kelly"], - # } - # ), - # ("ip_with_mySQL", - # "taxi", - # ["index", "taxi_driver_name"], - # { - # "count": [45, 45], - # "mean": [22.0, 0.0], - # "min": [0, "Eric Ken"], - # "max": [44, "Kevin Kelly"], - # "unique": [45, 3], - # "freq": [1, 15], - # "top": [0, "Kevin Kelly"], - # } - # ), - # ("ip_with_mariaDB", - # "taxi", - # ["index", "taxi_driver_name"], - # { - # "count": [45, 45], - # "mean": [22.0, 0.0], - # "min": [0, "Eric Ken"], - # "max": [44, "Kevin Kelly"], - # "unique": [45, 3], - # "freq": [1, 15], - # "top": [0, "Kevin Kelly"], - # } - # ), - ("ip_with_SQLite", + ("ip_with_postgreSQL", + "taxi", + ["index", 
"taxi_driver_name"], + { + "count": [45, 45], + "mean": [22.0, 0.0], + "min": [0, "Eric Ken"], + "max": [44, "Kevin Kelly"], + "unique": [45, 3], + "freq": [1, 15], + "top": [0, "Kevin Kelly"], + } + ), + ("ip_with_mySQL", + "taxi", + ["index", "taxi_driver_name"], + { + "count": [45, 45], + "mean": [22.0, 0.0], + "min": [0, "Eric Ken"], + "max": [44, "Kevin Kelly"], + "unique": [45, 3], + "freq": [1, 15], + "top": [0, "Kevin Kelly"], + } + ), + ("ip_with_mariaDB", "taxi", ["index", "taxi_driver_name"], { @@ -202,26 +189,40 @@ def test_telemetry_execute_command_has_connection_info( "top": [0, "Kevin Kelly"], } ), + ("ip_with_SQLite", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + } + ), ("ip_with_duckDB", - "yellow_tripdata_2021-01.parquet", - ["VendorID", "tpep_pickup_datetime", "passenger_count"], + "taxi", + ["index", "taxi_driver_name"], { - "count": [1369769, 1369769, 1271417], - "mean": ["1.722e+00", math.nan, "1.412e+00"], - "min": [1, "2008-12-31 23:05:14", 0.0], - "max": [6, "2021-02-22 16:52:16", 8.0], - "unique": [3, 939020, 9], - "freq": [937141, 13, 966236], - "top": [2, "2021-01-14 13:52:00", 1.0], - - "std": ["5.925e-01", math.nan, "1.060e+00"], - "25%": [1, math.nan, 1.0], - "50%": [2, math.nan, 1.0], - "75%": [2, math.nan, 1.0], - }), + "count": [45, 45], + "mean": [22.0, math.nan], + "min": [0, "Eric Ken"], + "max": [44, "Kevin Kelly"], + "unique": [45, 3], + "freq": [1, 15], + "top": [0, "Eric Ken"], + "std": ["1.299e+01", math.nan], + "25%": [11, math.nan], + "50%": [22, math.nan], + "75%": [33, math.nan], + + } + ), ], ) -def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected): +def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected, capsys): ip_with_dynamic_db = request.getfixturevalue(ip_with_dynamic_db) out = ip_with_dynamic_db.run_cell( From 105aa3d957a43b6acef0958f16163d6ac7baf8e5 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 21:17:28 +0200 Subject: [PATCH 14/23] lint --- src/tests/integration/test_generic_db_opeations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index e3765ce4c..3ff399663 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -222,7 +222,7 @@ def test_telemetry_execute_command_has_connection_info( ), ], ) -def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected, capsys): +def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected): ip_with_dynamic_db = request.getfixturevalue(ip_with_dynamic_db) out = ip_with_dynamic_db.run_cell( From 8e4aac33cbebed59de6f15fb84419613998812ac Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 21:26:20 +0200 Subject: [PATCH 15/23] index removed from integration tests --- .../integration/test_generic_db_opeations.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index 3ff399663..2c6bd3af1 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -152,41 +152,41 @@ def test_telemetry_execute_command_has_connection_info( [ ("ip_with_postgreSQL", "taxi", - 
["index", "taxi_driver_name"], + ["taxi_driver_name"], { - "count": [45, 45], - "mean": [22.0, 0.0], - "min": [0, "Eric Ken"], - "max": [44, "Kevin Kelly"], - "unique": [45, 3], - "freq": [1, 15], - "top": [0, "Kevin Kelly"], + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], } ), ("ip_with_mySQL", "taxi", - ["index", "taxi_driver_name"], + ["taxi_driver_name"], { - "count": [45, 45], - "mean": [22.0, 0.0], - "min": [0, "Eric Ken"], - "max": [44, "Kevin Kelly"], - "unique": [45, 3], - "freq": [1, 15], - "top": [0, "Kevin Kelly"], + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], } ), ("ip_with_mariaDB", "taxi", - ["index", "taxi_driver_name"], + ["taxi_driver_name"], { - "count": [45, 45], - "mean": [22.0, 0.0], - "min": [0, "Eric Ken"], - "max": [44, "Kevin Kelly"], - "unique": [45, 3], - "freq": [1, 15], - "top": [0, "Kevin Kelly"], + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], } ), ("ip_with_SQLite", From 829352d2814397c9842ab5b3d785b8bcfed0b054 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 21:39:07 +0200 Subject: [PATCH 16/23] postgres, mysql and maria excluded from profile test --- .../integration/test_generic_db_opeations.py | 85 ++++++++++--------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index 2c6bd3af1..4d01466b1 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -150,45 +150,52 @@ def test_telemetry_execute_command_has_connection_info( @pytest.mark.parametrize( "ip_with_dynamic_db, table, table_columns, expected", [ - ("ip_with_postgreSQL", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - } - ), - ("ip_with_mySQL", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - } - ), - ("ip_with_mariaDB", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - } - ), + pytest.param("ip_with_postgreSQL", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail( + reason="Need to parse results"), + ), + + pytest.param("ip_with_mySQL", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail( + reason="Need to get column names from table with a different query"), + ), + pytest.param("ip_with_mariaDB", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail( + reason="Need to get column names from table with a different query"), + ), ("ip_with_SQLite", "taxi", 
["taxi_driver_name"], From 823cc61f57a41274bb94a56839593037b9e6cc1c Mon Sep 17 00:00:00 2001 From: yafimvo Date: Wed, 8 Mar 2023 21:39:48 +0200 Subject: [PATCH 17/23] lint --- .../integration/test_generic_db_opeations.py | 161 +++++++++--------- 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py index 4d01466b1..ebc63d1db 100644 --- a/src/tests/integration/test_generic_db_opeations.py +++ b/src/tests/integration/test_generic_db_opeations.py @@ -150,83 +150,87 @@ def test_telemetry_execute_command_has_connection_info( @pytest.mark.parametrize( "ip_with_dynamic_db, table, table_columns, expected", [ - pytest.param("ip_with_postgreSQL", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - }, - marks=pytest.mark.xfail( - reason="Need to parse results"), - ), - - pytest.param("ip_with_mySQL", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - }, - marks=pytest.mark.xfail( - reason="Need to get column names from table with a different query"), - ), - pytest.param("ip_with_mariaDB", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - }, - marks=pytest.mark.xfail( - reason="Need to get column names from table with a different query"), - ), - ("ip_with_SQLite", - "taxi", - ["taxi_driver_name"], - { - "count": [45], - "mean": [0.0], - "min": ["Eric Ken"], - "max": ["Kevin Kelly"], - "unique": [3], - "freq": [15], - "top": ["Kevin Kelly"], - } - ), - ("ip_with_duckDB", - "taxi", - ["index", "taxi_driver_name"], - { - "count": [45, 45], - "mean": [22.0, math.nan], - "min": [0, "Eric Ken"], - "max": [44, "Kevin Kelly"], - "unique": [45, 3], - "freq": [1, 15], - "top": [0, "Eric Ken"], - "std": ["1.299e+01", math.nan], - "25%": [11, math.nan], - "50%": [22, math.nan], - "75%": [33, math.nan], - - } - ), + pytest.param( + "ip_with_postgreSQL", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail(reason="Need to parse results"), + ), + pytest.param( + "ip_with_mySQL", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail( + reason="Need to get column names from table with a different query" + ), + ), + pytest.param( + "ip_with_mariaDB", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + marks=pytest.mark.xfail( + reason="Need to get column names from table with a different query" + ), + ), + ( + "ip_with_SQLite", + "taxi", + ["taxi_driver_name"], + { + "count": [45], + "mean": [0.0], + "min": ["Eric Ken"], + "max": ["Kevin Kelly"], + "unique": [3], + "freq": [15], + "top": ["Kevin Kelly"], + }, + ), + ( + "ip_with_duckDB", + "taxi", + ["index", "taxi_driver_name"], + { + "count": [45, 45], + "mean": [22.0, math.nan], + "min": [0, "Eric Ken"], + "max": [44, "Kevin 
Kelly"], + "unique": [45, 3], + "freq": [1, 15], + "top": [0, "Eric Ken"], + "std": ["1.299e+01", math.nan], + "25%": [11, math.nan], + "50%": [22, math.nan], + "75%": [33, math.nan], + }, + ), ], ) def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expected): @@ -247,7 +251,8 @@ def test_profile_query(request, ip_with_dynamic_db, table, table_columns, expect for i, column in enumerate(table_columns): cell_value = row.get_string( - fields=[column], border=False, header=False).strip() + fields=[column], border=False, header=False + ).strip() assert criteria in expected assert cell_value == str(expected[criteria][i]) From 29492a12cd66d5d91b130c8a52a615a8792beca8 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Thu, 9 Mar 2023 10:49:34 +0200 Subject: [PATCH 18/23] postgresql fixed --- src/sql/inspect.py | 12 +++---- .../integration/test_generic_db_opeations.py | 33 ++++++++++--------- src/tests/test_magic_cmd.py | 6 ++-- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/sql/inspect.py b/src/sql/inspect.py index 2a67fcaec..d2383aa83 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -120,7 +120,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: try: result_col_freq_values = sql.run.run_raw( Connection.current, - f"""SELECT {column} as top, + f"""SELECT DISTINCT {column} as top, COUNT({column}) as frequency FROM {table_name} GROUP BY {column} ORDER BY Count({column}) Desc""", config, @@ -144,7 +144,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: COUNT(DISTINCT {column}) AS unique_count, COUNT({column}) AS count FROM {table_name} - WHERE {column} IS NOT NULL AND TRIM({column}) <> '' + WHERE {column} IS NOT NULL """, config, ).dict() @@ -165,11 +165,11 @@ def __init__(self, table_name, schema=None, config=None) -> None: f""" SELECT AVG({column}) AS avg FROM {table_name} - WHERE {column} IS NOT NULL AND TRIM({column}) <> '' + WHERE {column} IS NOT NULL """, config, ).dict() - table_stats[column]["mean"] = results_avg["avg"][0] + table_stats[column]["mean"] = float(results_avg["avg"][0]) columns_to_include_in_report.update(["mean"]) except Exception: @@ -198,7 +198,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: for key in special_numeric_keys: r_key = f'key_{key.replace("%", "")}' - table_stats[column][key] = result[r_key][0] + table_stats[column][key] = float(result[r_key][0]) columns_to_include_in_report.update(special_numeric_keys) @@ -214,7 +214,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: for key in special_numeric_keys: table_stats[column][key] = math.nan - # Failed to run sql command. + # Failed to run sql command/func (e.g stddev_pop). # We ignore the cell stats for such case. 
pass

diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py
index ebc63d1db..019e4395a 100644
--- a/src/tests/integration/test_generic_db_opeations.py
+++ b/src/tests/integration/test_generic_db_opeations.py
@@ -150,20 +150,23 @@ def test_telemetry_execute_command_has_connection_info(
 @pytest.mark.parametrize(
     "ip_with_dynamic_db, table, table_columns, expected",
     [
-        pytest.param(
+        (
             "ip_with_postgreSQL",
             "taxi",
-            ["taxi_driver_name"],
+            ["index", "taxi_driver_name"],
             {
-                "count": [45],
-                "mean": [0.0],
-                "min": ["Eric Ken"],
-                "max": ["Kevin Kelly"],
-                "unique": [3],
-                "freq": [15],
-                "top": ["Kevin Kelly"],
+                "count": [45, 45],
+                "mean": [22.0, math.nan],
+                "min": [0, "Eric Ken"],
+                "max": [44, "Kevin Kelly"],
+                "unique": [45, 3],
+                "freq": [1, 15],
+                "top": [0, "Eric Ken"],
+                "std": ["1.299e+01", ""],
+                "25%": [11.0, ""],
+                "50%": [22.0, ""],
+                "75%": [33.0, ""],
             },
-            marks=pytest.mark.xfail(reason="Need to parse results"),
         ),
         pytest.param(
             "ip_with_mySQL",
             "taxi",
             ["taxi_driver_name"],
             {
                 "count": [45],
                 "mean": [0.0],
                 "min": ["Eric Ken"],
                 "max": ["Kevin Kelly"],
                 "unique": [3],
                 "freq": [15],
                 "top": ["Kevin Kelly"],
             },
             marks=pytest.mark.xfail(
-                reason="Need to get column names from table with a different query"
+                reason="Need to get column names with a different query"
             ),
         ),
         pytest.param(
             "ip_with_mariaDB",
             "taxi",
             ["taxi_driver_name"],
             {
                 "count": [45],
                 "mean": [0.0],
                 "min": ["Eric Ken"],
                 "max": ["Kevin Kelly"],
                 "unique": [3],
                 "freq": [15],
                 "top": ["Kevin Kelly"],
             },
             marks=pytest.mark.xfail(
-                reason="Need to get column names from table with a different query"
+                reason="Need to get column names with a different query"
             ),
         ),
         (
             "ip_with_SQLite",
             "taxi",
             ["taxi_driver_name"],
             {
                 "count": [45],
                 "mean": [0.0],
                 "min": ["Eric Ken"],
                 "max": ["Kevin Kelly"],
                 "unique": [3],
                 "freq": [15],
                 "top": ["Kevin Kelly"],
             },
         ),
         (
             "ip_with_duckDB",
             "taxi",
             ["index", "taxi_driver_name"],
             {
                 "count": [45, 45],
                 "mean": [22.0, math.nan],
                 "min": [0, "Eric Ken"],
                 "max": [44, "Kevin Kelly"],
                 "unique": [45, 3],
                 "freq": [1, 15],
                 "top": [0, "Eric Ken"],
                 "std": ["1.299e+01", math.nan],
-                "25%": [11, math.nan],
-                "50%": [22, math.nan],
-                "75%": [33, math.nan],
+                "25%": [11.0, math.nan],
+                "50%": [22.0, math.nan],
+                "75%": [33.0, math.nan],
             },
         ),
     ],
diff --git a/src/tests/test_magic_cmd.py b/src/tests/test_magic_cmd.py
index 91ea7ce9c..20242df4c 100644
--- a/src/tests/test_magic_cmd.py
+++ b/src/tests/test_magic_cmd.py
@@ -91,11 +91,11 @@ def test_table_profile(ip, tmp_empty):
     )

     expected = {
-        "count": [8, 8, 8, 6],
+        "count": [8, 8, 8, 8],
         "mean": [12.2165, "6.875e-01", 88.75, 0.0],
-        "min": [10.532, 0.1, 82, "a"],
+        "min": [10.532, 0.1, 82, ""],
         "max": [14.44, 2.48, 98, "c"],
-        "unique": [8, 7, 8, 3],
+        "unique": [8, 7, 8, 5],
         "freq": [1, 2, 1, 4],
         "top": [14.44, 0.2, 98, "a"],
     }

From abeb44af9bd65702b2baa922109fc186891bffd5 Mon Sep 17 00:00:00 2001
From: yafimvo
Date: Thu, 9 Mar 2023 11:13:15 +0200
Subject: [PATCH 19/23] postgresql nan values fixed

---
 src/sql/inspect.py                                 |  2 +-
 src/tests/integration/test_generic_db_opeations.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/sql/inspect.py b/src/sql/inspect.py
index d2383aa83..d1d5c4807 100644
--- a/src/sql/inspect.py
+++ b/src/sql/inspect.py
@@ -210,7 +210,7 @@ def __init__(self, table_name, schema=None, config=None) -> None:
         except Exception as e:
             # We tried to apply numeric function on
             # non numeric value, i.e: DateTime
-            if "duckdb.BinderException" in str(e):
+            if "duckdb.BinderException" in str(e) or "add explicit type casts" in str(e):
                 for key in special_numeric_keys:
                     table_stats[column][key] = math.nan

diff --git a/src/tests/integration/test_generic_db_opeations.py b/src/tests/integration/test_generic_db_opeations.py
index 019e4395a..0f5908705 100644
--- a/src/tests/integration/test_generic_db_opeations.py
+++ b/src/tests/integration/test_generic_db_opeations.py
@@ -162,10 +162,10 @@ def
test_telemetry_execute_command_has_connection_info( "unique": [45, 3], "freq": [1, 15], "top": [0, "Eric Ken"], - "std": ["1.299e+01", ""], - "25%": [11.0, ""], - "50%": [22.0, ""], - "75%": [33.0, ""], + "std": ["1.299e+01", math.nan], + "25%": [11.0, math.nan], + "50%": [22.0, math.nan], + "75%": [33.0, math.nan], }, ), pytest.param( @@ -182,7 +182,7 @@ def test_telemetry_execute_command_has_connection_info( "top": ["Kevin Kelly"], }, marks=pytest.mark.xfail( - reason="Need to get column names with a different query" + reason="Need to get column names from table with a different query" ), ), pytest.param( @@ -199,7 +199,7 @@ def test_telemetry_execute_command_has_connection_info( "top": ["Kevin Kelly"], }, marks=pytest.mark.xfail( - reason="Need to get column names with a different query" + reason="Need to get column names from table with a different query" ), ), ( From 606b9bb5368d0ce436640854f0850d80f9cd09ce Mon Sep 17 00:00:00 2001 From: yafimvo Date: Tue, 14 Mar 2023 15:37:52 +0200 Subject: [PATCH 20/23] rebase --- .github/workflows/ci.yaml | 32 +++++++++++++++++++++++++++ CHANGELOG.md | 7 +++++- doc/community/vs.md | 12 ++++++++-- doc/integrations/duckdb.md | 45 ++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- src/sql/__init__.py | 2 +- src/sql/command.py | 1 + src/sql/connection.py | 35 +++++++++++++++++++---------- src/sql/magic.py | 2 +- src/sql/run.py | 1 + src/tests/test_magic.py | 13 +++++++++++ tasks.py | 2 +- 12 files changed, 135 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0e3adec95..99e07b0e3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -43,6 +43,38 @@ jobs: run: | pytest --durations-min=5 --ignore=src/tests/integration + test-sqlalchemy-v1: + strategy: + matrix: + python-version: ['3.11'] + os: [ubuntu-latest, macos-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Lint with flake8 + run: | + python -m pip install --upgrade pip + # run flake8 on .py files + pip install flake8 + flake8 + # run flake8 on notebooks (.ipynb, .md, etc) + pip install jupytext nbqa + nbqa flake8 . 
+      - name: Install dependencies
+        run: |
+          pip install "sqlalchemy<2"
+          pip install ".[dev]"
+      - name: Test with pytest
+        run: |
+          pytest --durations-min=5 --ignore=src/tests/integration

   # run: pkgmt check
   check:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cee954d8c..95526129e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,13 @@
 # CHANGELOG

-## 0.6.4dev
+## 0.6.5dev

 * [Feature] Adds `%sqlcmd profile` (#66)

+## 0.6.4 (2023-03-12)
+
+* [Fix] Adds support for SQLAlchemy 2.0
+* [Doc] Summary section on jupysql vs ipython-sql
+
 ## 0.6.3 (2023-03-06)

 * [Fix] Displaying variable substitution warning only when the variable to expand exists in the user's namespace
diff --git a/doc/community/vs.md b/doc/community/vs.md
index 808774a4d..759dd38e0 100644
--- a/doc/community/vs.md
+++ b/doc/community/vs.md
@@ -4,7 +4,15 @@ JupySQL is an actively maintained fork of [ipython-sql](https://github.com/cathe

 ## Incompatibilities

-If you're migrating from `ipython-sql` to JupySQL, these are the differences (it most cases, no code changes are needed):
+If you're migrating from `ipython-sql` to JupySQL, these are the differences (in most cases, no code changes are needed):

 - Since `0.6` JupySQL no longer supports old versions of IPython
-- Variable expansion is being replaced from `{variable}`, `${variable}` to `{{variable}}`
\ No newline at end of file
+- Variable expansion is being replaced from `{variable}`, `${variable}` to `{{variable}}`
+
+## New features
+
+- [Plotting](../plot) module that allows you to efficiently plot massive datasets without running out of memory.
+- JupySQL allows you to break queries into multiple cells with the help of CTEs. [Click here](../compose) to learn more.
+- Using `%sqlcmd tables` and `%sqlcmd columns --table/-t`, users can quickly explore tables in the database and the columns each table has. [Click here](../user-guide/tables-columns) to learn more.
+- [Polars Integration](../integrations/polars) to convert query results to `polars.DataFrame`. `%config SqlMagic.autopolars` can be used to automatically return Polars DataFrames instead of regular result sets.
+- Integration tests with PostgreSQL, MariaDB, MySQL, SQLite and DuckDB.
\ No newline at end of file
diff --git a/doc/integrations/duckdb.md b/doc/integrations/duckdb.md
index 78da10885..f310345fd 100644
--- a/doc/integrations/duckdb.md
+++ b/doc/integrations/duckdb.md
@@ -272,3 +272,48 @@ some_engine = create_engine(
 %sql some_engine
 ```

+## Listing Tables
+
+This section demonstrates how to list tables from both the `.csv` and `.parquet` files introduced in the previous sections.
+
+### Listing tables from a `.csv` file
+
+The data from the `.csv` file must first be registered as a table before it can be listed.
+
+```{code-cell} ipython3
+%%sql
+CREATE TABLE penguins AS SELECT * FROM penguins.csv
+```
+
+With the table registered, we can now list it:
+
+```{code-cell} ipython3
+%sqlcmd tables
+```
+
+### Listing tables from a `.parquet` file
+
+Likewise, to list the data from a `.parquet` file, the data must first be registered as a table; before registering, you can query the file directly, as sketched below.
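+
+The check below is a minimal sketch: it assumes the `yellow_tripdata_2021-01.parquet` file from the previous sections is in the working directory, and relies on DuckDB being able to scan `.parquet` files directly, without registering them first.
+
+```{code-cell} ipython3
+%%sql
+SELECT COUNT(*) FROM "yellow_tripdata_2021-01.parquet"
+```
+
+The file itself will not show up in `%sqlcmd tables` until it is registered, which the next cell does: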
+
+```{code-cell} ipython3
+%%sql
+CREATE TABLE tripdata AS SELECT * FROM "yellow_tripdata_2021-01.parquet"
+```
+
+The data can now be listed as a table:
+
+```{code-cell} ipython3
+%sqlcmd tables
+```
+
+## Listing Columns
+
+After registering the data from the `.csv` or `.parquet` files as tables, their respective columns can be listed with the following code:
+
+```{code-cell} ipython3
+%sqlcmd columns -t penguins
+```
+
+```{code-cell} ipython3
+%sqlcmd columns -t tripdata
+```
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 027499004..60ba9fdfb 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
 install_requires = [
     "prettytable",
     "ipython>=1.0",
-    "sqlalchemy>=0.6.7,<2.0",
+    "sqlalchemy",
     "sqlparse",
     "ipython-genutils>=0.1.0",
     "jinja2",
diff --git a/src/sql/__init__.py b/src/sql/__init__.py
index 478cfa53c..0923e9e5c 100644
--- a/src/sql/__init__.py
+++ b/src/sql/__init__.py
@@ -1,6 +1,6 @@
 from .magic import RenderMagic, SqlMagic, load_ipython_extension

-__version__ = "0.6.4dev"
+__version__ = "0.6.5dev"


 __all__ = [
diff --git a/src/sql/command.py b/src/sql/command.py
index 6fb840ba5..ec7d7990f 100644
--- a/src/sql/command.py
+++ b/src/sql/command.py
@@ -90,6 +90,7 @@ def result_var(self):

     def _var_expand(self, sql, user_ns, magic):
         sql = Template(sql).render(user_ns)
+
         parsed_sql = magic.shell.var_expand(sql, depth=2)

         has_SQLAlchemy_var_expand = ":" in sql and any(
diff --git a/src/sql/connection.py b/src/sql/connection.py
index 516d79fd9..ea340f06d 100644
--- a/src/sql/connection.py
+++ b/src/sql/connection.py
@@ -11,6 +11,7 @@
     "For technical support: https://ploomber.io/community"
     "\nDocumentation: https://jupysql.ploomber.io/en/latest/connecting.html"
 )
+IS_SQLALCHEMY_ONE = int(sqlalchemy.__version__.split(".")[0]) == 1

 # Check Full List: https://docs.sqlalchemy.org/en/20/dialects
 MISSING_PACKAGE_LIST_EXCEPT_MATCHERS = {
@@ -193,11 +194,23 @@ def _error_module_not_found(cls, e):
         return ModuleNotFoundError("test")

     def __init__(self, engine, alias=None):
-        self.dialect = engine.url.get_dialect()
-        self.metadata = sqlalchemy.MetaData(bind=engine)
+        self.url = engine.url
         self.name = self.assign_name(engine)
+        self.dialect = self.url.get_dialect()
         self.session = engine.connect()
-        self.connections[alias or repr(self.metadata.bind.url)] = self
+
+        if IS_SQLALCHEMY_ONE:
+            self.metadata = sqlalchemy.MetaData(bind=engine)
+
+        self.connections[
+            alias
+            or (
+                repr(sqlalchemy.MetaData(bind=engine).bind.url)
+                if IS_SQLALCHEMY_ONE
+                else repr(engine.url)
+            )
+        ] = self
+
         self.connect_args = None
         self.alias = alias
         Connection.current = self
@@ -298,7 +311,7 @@ def connection_list(cls):
         result = []
         for key in sorted(cls.connections):
             conn = cls.connections[key]
-            engine_url = conn.metadata.bind.url  # type: sqlalchemy.engine.url.URL
+            engine_url = conn.metadata.bind.url if IS_SQLALCHEMY_ONE else conn.url

             prefix = "* " if conn == cls.current else "  "

@@ -312,7 +325,7 @@ def connection_list(cls):
         return "\n".join(result)

     @classmethod
-    def _close(cls, descriptor):
+    def close(cls, descriptor):
         if isinstance(descriptor, Connection):
             conn = descriptor
         else:
@@ -328,12 +341,10 @@ def _close(cls, descriptor):
         if descriptor in cls.connections:
             cls.connections.pop(descriptor)
         else:
-            cls.connections.pop(str(conn.metadata.bind.url))
-
-        conn.session.close()
-
-    def close(self):
-        self.__class__._close(self)
+            cls.connections.pop(
+                str(conn.metadata.bind.url) if IS_SQLALCHEMY_ONE else str(conn.url)
+            )
+            conn.session.close()
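+            # The IS_SQLALCHEMY_ONE branches above exist because SQLAlchemy 2.x
+            # removed MetaData(bind=...): under 2.x the connection key and URL
+            # come straight from engine.url rather than from a bound MetaData
+            # object.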
@classmethod def _get_curr_connection_info(cls): @@ -341,7 +352,7 @@ def _get_curr_connection_info(cls): if not cls.current: return None - engine = cls.current.metadata.bind + engine = cls.current.metadata.bind if IS_SQLALCHEMY_ONE else cls.current return { "dialect": getattr(engine.dialect, "name", None), "driver": getattr(engine.dialect, "driver", None), diff --git a/src/sql/magic.py b/src/sql/magic.py index c15cdcd8e..5c8e50381 100644 --- a/src/sql/magic.py +++ b/src/sql/magic.py @@ -266,7 +266,7 @@ def _execute(self, payload, line, cell, local_ns): if args.connections: return sql.connection.Connection.connections elif args.close: - return sql.connection.Connection._close(args.close) + return sql.connection.Connection.close(args.close) connect_arg = command.connection diff --git a/src/sql/run.py b/src/sql/run.py index 462492140..bec6b06e5 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -408,6 +408,7 @@ def run(conn, sql, config, user_namespace): _commit(conn=conn, config=config) if result and config.feedback: print(interpret_rowcount(result.rowcount)) + resultset = ResultSet(result, config) if config.autopandas: return resultset.DataFrame() diff --git a/src/tests/test_magic.py b/src/tests/test_magic.py index 8b0086c42..b52e5f21e 100644 --- a/src/tests/test_magic.py +++ b/src/tests/test_magic.py @@ -590,3 +590,16 @@ def test_jupysql_alias(): "line": {"jupysql": "execute", "sql": "execute"}, "cell": {"jupysql": "execute", "sql": "execute"}, } + + +@pytest.mark.xfail(reason="will be fixed once we deprecate the $name parametrization") +def test_columns_with_dollar_sign(ip_empty): + ip_empty.run_cell("%sql sqlite://") + result = ip_empty.run_cell( + """ + %sql SELECT $2 FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three'))""" + ) + + html = result.result._repr_html_() + + assert "$2" in html diff --git a/tasks.py b/tasks.py index a84a784c1..8cdea7e48 100644 --- a/tasks.py +++ b/tasks.py @@ -32,7 +32,7 @@ def setup(c, version=None, doc=False): @task(aliases=["d"]) def doc(c): - with c.cd('doc'): + with c.cd("doc"): c.run( "python3 -m sphinx -T -E -W --keep-going -b html \ -d _build/doctrees -D language=en . _build/html" From ea81d9e39caabd0c1df246a0c1f07567c667efc2 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Tue, 14 Mar 2023 16:32:46 +0200 Subject: [PATCH 21/23] naming changed --- src/sql/inspect.py | 10 +++++----- src/sql/run.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sql/inspect.py b/src/sql/inspect.py index d1d5c4807..59c2f39d7 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -106,7 +106,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: if schema: table_name = f"{schema}.{table_name}" - columns = sql.run.run_raw( + columns = sql.run.raw_run( Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config ).keys @@ -118,7 +118,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: # Note: index is reserved word in sqlite try: - result_col_freq_values = sql.run.run_raw( + result_col_freq_values = sql.run.raw_run( Connection.current, f"""SELECT DISTINCT {column} as top, COUNT({column}) as frequency FROM {table_name} @@ -136,7 +136,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: try: # get all non None values, min, max and avg. 
- result_value_values = sql.run.run_raw( + result_value_values = sql.run.raw_run( Connection.current, f""" SELECT MIN({column}) AS min, @@ -160,7 +160,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: pass try: - results_avg = sql.run.run_raw( + results_avg = sql.run.raw_run( Connection.current, f""" SELECT AVG({column}) AS avg @@ -180,7 +180,7 @@ def __init__(self, table_name, schema=None, config=None) -> None: try: # Note: stddev_pop and PERCENTILE_DISC will work only on DuckDB - result = sql.run.run_raw( + result = sql.run.raw_run( Connection.current, f""" SELECT diff --git a/src/sql/run.py b/src/sql/run.py index bec6b06e5..a655f3d56 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -421,7 +421,7 @@ def run(conn, sql, config, user_namespace): return "Connected: %s" % conn.name -def run_raw(conn, sql, config): +def raw_run(conn, sql, config): result = conn.session.execute(sql) resultset = ResultSet(result, config) return resultset From 1f5bea07ccd019a0201331205b8058dc6a7078c0 Mon Sep 17 00:00:00 2001 From: yafimvo Date: Thu, 16 Mar 2023 17:00:15 +0200 Subject: [PATCH 22/23] sqlalchemy downgraded to 1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 60ba9fdfb..646bf009e 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ install_requires = [ "prettytable", "ipython>=1.0", - "sqlalchemy", + "sqlalchemy<2", "sqlparse", "ipython-genutils>=0.1.0", "jinja2", From a0398f18505540f61cc85d99cdb41247d61a65ed Mon Sep 17 00:00:00 2001 From: yafimvo Date: Sun, 19 Mar 2023 12:47:07 +0200 Subject: [PATCH 23/23] config removed from raw_run --- src/sql/inspect.py | 51 +++++++++++++++++++++----------------------- src/sql/magic_cmd.py | 2 +- src/sql/run.py | 6 ++---- 3 files changed, 27 insertions(+), 32 deletions(-) diff --git a/src/sql/inspect.py b/src/sql/inspect.py index 59c2f39d7..a8970ae22 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -102,13 +102,13 @@ class TableDescription(DatabaseInspection): """ - def __init__(self, table_name, schema=None, config=None) -> None: + def __init__(self, table_name, schema=None) -> None: if schema: table_name = f"{schema}.{table_name}" columns = sql.run.raw_run( - Connection.current, f"SELECT * FROM {table_name} WHERE 1=0", config - ).keys + Connection.current, f"SELECT * FROM {table_name} WHERE 1=0" + ).keys() table_stats = dict({}) columns_to_include_in_report = set() @@ -122,12 +122,11 @@ def __init__(self, table_name, schema=None, config=None) -> None: Connection.current, f"""SELECT DISTINCT {column} as top, COUNT({column}) as frequency FROM {table_name} - GROUP BY {column} ORDER BY Count({column}) Desc""", - config, - ).dict() + GROUP BY {column} ORDER BY Count({column}) Desc""" + ).fetchall() - table_stats[column]["freq"] = result_col_freq_values["frequency"][0] - table_stats[column]["top"] = result_col_freq_values["top"][0] + table_stats[column]["freq"] = result_col_freq_values[0][1] + table_stats[column]["top"] = result_col_freq_values[0][0] columns_to_include_in_report.update(["freq", "top"]) @@ -145,14 +144,13 @@ def __init__(self, table_name, schema=None, config=None) -> None: COUNT({column}) AS count FROM {table_name} WHERE {column} IS NOT NULL - """, - config, - ).dict() + """ + ).fetchall() - table_stats[column]["count"] = result_value_values["count"][0] - table_stats[column]["unique"] = result_value_values["unique_count"][0] - table_stats[column]["min"] = result_value_values["min"][0] - table_stats[column]["max"] = result_value_values["max"][0] + 
table_stats[column]["min"] = result_value_values[0][0] + table_stats[column]["max"] = result_value_values[0][1] + table_stats[column]["unique"] = result_value_values[0][2] + table_stats[column]["count"] = result_value_values[0][3] columns_to_include_in_report.update(["count", "unique", "min", "max"]) @@ -166,10 +164,10 @@ def __init__(self, table_name, schema=None, config=None) -> None: SELECT AVG({column}) AS avg FROM {table_name} WHERE {column} IS NOT NULL - """, - config, - ).dict() - table_stats[column]["mean"] = float(results_avg["avg"][0]) + """ + ).fetchall() + + table_stats[column]["mean"] = float(results_avg[0][0]) columns_to_include_in_report.update(["mean"]) except Exception: @@ -192,13 +190,12 @@ def __init__(self, table_name, schema=None, config=None) -> None: percentile_disc(0.75) WITHIN GROUP (ORDER BY {column}) as key_75 FROM {table_name} - """, - config, - ).dict() + """ + ).fetchall() - for key in special_numeric_keys: - r_key = f'key_{key.replace("%", "")}' - table_stats[column][key] = float(result[r_key][0]) + for i, key in enumerate(special_numeric_keys): + # r_key = f'key_{key.replace("%", "")}' + table_stats[column][key] = float(result[0][i]) columns_to_include_in_report.update(special_numeric_keys) @@ -252,11 +249,11 @@ def get_columns(name, schema=None): @telemetry.log_call() -def get_table_statistics(name, schema=None, config=None): +def get_table_statistics(name, schema=None): """Get table statistics for a given connection. For all data types the results will include `count`, `mean`, `std`, `min` `max`, `25`, `50` and `75` percentiles. It will also include `unique`, `top` and `freq` statistics. """ - return TableDescription(name, schema=schema, config=config) + return TableDescription(name, schema=schema) diff --git a/src/sql/magic_cmd.py b/src/sql/magic_cmd.py index bb9d1a079..6e8cff4ad 100644 --- a/src/sql/magic_cmd.py +++ b/src/sql/magic_cmd.py @@ -84,7 +84,7 @@ def execute(self, line="", cell=""): args = parser.parse_args(others) report = inspect.get_table_statistics( - schema=args.schema, name=args.table, config=self.config + schema=args.schema, name=args.table ) if args.output: diff --git a/src/sql/run.py b/src/sql/run.py index b33a65330..3efec6382 100644 --- a/src/sql/run.py +++ b/src/sql/run.py @@ -439,10 +439,8 @@ def run(conn, sql, config): return "Connected: %s" % conn.name -def raw_run(conn, sql, config): - result = conn.session.execute(sql) - resultset = ResultSet(result, config) - return resultset +def raw_run(conn, sql): + return conn.session.execute(sql) class PrettyTable(prettytable.PrettyTable):
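
After this last patch, the profiling flow reduces to a simple pattern: `raw_run` hands back the raw SQLAlchemy result, and `TableDescription` reads the aggregates positionally from `fetchall()`. The following is a minimal sketch of that pattern — the in-memory engine and the table `t` are hypothetical stand-ins, and SQLAlchemy 1.x is assumed, matching the `sqlalchemy<2` pin above:

```python
import sqlalchemy

# hypothetical in-memory database standing in for a user connection
engine = sqlalchemy.create_engine("sqlite://")
session = engine.connect()
session.execute("CREATE TABLE t (x INTEGER)")
session.execute("INSERT INTO t VALUES (1), (2), (2)")


def raw_run(session, sql):
    # same shape as the final helper in src/sql/run.py: no config, no ResultSet
    return session.execute(sql)


row = raw_run(
    session,
    "SELECT MIN(x), MAX(x), COUNT(DISTINCT x), COUNT(x) FROM t WHERE x IS NOT NULL",
).fetchall()[0]

# positional access, mirroring TableDescription: min, max, unique, count
print(row[0], row[1], row[2], row[3])  # 1 2 2 3
```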