Skip to content

Commit

Permalink
Merge branch 'main' into databricks_dialect
Browse files Browse the repository at this point in the history
  • Loading branch information
EvgSkv authored Oct 3, 2023
2 parents 6f75c2b + 872b27a commit 68a643d
Show file tree
Hide file tree
Showing 81 changed files with 8,217 additions and 225 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*.DS_Store
*.pyc
.ipynb_checkpoints
.idea
__pycache__/
docs/.DS_Store
21 changes: 13 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,26 @@ a language created at Google earlier.
## Why?

Logica is for engineers, data scientists and other specialists who want to use
logic programming syntax when writing queries and pipelines to run on
[BigQuery](https://cloud.google.com/bigquery).
logic programming syntax when writing queries and pipelines for databases and data warehouses.
Logica programs run on
[BigQuery](https://cloud.google.com/bigquery), [Postgres](https://postgresql.org) and [SQLite](https://www.sqlite.org/).

Logica compiles to StandardSQL and gives you access to the power of BigQuery
engine with the convenience of logic programming syntax. This is useful because
BigQuery is magnitudes more powerful than state of the art native
logic programming engines.
Logica compiles to SQL and gives you access to the power of the SQL ecosystem
with the convenience of logic programming syntax.

This is useful because
SQL engines are orders of magnitude more powerful than state of the art native
logic programming engines. For example, BigQuery is a distributed data warehouse and thus logic programs written
in Logica can be easily parallelized onto thousands of servers. Postgres and SQLite are among the most popular databases; they are
capable of processing substantial volumes of data right on your machine.

We encourage you to try Logica, especially if

* you already use logic programming and need more computational power, **or**
* you use SQL, but feel unsatisfied about its readability, **or**
* you already have data in BigQuery, PostgreSQL or SQLite, **or**
* you want to learn logic programming and apply it to processing of Big Data.

In the future we plan to support more SQL dialects and engines.
Support for more SQL dialects and engines is coming in the future.

## I have not heard of logic programming. What is it?

Expand Down
99 changes: 75 additions & 24 deletions colab_logica.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,21 @@

"""Library for using Logica in CoLab."""

from decimal import Decimal
import getpass
import json
import re

from .common import color
from .common import concertina_lib
from .common import psql_logica

from .compiler import functors
from .compiler import rule_translate
from .compiler import universe

from .type_inference.research import infer

import IPython

from IPython.core.magic import register_cell_magic
Expand Down Expand Up @@ -70,6 +78,13 @@

PREAMBLE = None

if hasattr(concertina_lib, 'graphviz'):
DISPLAY_MODE = 'colab'
else:
DISPLAY_MODE = 'colab-text'

DEFAULT_ENGINE = 'bigquery'


def SetPreamble(preamble):
global PREAMBLE
Expand All @@ -83,6 +98,12 @@ def SetDbConnection(connection):
global DB_CONNECTION
DB_CONNECTION = connection

def ConnectToPostgres(mode='interactive'):
  """Connects to Postgres and makes 'psql' the default engine.

  The connection obtained from psql_logica.ConnectToPostgres is registered
  through SetDbConnection, after which DEFAULT_ENGINE is switched to 'psql'.

  Args:
    mode: Passed through unchanged to psql_logica.ConnectToPostgres.
  """
  global DEFAULT_ENGINE
  SetDbConnection(psql_logica.ConnectToPostgres(mode))
  DEFAULT_ENGINE = 'psql'

def EnsureAuthenticatedUser():
global USER_AUTHENTICATED
global PROJECT
Expand Down Expand Up @@ -143,9 +164,14 @@ def RunSQL(sql, engine, connection=None, is_final=False):
return client.query(sql).to_dataframe()
elif engine == 'psql':
if is_final:
return pandas.read_sql(sql, connection)
cursor = psql_logica.PostgresExecute(sql, connection)
rows = cursor.fetchall()
df = pandas.DataFrame(
rows, columns=[d[0] for d in cursor.description])
df = df.applymap(psql_logica.DigestPsqlType)
return df
else:
return connection.execute(sql)
psql_logica.PostgresExecute(sql, connection)
elif engine == 'sqlite':
try:
if is_final:
Expand All @@ -164,6 +190,15 @@ def RunSQL(sql, engine, connection=None, is_final=False):
'for now.')


def Ingress(table_name, csv_file_name):
  """Loads a CSV file (with a header row) into a table via COPY.

  Uses the module-level DB_CONNECTION; the copy is committed on success.

  Args:
    table_name: Name of the destination table. It is interpolated into the
      COPY statement verbatim, so it must come from a trusted source.
    csv_file_name: Path of the CSV file to load.
  """
  copy_statement = 'COPY %s FROM STDIN WITH CSV HEADER' % table_name
  with open(csv_file_name) as csv_data_io:
    cursor = DB_CONNECTION.cursor()
    try:
      cursor.copy_expert(copy_statement, csv_data_io)
    finally:
      # Release the cursor even when the copy fails; previously it was
      # never closed.
      cursor.close()
  DB_CONNECTION.commit()


class SqliteRunner(object):
def __init__(self):
self.connection = sqlite3_logica.SqliteConnect()
Expand All @@ -177,13 +212,17 @@ class PostgresRunner(object):
def __init__(self):
global DB_CONNECTION
global DB_ENGINE
if DB_CONNECTION:
self.engine = DB_ENGINE
self.connection = DB_CONNECTION
else:
(self.engine, self.connection) = PostgresJumpStart()
DB_ENGINE = self.engine
DB_CONNECTION = self.connection
if not DB_CONNECTION:
print("Assuming this is running on Google CoLab in a temporary")
print("environment.")
print("Would you like to install and run postgres?")
user_choice = input('y or N? ')
if user_choice != 'y':
print('User declined.')
print('Bailing out.')
return
PostgresJumpStart()
self.connection = DB_CONNECTION

def __call__(self, sql, engine, is_final):
return RunSQL(sql, engine, self.connection, is_final)
Expand All @@ -206,13 +245,18 @@ def Logica(line, cell, run_query):
e.ShowMessage()
return
try:
program = universe.LogicaProgram(parsed_rules)
program = universe.LogicaProgram(
parsed_rules,
user_flags={'logica_default_engine': DEFAULT_ENGINE})
except functors.FunctorError as e:
e.ShowMessage()
return
except rule_translate.RuleCompileException as e:
e.ShowMessage()
return
except infer.TypeErrorCaughtException as e:
e.ShowMessage()
return

engine = program.annotations.Engine()

Expand Down Expand Up @@ -269,9 +313,13 @@ def Logica(line, cell, run_query):
else:
raise Exception('Logica only supports BigQuery, PostgreSQL and SQLite '
'for now.')

result_map = concertina_lib.ExecuteLogicaProgram(
executions, sql_runner=sql_runner, sql_engine=engine)
try:
result_map = concertina_lib.ExecuteLogicaProgram(
executions, sql_runner=sql_runner, sql_engine=engine,
display_mode=DISPLAY_MODE)
except infer.TypeErrorCaughtException as e:
e.ShowMessage()
return

for idx, predicate in enumerate(predicates):
t = result_map[predicate]
Expand Down Expand Up @@ -320,18 +368,21 @@ def PostgresJumpStart():
# Connect to the database.
from logica import colab_logica
from sqlalchemy import create_engine
import pandas
engine = create_engine('postgresql+psycopg2://logica:logica@127.0.0.1', pool_recycle=3600);
connection = engine.connect();
colab_logica.SetDbConnection(connection)""")
import psycopg2
connection = psycopg2.connect(host='localhost', database='logica', user='logica', password='logica')
connection.autocommit = True
colab_logica.DEFAULT_ENGINE = 'psql'
colab_logica.SetDbConnection(connection)
""")
return
print('Installation succeeded. Connecting...')
# Connect to the database.
from logica import colab_logica
from sqlalchemy import create_engine
import pandas
engine = create_engine('postgresql+psycopg2://logica:logica@127.0.0.1', pool_recycle=3600)
connection = engine.connect()
import psycopg2
connection = psycopg2.connect(host='localhost', database='logica', user='logica', password='logica')
connection.autocommit = True

print('Connected.')
return engine, connection
global DEFAULT_ENGINE
global DB_CONNECTION
DEFAULT_ENGINE = 'psql'
DB_CONNECTION = connection
52 changes: 44 additions & 8 deletions common/concertina_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@

try:
import graphviz
except:
pass
# This is annoying to see in terminal each time.
# Consider adding back if lack of messaging is confusing.
# print('Could not import graphviz tools in Concertina.')

try:
from IPython.display import HTML
from IPython.display import display
from IPython.display import update_display
except:
print('Could not import CoLab tools in Concertina.')
print('Could not import IPython in Concertina.')

if '.' not in __package__:
from common import graph_art
Expand All @@ -33,7 +41,7 @@ def Run(self, action):
is_final=(predicate in self.final_predicates))
end = datetime.datetime.now()
if self.print_running_predicate:
print(' (%d seconds)' % (end - start).seconds)
print(' (%d ms)' % int((end - start).total_seconds() * 1000))
if predicate in self.final_predicates:
self.final_result[predicate] = result

Expand Down Expand Up @@ -75,7 +83,7 @@ def __init__(self, config, engine, display_mode='colab'):
self.all_actions = {a["name"] for a in self.config}
self.complete_actions = set()
self.running_actions = set()
assert display_mode in ('colab', 'terminal'), (
assert display_mode in ('colab', 'terminal', 'colab-text'), (
'Unrecognized display mode: %s' % display_mode)
self.display_mode = display_mode
self.display_id = self.GetDisplayId()
Expand Down Expand Up @@ -137,7 +145,14 @@ def AsNodesAndEdges(self):
"""Nodes and edges to display in terminal."""
def ColoredNode(node):
if node in self.running_actions:
return '\033[1m\033[93m' + node + '\033[0m'
if self.display_mode == 'terminal':
return '\033[1m\033[93m' + node + '\033[0m'
elif self.display_mode == 'colab-text':
return (
'<b>' + node + '</b>'
)
else:
assert False, self.display_mode
else:
return node
nodes = []
Expand All @@ -150,11 +165,24 @@ def ColoredNode(node):
edges.append([prerequisite_node, a_node])
return nodes, edges

def StateAsSimpleHTML(self):
  """Returns the text picture of the current state wrapped in a styled box.

  The picture comes from AsTextPicture(updating=False) and is placed inside
  a <pre> element within a <div> carrying inline CSS.
  """
  css_rules = (
      'border: 1px solid rgba(0, 0, 0, 0.3)',
      'width: fit-content;',
      'padding: 20px',
      'border-radius: 5px',
      'box-shadow: 1px 1px 3px rgba(0, 0, 0, 0.2)',
  )
  picture = self.AsTextPicture(updating=False)
  markup = '<div style="%s"><pre>%s</pre></div>' % (
      ';'.join(css_rules), picture)
  return HTML(markup)

def Display(self):
  """Shows the initial rendering of the state for the current display mode.

  'colab' displays the GraphViz rendering, 'terminal' prints the text
  picture, and 'colab-text' displays the simple HTML rendering.

  Raises:
    AssertionError: If self.display_mode is not one of the modes above.
  """
  if self.display_mode == 'colab':
    display(self.AsGraphViz(), display_id=self.display_id)
  elif self.display_mode == 'terminal':
    print(self.AsTextPicture(updating=False))
  elif self.display_mode == 'colab-text':
    display(self.StateAsSimpleHTML(),
            display_id=self.display_id)
  else:
    # The previous `assert 'Unexpected mode:', mode` tested a non-empty
    # string and therefore could never fail; raise explicitly instead.
    raise AssertionError('Unexpected mode: %s' % self.display_mode)

Expand All @@ -163,6 +191,10 @@ def UpdateDisplay(self):
update_display(self.AsGraphViz(), display_id=self.display_id)
elif self.display_mode == 'terminal':
print(self.AsTextPicture(updating=True))
elif self.display_mode == 'colab-text':
update_display(
self.StateAsSimpleHTML(),
display_id=self.display_id)
else:
assert 'Unexpected mode:', self.display_mode

Expand Down Expand Up @@ -261,10 +293,14 @@ def ConcertinaConfig(table_to_export_map, dependency_edges,
print_running_predicate=(display_mode != 'terminal'))

preambles = set(e.preamble for e in logica_executions)
assert len(preambles) == 1, 'Inconsistent preambles: %s' % preambles
[preamble] = list(preambles)
if preamble:
sql_runner(preamble, sql_engine, is_final=False)
# Due to change of types from predicate to predicate preambles are not
# consistent. However we expect preambles to be idempotent.
# So we simply run all of them.
# assert len(preambles) == 1, 'Inconsistent preambles: %s' % preambles
# [preamble] = list(preambles)
for preamble in preambles:
if preamble:
sql_runner(preamble, sql_engine, is_final=False)

concertina = Concertina(config, engine, display_mode=display_mode)
concertina.Run()
Expand Down
46 changes: 46 additions & 0 deletions common/logica_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@
"""Utilities for YotaQL tests."""

import subprocess
import json

if '.' not in __package__:
from common import color
from common import logica_lib
from type_inference.research import infer
from parser_py import parse
else:
from ..common import color
from ..common import logica_lib
from ..type_inference.research import infer
from ..parser_py import parse


class TestManager(object):
Expand Down Expand Up @@ -54,6 +59,47 @@ def RunTest(cls, name, src, predicate, golden, user_flags,
cls.GOLDEN_RUN, cls.ANNOUNCE_TESTS,
import_root)

@classmethod
def RunTypesTest(cls, name, src=None, golden=None):
  """Runs the types test `name` unless it is filtered out by RUN_ONLY.

  When cls.RUN_ONLY is non-empty and does not contain `name`, nothing is
  run. Otherwise delegates to the module-level RunTypesTest, overwriting
  the golden file when cls.GOLDEN_RUN is set.
  """
  selected = cls.RUN_ONLY
  if selected and name not in selected:
    return
  RunTypesTest(name, src, golden, overwrite=cls.GOLDEN_RUN)


def RunTypesTest(name, src=None, golden=None,
                 overwrite=False):
  """Runs type inference on a program and compares the result to a golden.

  The program is parsed, a TypesInferenceEngine is run over the parsed rules
  (presumably annotating them in place -- confirm in infer), and the rules
  are serialized to JSON. The JSON is compared with the golden file; on a
  mismatch the `diff` command is run to print the difference. Progress and
  the final PASSED/FAILED status are printed to stdout.

  Exits the process with status 1 if the program fails to parse.

  Args:
    name: Test name; also the default base name for `src` and `golden`.
    src: Path of the program file. Defaults to name + '.l'.
    golden: Path of the golden file. Defaults to name + '.txt'.
    overwrite: If true, the golden file is overwritten with the current
      result before the comparison.
  """
  # `sys` is not imported at module level; import locally so that the
  # parse-failure path exits instead of raising NameError.
  import sys

  src = src or (name + '.l')
  golden = golden or (name + '.txt')

  test_result = '{warning}RUNNING{end}'
  print(color.Format('% 50s %s' % (name, test_result)))

  with open(src) as program_file:
    program_text = program_file.read()
  try:
    parsed_rules = parse.ParseFile(program_text)['rule']
  except parse.ParsingException as parsing_exception:
    parsing_exception.ShowMessage()
    sys.exit(1)

  typing_engine = infer.TypesInferenceEngine(parsed_rules)
  typing_engine.InferTypes()
  result = json.dumps(parsed_rules, sort_keys=True, indent=' ')

  if overwrite:
    with open(golden, 'w') as w:
      w.write(result)
  with open(golden) as golden_file:
    golden_result = golden_file.read()

  if result == golden_result:
    test_result = '{ok}PASSED{end}'
  else:
    # Feed the fresh result to `diff` on stdin and compare with the golden.
    p = subprocess.Popen(['diff', '-', golden], stdin=subprocess.PIPE)
    p.communicate(result.encode())
    test_result = '{error}FAILED{end}'

  # Move the cursor up one line and clear it, replacing the RUNNING status.
  print('\033[F\033[K' + color.Format('% 50s %s' % (name, test_result)))


def RunTest(name, src, predicate, golden,
user_flags=None,
Expand Down
Loading

0 comments on commit 68a643d

Please sign in to comment.