Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds new positional arg for class SQLQuery #65

Merged
merged 6 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Python/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]
mock = {version = ">=3.0.5", markers="python_version >= '2.7' and python_version < '3.3'"}
responses = ">=0.10.14"
pytest = ">=2.8.0,<=3.10.1"
pytest-cov = "*"

Expand All @@ -15,6 +15,7 @@ numpy = "*"
pandas = "*"
pyarrow = "==0.15.1"
requests = ">= 2.2.0"
backoff = "==1.10.0"

[scripts]
ci = "pytest"
Expand Down
173 changes: 116 additions & 57 deletions Python/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ You can use IBM Watson Studio with the following [demo notebook](https://datapla
* `instance_crn`: SQL Query instance CRN identifier
* `target_cos_url`: Optional default target URL. Don't use when you want to provide target URL in SQL statement text.
* `client_info`: Optional string to identify your client application in IBM Cloud for PD reasons.
* `max_tries`: Optional integer to specify the maximum number of attempts when the request rate limit is hit. The default value is `1`, which means a `RateLimitedException` is raised as soon as a response with status code `429` is received. Specifying any integer greater than `1` enables _exponential backoff_. For instance, given `max_tries=5`, and assuming the first 4 attempts get response status code `429` and the 5th attempt gets response status code `201`, the wait times will be `2s`, `4s`, `8s` and `16s` after each failed attempt.
52 changes: 33 additions & 19 deletions Python/ibmcloudsql/SQLQuery.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import xml.etree.ElementTree as ET
import sys
import types
import backoff
import requests
from requests.exceptions import HTTPError
import pandas as pd
Expand All @@ -35,7 +36,7 @@ class RateLimitedException(Exception):
pass

class SQLQuery():
def __init__(self, api_key, instance_crn, target_cos_url=None, client_info=''):
def __init__(self, api_key, instance_crn, target_cos_url=None, client_info='', max_tries=1):
self.instance_crn = instance_crn
self.target_cos = target_cos_url
self.export_cos_url = target_cos_url
Expand All @@ -44,6 +45,8 @@ def __init__(self, api_key, instance_crn, target_cos_url=None, client_info=''):
else:
self.user_agent = client_info

self.max_tries = max_tries

self.request_headers = {'Content-Type': 'application/json'}
self.request_headers.update({'Accept': 'application/json'})
self.request_headers.update({'User-Agent': self.user_agent})
Expand Down Expand Up @@ -122,29 +125,14 @@ def logon(self, force=False):
self.logged_on = True
self.last_logon = datetime.now()

def submit_sql(self, sql_text, pagesize=None):
self.logon()
sqlData = {'statement': sql_text}
# If a valid pagesize is specified we need to append the proper PARTITIONED EVERY <num> ROWS clause
if pagesize or pagesize==0:
if type(pagesize) == int and pagesize>0:
if self.target_cos:
sqlData["statement"] += " INTO {}".format(self.target_cos)
elif " INTO " not in sql_text.upper():
raise SyntaxError("Neither resultset_target parameter nor \"INTO\" clause specified.")
elif " PARTITIONED " in sql_text.upper():
raise SyntaxError("Must not use PARTITIONED clause when specifying pagesize parameter.")
sqlData["statement"] += " PARTITIONED EVERY {} ROWS".format(pagesize)
else:
raise ValueError('pagesize parameter ({}) is not valid.'.format(pagesize))
elif self.target_cos:
sqlData.update({'resultset_target': self.target_cos})
def _send_req(self, json_data):
'''send SQL data to API. return job id'''

try:
response = requests.post(
"https://api.sql-query.cloud.ibm.com/v2/sql_jobs?instance_crn={}".format(self.instance_crn),
headers=self.request_headers,
json=sqlData)
json=json_data)

# Throw in case we hit the rate limit
if (response.status_code == 429):
Expand All @@ -161,6 +149,32 @@ def submit_sql(self, sql_text, pagesize=None):
except HTTPError as e:
raise SyntaxError("SQL submission failed: {}".format(response.json()['errors'][0]['message']))

def submit_sql(self, sql_text, pagesize=None):
    """Submit a SQL statement and return the result of ``_send_req``.

    The statement is sent through ``_send_req`` wrapped in
    ``backoff.on_exception`` so that a ``RateLimitedException`` is retried
    with exponentially growing waits, for at most ``self.max_tries``
    attempts in total.

    Args:
        sql_text: The SQL statement text to submit.
        pagesize: Optional positive integer. When given, a
            ``PARTITIONED EVERY <pagesize> ROWS`` clause is appended to the
            statement (preceded by ``INTO <target_cos>`` when a default
            target is configured on the instance).

    Returns:
        Whatever ``_send_req`` returns for the submitted job.

    Raises:
        ValueError: If ``pagesize`` is supplied but is not a positive int.
        SyntaxError: If ``pagesize`` is supplied without any result target,
            or together with a statement that already contains a
            ``PARTITIONED`` clause.
        RateLimitedException: Propagated from ``_send_req`` once the
            configured number of attempts is exhausted.
    """
    self.logon()
    sqlData = {'statement': sql_text}

    # If a valid pagesize is specified we need to append the proper
    # PARTITIONED EVERY <num> ROWS clause.
    if pagesize or pagesize == 0:
        # bool is a subclass of int; exclude it explicitly so True/False keep
        # being rejected exactly as the former `type(pagesize) == int` did.
        is_valid_pagesize = (
            isinstance(pagesize, int)
            and not isinstance(pagesize, bool)
            and pagesize > 0
        )
        if not is_valid_pagesize:
            raise ValueError('pagesize parameter ({}) is not valid.'.format(pagesize))

        # NOTE(review): the PARTITIONED check below is only reached when no
        # default target is configured; with self.target_cos set, a statement
        # that already carries a PARTITIONED clause is not rejected here.
        # Confirm whether that is intended before tightening it.
        if self.target_cos:
            sqlData["statement"] += " INTO {}".format(self.target_cos)
        elif " INTO " not in sql_text.upper():
            raise SyntaxError("Neither resultset_target parameter nor \"INTO\" clause specified.")
        elif " PARTITIONED " in sql_text.upper():
            raise SyntaxError("Must not use PARTITIONED clause when specifying pagesize parameter.")
        sqlData["statement"] += " PARTITIONED EVERY {} ROWS".format(pagesize)
    elif self.target_cos:
        sqlData.update({'resultset_target': self.target_cos})

    # Build the retrying wrapper per call so that the instance's current
    # max_tries value is honoured.
    instrumented_send = backoff.on_exception(
        backoff.expo,
        RateLimitedException,
        max_tries=self.max_tries
    )(self._send_req)

    return instrumented_send(sqlData)

def wait_for_job(self, jobId):
self.logon()

Expand Down
Loading