From ee27bd3e62fe1ab956c4d0453b42febee7f89c71 Mon Sep 17 00:00:00 2001 From: Stefan Edwards Date: Sat, 14 Jun 2014 09:23:33 -0400 Subject: [PATCH 01/11] initial pep8 love, need to fix key loading from a config file, added `requirements.txt` --- .gitignore | 1 + requirements.txt | 2 ++ urlquery/api.py | 69 ++++++++++++++++++++++++++++++------------------ 3 files changed, 47 insertions(+), 25 deletions(-) create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index d2054c0..8ca657d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pyc +*.swp build urlquery/apikey.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d8cb953 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests==2.3.0 +dateutils==0.6.6 diff --git a/urlquery/api.py b/urlquery/api.py index 386f2d4..f829b35 100644 --- a/urlquery/api.py +++ b/urlquery/api.py @@ -3,17 +3,13 @@ try: import simplejson as json -except: +except ImportError: import json import requests from dateutil.parser import parse from datetime import datetime, timedelta import time -try: - from apikey import key -except: - key = '' base_url = 'https://uqapi.net/v3/json' gzip_default = False @@ -21,7 +17,8 @@ __feed_type = ['unfiltered', 'flagged'] __intervals = ['hour', 'day'] __priorities = ['urlfeed', 'low', 'medium', 'high'] -__search_types = ['string', 'regexp', 'ids_alert', 'urlquery_alert', 'js_script_hash'] +__search_types = ['string', 'regexp', 'ids_alert', + 'urlquery_alert', 'js_script_hash'] __result_types = ['reports', 'url_list'] __url_matchings = ['url_host', 'url_path'] @@ -33,6 +30,7 @@ def __set_default_values(gzip=False): to_return['gzip'] = True return to_return + def __query(query, gzip=False): if query.get('error') is not None: return query @@ -40,6 +38,7 @@ def __query(query, gzip=False): r = requests.post(base_url, data=json.dumps(query)) return r.json() + def urlfeed(feed='unfiltered', interval='hour', timestamp=None): """ The urlfeed function is used to access the main feed of URL from @@ -71,17 +70,19 @@ def urlfeed(feed='unfiltered', interval='hour', timestamp=None): :return: URLFEED { - "start_time" : string, - "end_time" : string, - "feed" : [URLs] Array of URL objects (see README) + "start_time" : string, + "end_time" : string, + "feed" : [URLs] Array of URL objects (see README) } """ query = {'method': 'urlfeed'} if feed not in __feed_type: - query.update({'error': 'Feed can only be in ' + ', '.join(__feed_type)}) + query.update({'error': + 'Feed can only be in ' + ', '.join(__feed_type)}) if interval not in __intervals: - query.update({'error': 'Interval can only be in ' + ', '.join(__intervals)}) + query.update({'error': + 'Interval can only be in ' + ', '.join(__intervals)}) if timestamp is None: ts = datetime.now() if interval == 'hour': @@ -93,15 +94,17 @@ def urlfeed(feed='unfiltered', interval='hour', timestamp=None): try: timestamp = time.mktime(parse(timestamp).utctimetuple()) except: - query.update({'error': 'Unable to convert time to timestamp: ' + str(time)}) + query.update({'error': + 'Unable to convert time to timestamp: ' + str(time)}) query['feed'] = feed query['interval'] = interval query['timestamp'] = timestamp return __query(query) + def submit(url, useragent=None, referer=None, priority='low', - access_level='public', callback_url=None, submit_vt=False, - save_only_alerted=False): + access_level='public', callback_url=None, submit_vt=False, + save_only_alerted=False): """ Submits an URL for analysis. 
@@ -169,7 +172,8 @@ def submit(url, useragent=None, referer=None, priority='low', if priority not in __priorities: query.update({'error': 'priority must be in '+', '.join(__priorities)}) if access_level not in __access_levels: - query.update({'error': 'assess_level must be in '+', '.join(__access_levels)}) + query.update({'error': + 'assess_level must be in '+', '.join(__access_levels)}) query['url'] = url if useragent is not None: query['useragent'] = useragent @@ -185,6 +189,7 @@ def submit(url, useragent=None, referer=None, priority='low', query['save_only_alerted'] = True return __query(query) + def user_agent_list(): """ Returns a list of accepted user agent strings. These might @@ -195,8 +200,9 @@ def user_agent_list(): query = {'method': 'user_agent_list'} return __query(query) + def mass_submit(urls, useragent=None, referer=None, - access_level='public', priority='low', callback_url=None): + access_level='public', priority='low', callback_url=None): """ See submit for details. All URLs will be queued with the same settings. @@ -208,7 +214,8 @@ def mass_submit(urls, useragent=None, referer=None, """ query = {'method': 'mass_submit'} if access_level not in __access_levels: - query.update({'error': 'assess_level must be in '+', '.join(__access_levels)}) + query.update({'error': + 'assess_level must be in '+', '.join(__access_levels)}) if priority not in __priorities: query.update({'error': 'priority must be in '+', '.join(__priorities)}) if useragent is not None: @@ -221,6 +228,7 @@ def mass_submit(urls, useragent=None, referer=None, query['callback_url'] = callback_url return __query(query) + def queue_status(queue_id): """ Polls the current status of a queued URL. Normal processing time @@ -236,7 +244,7 @@ def queue_status(queue_id): def report(report_id, recent_limit=0, include_details=False, - include_screenshot=False, include_domain_graph=False): + include_screenshot=False, include_domain_graph=False): """ This extracts data for a given report, the amount of data and what is included is dependent on the parameters set and the @@ -290,6 +298,7 @@ def report(report_id, recent_limit=0, include_details=False, query['include_domain_graph'] = True return __query(query) + def report_list(timestamp=None, limit=50): """ Returns a list of reports created from the given timestamp, if it’s @@ -324,13 +333,15 @@ def report_list(timestamp=None, limit=50): try: timestamp = time.mktime(parse(timestamp).utctimetuple()) except: - query.update({'error': 'Unable to convert time to timestamp: ' + str(time)}) + query.update({'error': + 'Unable to convert time to timestamp: ' + str(time)}) query['timestamp'] = timestamp query['limit'] = limit return __query(query) + def search(q, search_type='string', result_type='reports', - url_matching='url_host', date_from=None, deep=False): + url_matching='url_host', date_from=None, deep=False): """ Search in the database @@ -370,11 +381,17 @@ def search(q, search_type='string', result_type='reports', """ query = {'method': 'search'} if search_type not in __search_types: - query.update({'error': 'search_type can only be in ' + ', '.join(__search_types)}) + query.update({'error': + 'search_type can only be in ' + + ', '.join(__search_types)}) if result_type not in __result_types: - query.update({'error': 'result_type can only be in ' + ', '.join(__result_types)}) + query.update({'error': + 'result_type can only be in ' + + ', '.join(__result_types)}) if url_matching not in __url_matchings: - query.update({'error': 'url_matching can only be in ' + ', 
'.join(__url_matchings)})
+        query.update({'error':
+                      'url_matching can only be in ' +
+                      ', '.join(__url_matchings)})

     if date_from is None:
         ts = datetime.now()
@@ -383,7 +400,9 @@ def search(q, search_type='string', result_type='reports',
         try:
             timestamp = time.mktime(parse(date_from).utctimetuple())
         except:
-            query.update({'error': 'Unable to convert time to timestamp: ' + str(time)})
+            query.update({'error':
+                          'Unable to convert time to timestamp: ' +
+                          str(time)})

     query['q'] = q
     query['search_type'] = search_type
@@ -394,6 +413,7 @@ def search(q, search_type='string', result_type='reports',
         query['deep'] = True
     return __query(query)

+
 def reputation(q):
     """
     Searches a reputation list of URLs detected over the last month.
@@ -408,4 +428,3 @@ def reputation(q):
     query = {'method': 'reputation'}
     query['q'] = q
     return __query(query)
-

From 5064daf9f3e15ca5f62b0b59d5c0afa20b12321c Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sat, 14 Jun 2014 09:25:28 -0400
Subject: [PATCH 02/11] minor README tweaks

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 567749c..975edfd 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ returned is determined by the key and its associated permissions.

 Note: The access of the default key (no key) is very limited, even more than
-whatis accessible on the public site.
+what is accessible on the public site.

 API calls
 =========
@@ -107,6 +107,10 @@ To get the responses of the api gzip'ed, change 'gzip_default' to True.

 Dependencies
 ============

+If `pip` is available, any prerequirements may be added via the usual "requirements" dance:
+
+`pip install -r requirements.txt`
+
 Hard:

 * requests: https://github.com/kennethreitz/Requests

From 3f7ec96e0a48845ff7358690ec3c1eb1d5df7b80 Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sat, 14 Jun 2014 09:27:51 -0400
Subject: [PATCH 03/11] another minor tweak: added spaces around `+` in string joins

---
 urlquery/api.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/urlquery/api.py b/urlquery/api.py
index f829b35..1e05e5c 100644
--- a/urlquery/api.py
+++ b/urlquery/api.py
@@ -170,10 +170,12 @@ def submit(url, useragent=None, referer=None, priority='low',
     """
     query = {'method': 'submit'}
     if priority not in __priorities:
-        query.update({'error': 'priority must be in '+', '.join(__priorities)})
+        query.update({'error':
+                      'priority must be in ' + ', '.join(__priorities)})
     if access_level not in __access_levels:
         query.update({'error':
-                      'assess_level must be in '+', '.join(__access_levels)})
+                      'assess_level must be in ' +
+                      ', '.join(__access_levels)})
     query['url'] = url
     if useragent is not None:
         query['useragent'] = useragent

From 10f821f76b710d9d2772d93c37a2be0072258b4d Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sat, 14 Jun 2014 22:55:58 -0400
Subject: [PATCH 04/11] refactored to have the `gzip` and `apikey` parameters as parameters to the API methods, rather than module variables

---
 README.md | 6 ++---
 urlquery/api.py | 66 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 975edfd..16bf509 100644
--- a/README.md
+++ b/README.md
@@ -97,17 +97,17 @@ of the API call. This is called "_response_"

 API Key
 =======

-If you have an API Key, put it in apikey.py with the variable name 'key'.
+If you have an API Key, put it in a config file, and pass it into the API methods via the `apikey` parameter.
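+
+For example, a minimal sketch (`YOUR_API_KEY` below is a placeholder, not a real key):
+
+    import urlquery
+
+    # any call accepts the key per request:
+    agents = urlquery.user_agent_list(apikey='YOUR_API_KEY')  # placeholder key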
Gzip ==== -To get the responses of the api gzip'ed, change 'gzip_default' to True. +To get the responses of the api gzip'ed, set the `gzip` parameter to `True`. Dependencies ============ -If `pip` is available, any prerequirements may be added via the usual "requirements" dance: +If `pip` is available, any prerequisites may be added via the usual "requirements" dance: `pip install -r requirements.txt` diff --git a/urlquery/api.py b/urlquery/api.py index 1e05e5c..b0b3d29 100644 --- a/urlquery/api.py +++ b/urlquery/api.py @@ -11,8 +11,11 @@ from datetime import datetime, timedelta import time + base_url = 'https://uqapi.net/v3/json' -gzip_default = False + +# XXX: the sheer number of globals makes me wonder if +# this wouldn't be better as a class... __feed_type = ['unfiltered', 'flagged'] __intervals = ['hour', 'day'] @@ -21,25 +24,35 @@ 'urlquery_alert', 'js_script_hash'] __result_types = ['reports', 'url_list'] __url_matchings = ['url_host', 'url_path'] +cfg = None - -def __set_default_values(gzip=False): +def __set_default_values(gzip=False, apikey=None): to_return = {} - to_return['key'] = key - if gzip_default or gzip: + + if apikey is not None: + to_return['key'] = apikey + else: + to_return['key'] = '' + + if gzip: to_return['gzip'] = True + return to_return -def __query(query, gzip=False): +def __query(query, gzip=False, apikey=None): if query.get('error') is not None: return query - query.update(__set_default_values(gzip)) + query.update(__set_default_values(gzip, apikey)) r = requests.post(base_url, data=json.dumps(query)) return r.json() +# XXX: Kinda messy to have `gzip` passed into each URL feed... +# almost need an "initial" setup function, kinda like +# an initializer... -def urlfeed(feed='unfiltered', interval='hour', timestamp=None): +def urlfeed(feed='unfiltered', interval='hour', timestamp=None, + gzip=False, apikey=None): """ The urlfeed function is used to access the main feed of URL from the service. Currently there are two distinct feed: @@ -99,12 +112,12 @@ def urlfeed(feed='unfiltered', interval='hour', timestamp=None): query['feed'] = feed query['interval'] = interval query['timestamp'] = timestamp - return __query(query) + return __query(query, gzip, apikey) def submit(url, useragent=None, referer=None, priority='low', access_level='public', callback_url=None, submit_vt=False, - save_only_alerted=False): + save_only_alerted=False, gzip=False, apikey=None): """ Submits an URL for analysis. @@ -189,10 +202,10 @@ def submit(url, useragent=None, referer=None, priority='low', query['submit_vt'] = True if save_only_alerted: query['save_only_alerted'] = True - return __query(query) + return __query(query, gzip, apikey) -def user_agent_list(): +def user_agent_list(gzip=False, apikey=None): """ Returns a list of accepted user agent strings. These might change over time, select one from the returned list. @@ -200,11 +213,12 @@ def user_agent_list(): :return: A list of accepted user agents """ query = {'method': 'user_agent_list'} - return __query(query) + return __query(query, gzip, apikey) def mass_submit(urls, useragent=None, referer=None, - access_level='public', priority='low', callback_url=None): + access_level='public', priority='low', callback_url=None, + gzip=False, apikey=None): """ See submit for details. All URLs will be queued with the same settings. 
@@ -228,10 +242,10 @@ def mass_submit(urls, useragent=None, referer=None,
     query['priority'] = priority
     if callback_url is not None:
         query['callback_url'] = callback_url
-    return __query(query)
+    return __query(query, gzip, apikey)


-def queue_status(queue_id):
+def queue_status(queue_id, gzip=False, apikey=None):
     """
     Polls the current status of a queued URL. Normal processing time
     for a URL is about 1 minute.
@@ -242,11 +256,12 @@ def queue_status(queue_id):
     """
     query = {'method': 'queue_status'}
     query['queue_id'] = queue_id
-    return __query(query)
+    return __query(query, gzip, apikey)


 def report(report_id, recent_limit=0, include_details=False,
-           include_screenshot=False, include_domain_graph=False):
+           include_screenshot=False, include_domain_graph=False,
+           gzip=False, apikey=None):
     """
     This extracts data for a given report, the amount of data and
     what is included is dependent on the parameters set and the
@@ -298,10 +313,10 @@ def report(report_id, recent_limit=0, include_details=False,
         query['include_screenshot'] = True
     if include_domain_graph:
         query['include_domain_graph'] = True
-    return __query(query)
+    return __query(query, gzip, apikey)


-def report_list(timestamp=None, limit=50):
+def report_list(timestamp=None, limit=50, gzip=False, apikey=None):
     """
     Returns a list of reports created from the given timestamp, if it’s
     not included the most recent reports will be returned.
@@ -339,11 +354,12 @@ def report_list(timestamp=None, limit=50):
                       'Unable to convert time to timestamp: ' + str(time)})
     query['timestamp'] = timestamp
     query['limit'] = limit
-    return __query(query)
+    return __query(query, gzip, apikey)


-def search(q, search_type='string', result_type='reports',
-           url_matching='url_host', date_from=None, deep=False):
+def search(q, search_type='string', result_type='reports',
+           url_matching='url_host', date_from=None, deep=False,
+           gzip=False, apikey=None):
     """
     Search in the database

@@ -413,10 +429,10 @@ def search(q, search_type='string', result_type='reports',
     query['from'] = timestamp
     if deep:
         query['deep'] = True
-    return __query(query)
+    return __query(query, gzip, apikey)


-def reputation(q):
+def reputation(q, gzip=False, apikey=None):
     """
     Searches a reputation list of URLs detected over the last month.
     The search query can be a domain or an IP.
@@ -429,4 +445,4 @@ def reputation(q):

     query = {'method': 'reputation'}
     query['q'] = q
-    return __query(query)
+    return __query(query, gzip, apikey)

From cad55edd87d077c50073811fc9cb8b9780562293 Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sat, 14 Jun 2014 22:57:54 -0400
Subject: [PATCH 05/11] woops, forgot to clean up my toys; removed unused `cfg` variable.
---
 urlquery/api.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/urlquery/api.py b/urlquery/api.py
index b0b3d29..8e4ae42 100644
--- a/urlquery/api.py
+++ b/urlquery/api.py
@@ -24,7 +24,6 @@
                   'urlquery_alert', 'js_script_hash']
 __result_types = ['reports', 'url_list']
 __url_matchings = ['url_host', 'url_path']
-cfg = None

 def __set_default_values(gzip=False, apikey=None):
     to_return = {}

From 4bd1970c6f22cc4f53337e14ae532c3e833ca3ec Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sat, 21 Jun 2014 08:15:07 -0400
Subject: [PATCH 06/11] added `gzip_default` to api.py, added `ooapi.py` which is a class-based version of the same

---
 urlquery/api.py | 3 +-
 urlquery/ooapi.py | 457 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 459 insertions(+), 1 deletion(-)
 create mode 100644 urlquery/ooapi.py

diff --git a/urlquery/api.py b/urlquery/api.py
index 8e4ae42..c87d25c 100644
--- a/urlquery/api.py
+++ b/urlquery/api.py
@@ -13,6 +13,7 @@

 base_url = 'https://uqapi.net/v3/json'
+gzip_default = False

 # XXX: the sheer number of globals makes me wonder if
 # this wouldn't be better as a class...
@@ -33,7 +34,7 @@ def __set_default_values(gzip=False, apikey=None):
     else:
         to_return['key'] = ''

-    if gzip:
+    if gzip_default or gzip:
         to_return['gzip'] = True

     return to_return

diff --git a/urlquery/ooapi.py b/urlquery/ooapi.py
new file mode 100644
index 0000000..a36d21e
--- /dev/null
+++ b/urlquery/ooapi.py
@@ -0,0 +1,457 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+try:
+    import simplejson as json
+except ImportError:
+    import json
+
+import requests
+from dateutil.parser import parse
+from datetime import datetime, timedelta
+import time
+
+
+base_url = 'https://uqapi.net/v3/json'
+gzip_default = False
+
+
+class URLQuery(object):
+    __slots__ = ["_feed_type", "_intervals", "_priorities", "_search_types",
+                 "_result_types", "_url_types", "gzip_default", "base_url",
+                 "_access_levels", "apikey"]
+
+    def __init__(self, base_url=None, gzip_default=False, apikey=None):
+        self._feed_type = ['unfiltered', 'flagged']
+        self._intervals = ['hour', 'day']
+        self._priorities = ['urlfeed', 'low', 'medium', 'high']
+        self._search_types = ['string', 'regexp', 'ids_alert',
+                              'urlquery_alert', 'js_script_hash']
+        self._result_types = ['reports', 'url_list']
+        self._url_matchings = ['url_host', 'url_path']
+        # the access levels documented in submit/mass_submit below:
+        self._access_levels = ['public', 'nonpublic', 'private']
+        self.gzip_default = gzip_default
+
+        if base_url is not None:
+            self.base_url = base_url
+        else:
+            self.base_url = 'https://uqapi.net/v3/json'
+
+        if apikey is not None:
+            self.apikey = apikey
+        else:
+            self.apikey = ''
+
+
+    def query(self, query, gzip=False):
+        if query.get('error') is not None:
+            return query
+
+        if self.gzip_default or gzip:
+            query['gzip'] = True
+
+        query['key'] = self.apikey
+        r = requests.post(base_url, data=json.dumps(query))
+        return r.json()
+
+
+    def urlfeed(self, feed='unfiltered', interval='hour', timestamp=None,
+                gzip=False, apikey=None):
+        """
+        The urlfeed function is used to access the main feed of URL from
+        the service. Currently there are two distinct feed:
+
+
+        :param feed: Currently there are two distinct feed:
+
+            * *unfiltered*: contains all URL received by the service, as
+                with other API calls some restrictions to the feed might
+                apply depending. (default)
+            * *flagged*: contains URLs flagged by some detection by
+                urlquery, it will not contain data triggered by IDS
+                alerts as that not possible to correlate correctly to a
+                given URL. Access to this is currently restricted.
+
+        :param interval: Sets the size of time window.
+            * *hour*: splits the day into 24 slices which each
+                goes from 00-59 of every hour,
+                for example: 10:00-10:59 (default)
+            * *day*: will return all URLs from a given date
+
+        :param timestamp: This selects which slice to return.
+                Any timestamp within a given interval/time
+                slice can be used to return URLs from that
+                timeframe. (default: now)
+
+
+        :return: URLFEED
+
+            {
+                "start_time" : string,
+                "end_time" : string,
+                "feed" : [URLs] Array of URL objects (see README)
+            }
+
+        """
+        query = {'method': 'urlfeed'}
+        if feed not in self._feed_type:
+            query.update({'error':
+                          'Feed can only be in ' +
+                          ', '.join(self._feed_type)})
+        if interval not in self._intervals:
+            query.update({'error':
+                          'Interval can only be in ' +
+                          ', '.join(self._intervals)})
+        if timestamp is None:
+            ts = datetime.now()
+            if interval == 'hour':
+                ts = ts - timedelta(hours=1)
+            if interval == 'day':
+                ts = ts - timedelta(days=1)
+            timestamp = time.mktime(ts.utctimetuple())
+        else:
+            try:
+                timestamp = time.mktime(parse(timestamp).utctimetuple())
+            except:
+                query.update({'error':
+                              'Unable to convert time to timestamp: ' +
+                              str(time)})
+        query['feed'] = feed
+        query['interval'] = interval
+        query['timestamp'] = timestamp
+        return self.query(query, gzip, apikey)
+
+
+    def submit(self, url, useragent=None, referer=None, priority='low',
+               access_level='public', callback_url=None, submit_vt=False,
+               save_only_alerted=False, gzip=False, apikey=None):
+        """
+        Submits an URL for analysis.
+
+        :param url: URL to submit for analysis
+
+        :param useragent: See user_agent_list API function. Setting an
+            invalid UserAgent will result in a random UserAgent getting
+            selected.
+
+        :param referer: Referer to be applied to the first visiting URL
+
+        :param priority: Set a priority on the submission.
+            * *urlfeed*: URL might take several hour before completing.
+                Used for big unfiltered feeds. Some filtering applies
+                before accepting to queue so a submitted URL might not
+                be tested.
+            * *low*: For vetted or filtered feeds (default)
+            * *medium*: Normal submissions
+            * *high*: To ensure highest priority.
+
+        :param access_level: Set accessibility of the report
+            * *public*: URL is publicly available on the site (default)
+            * *nonpublic*: Shared with other security organizations/researchers.
+            * *private*: Only submitting key has access.
+
+        :param callback_url: Results are POSTed back to the provided
+            URL when processing has completed. The results will be
+            originating from uqapi.net. Requires an API key.
+
+        :param submit_vt: Submits any unknown file to VirusTotal for
+            analysis. Information from VirusTotal will be included in the
+            report as soon as they have finished processing the sample.
+            Most likely will the report from urlquery be available
+            before the data is received back from VirusTotal.
+            Default: false
+
+            Only executables, zip archives and pdf documents are
+            currently submitted.
+
+            .. note:: Not fully implemented yet.
+
+        :param save_only_alerted: Only reports which contains alerts
+            (IDS, UQ alerts, Blacklists etc.) are kept. The main purpose
+            for this flag is for mass testing URLs which has not been
+            properly vetted so only URLs of interest are kept.
+            Default: false
+
+            Combining this with a callback URL will result in only those
+            that have alerts on them being POSTed back to the callback URL.
+
+        :return: QUEUE_STATUS
+
+            {
+                "status" : string, ("queued", "processing", "done")
+                "queue_id" : string,
+                "report_id" : string, Included once "status" = "done"
+                "priority" : string,
+                "url" : URL object, See README
+                "settings" : SETTINGS object See README
+            }
+
+
+        """
+        query = {'method': 'submit'}
+        if priority not in self._priorities:
+            query.update({'error':
+                          'priority must be in ' +
+                          ', '.join(self._priorities)})
+        if access_level not in self._access_levels:
+            query.update({'error':
+                          'access_level must be in ' +
+                          ', '.join(self._access_levels)})
+        query['url'] = url
+        if useragent is not None:
+            query['useragent'] = useragent
+        if referer is not None:
+            query['referer'] = referer
+        query['priority'] = priority
+        query['access_level'] = access_level
+        if callback_url is not None:
+            query['callback_url'] = callback_url
+        if submit_vt:
+            query['submit_vt'] = True
+        if save_only_alerted:
+            query['save_only_alerted'] = True
+        return self.query(query, gzip, apikey)
+
+
+    def user_agent_list(self, gzip=False, apikey=None):
+        """
+        Returns a list of accepted user agent strings. These might
+        change over time, select one from the returned list.
+
+        :return: A list of accepted user agents
+        """
+        query = {'method': 'user_agent_list'}
+        return self.query(query, gzip, apikey)
+
+
+    def mass_submit(self, urls, useragent=None, referer=None,
+                    access_level='public', priority='low', callback_url=None,
+                    gzip=False, apikey=None):
+        """
+        See submit for details. All URLs will be queued with the same settings.
+
+        :return:
+
+            {
+                [QUEUE_STATUS] Array of QUEUE_STATUS objects, See submit
+            }
+        """
+        query = {'method': 'mass_submit'}
+        if access_level not in self._access_levels:
+            query.update({'error':
+                          'access_level must be in ' +
+                          ', '.join(self._access_levels)})
+        if priority not in self._priorities:
+            query.update({'error': 'priority must be in ' +
+                          ', '.join(self._priorities)})
+        if useragent is not None:
+            query['useragent'] = useragent
+        if referer is not None:
+            query['referer'] = referer
+        query['access_level'] = access_level
+        query['priority'] = priority
+        if callback_url is not None:
+            query['callback_url'] = callback_url
+        return self.query(query, gzip, apikey)
+
+
+    def queue_status(self, queue_id, gzip=False, apikey=None):
+        """
+        Polls the current status of a queued URL. Normal processing time
+        for a URL is about 1 minute.
+
+        :param queue_id: QueueID is returned by the submit API calls
+
+        :return: QUEUE_STATUS (See submit)
+        """
+        query = {'method': 'queue_status'}
+        query['queue_id'] = queue_id
+        return self.query(query, gzip, apikey)
+
+
+    def report(self, report_id, recent_limit=0, include_details=False,
+               include_screenshot=False, include_domain_graph=False,
+               gzip=False, apikey=None):
+        """
+        This extracts data for a given report, the amount of data and
+        what is included is dependent on the parameters set and the
+        permissions of the API key.
+
+        :param report_id: ID of the report. To get a valid report_id
+            either use search to look for specific reports or report_list
+            to get a list of recently finished reports.
+            Can be string or an integer
+
+        :param recent_limit: Number of recent reports to include.
+            Only applies when include_details is true.
+            Integer, default: 0
+
+        :param include_details: Includes details in the report, like the
+            alert information, Javascript and transaction data.
+            Default: False
+
+        :param include_screenshot: A screenshot is included in the report
+            as a base64. The mime type of the image is also included.
+            Default: False
+
+        :param include_domain_graph: A domain graph is included in the
+            report as a base64. The mime type of the image is also included.
+            Default: False
+
+
+        :return: BASICREPORT
+
+            {
+                "report_id": string,
+                "date" : string, Date formatted string
+                "url" : URL, URL object - See README
+                "settings" : SETTINGS, SETTINGS object - See README
+                "urlquery_alert_count" : int, Total UQ alerts
+                "ids_alert_count" : int, Total IDS alert
+                "blacklist_alert_count" : int, Total Blacklist alerts
+                "screenshot" : BINBLOB, BINBLOB object - See README
+                "domain_graph" : BINBLOB BINBLOB object - See README
+            }
+        """
+        query = {'method': 'report'}
+        query['report_id'] = report_id
+        if recent_limit is not None:
+            query['recent_limit'] = recent_limit
+        if include_details:
+            query['include_details'] = True
+        if include_screenshot:
+            query['include_screenshot'] = True
+        if include_domain_graph:
+            query['include_domain_graph'] = True
+        return self.query(query, gzip, apikey)
+
+
+    def report_list(self, timestamp=None, limit=50, gzip=False, apikey=None):
+        """
+        Returns a list of reports created from the given timestamp, if it’s
+        not included the most recent reports will be returned.
+
+        Used to get a list of reports from given timestamp, along with basic
+        information about the report like number of alerts and the
+        submitted URL.
+
+        To get reports which are nonpublic or private an API key is needed
+        which has access to these.
+
+        :param timestamp: Unix Epoch timestamp from the starting point to get
+            reports.
+            Default: If None, set to datetime.now()
+
+        :param limit: Number of reports in the list
+            Default: 50
+
+        :return:
+
+            {
+                "reports": [BASICREPORTS] List of BASICREPORTS - See report
+            }
+
+        """
+        query = {'method': 'report_list'}
+        if timestamp is None:
+            ts = datetime.now()
+            timestamp = time.mktime(ts.utctimetuple())
+        else:
+            try:
+                timestamp = time.mktime(parse(timestamp).utctimetuple())
+            except:
+                query.update({'error':
+                              'Unable to convert time to timestamp: ' + str(time)})
+        query['timestamp'] = timestamp
+        query['limit'] = limit
+        return self.query(query, gzip, apikey)
+
+
+    def search(self, q, search_type='string', result_type='reports',
+               url_matching='url_host', date_from=None, deep=False,
+               gzip=False, apikey=None):
+        """
+        Search in the database
+
+        :param q: Search query
+
+        :param search_type: Search type
+            * *string*: Used to find URLs which contains a given string.
+                To search for URLs on a specific IP use string. If a
+                string is found to match an IP address it will automatically
+                search based on the IP. (default)
+            * *regexp*: Search for a regexp pattern within URLs
+            * *ids_alert*: Search for specific IDS alerts
+            * *urlquery_alert*: ????? FIXME ?????
+            * *js_script_hash*: Used to search for URLs/reports which
+                contains a specific JavaScript. The scripts are searched
+                based on SHA256, the hash value for each script is
+                included in the report details. Can be used to find other
+
+        :param result_type: Result type
+            * *reports*: Full reports (default)
+            * *url_list*: List of urls
+
+        :param url_matching: What part of an URL to do pattern matching
+            against. Only applies to string and regexp searches.
+            * *url_host*: match against host (default)
+            * *url_path*: match against path
+
+
+        :param date_from: Unix epoch timestamp for starting searching point.
+            Default: If None, set to datetime.now()
+
+
+        :param deep: Search all URLs, not just submitted URLs.
+            Default: false
+            Experimental! Should be used with care as it’s very resource
+            intensive.
+        """
+        query = {'method': 'search'}
+        if search_type not in self._search_types:
+            query.update({'error':
+                          'search_type can only be in ' +
+                          ', '.join(self._search_types)})
+        if result_type not in self._result_types:
+            query.update({'error':
+                          'result_type can only be in ' +
+                          ', '.join(self._result_types)})
+        if url_matching not in self._url_matchings:
+            query.update({'error':
+                          'url_matching can only be in ' +
+                          ', '.join(self._url_matchings)})
+
+        if date_from is None:
+            ts = datetime.now()
+            timestamp = time.mktime(ts.utctimetuple())
+        else:
+            try:
+                timestamp = time.mktime(parse(date_from).utctimetuple())
+            except:
+                query.update({'error':
+                              'Unable to convert time to timestamp: ' +
+                              str(time)})
+
+        query['q'] = q
+        query['search_type'] = search_type
+        query['result_type'] = result_type
+        query['url_matching'] = url_matching
+        query['from'] = timestamp
+        if deep:
+            query['deep'] = True
+        return self.query(query, gzip, apikey)
+
+
+    def reputation(self, q, gzip=False, apikey=None):
+        """
+        Searches a reputation list of URLs detected over the last month.
+        The search query can be a domain or an IP.
+
+        With an API key, matching URLs will be returned along with the
+        triggering alert.
+
+        :param q: Search query
+        """
+
+        query = {'method': 'reputation'}
+        query['q'] = q
+        return self.query(query, gzip, apikey)

From 122b00067f1be9d60db8d8335ac4ba922ea221b1 Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sun, 22 Jun 2014 12:41:50 -0400
Subject: [PATCH 07/11] ok, basic working version of ooapi; need to just add some tests & exercise some stuff

---
 urlquery/ooapi.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/urlquery/ooapi.py b/urlquery/ooapi.py
index a36d21e..544ecea 100644
--- a/urlquery/ooapi.py
+++ b/urlquery/ooapi.py
@@ -19,7 +19,7 @@ class URLQuery(object):
     __slots__ = ["_feed_type", "_intervals", "_priorities", "_search_types",
                  "_result_types", "_url_types", "gzip_default", "base_url",
-                 "_access_levels", "apikey"]
+                 "_url_matchings", "_access_levels", "apikey"]

     def __init__(self, base_url=None, gzip_default=False, apikey=None):
         self._feed_type = ['unfiltered', 'flagged']
@@ -42,14 +42,18 @@ def __init__(self, base_url=None, gzip_default=False, apikey=None):
         self.apikey = ''


-    def query(self, query, gzip=False):
+    def query(self, query, gzip=False, apikey=None):
         if query.get('error') is not None:
             return query

         if self.gzip_default or gzip:
             query['gzip'] = True

-        query['key'] = self.apikey
+        if apikey is not None:
+            query['key'] = apikey
+        else:
+            query['key'] = self.apikey
+
         r = requests.post(base_url, data=json.dumps(query))
         return r.json()

From 4984b05bd90ea8949bb2cc522f3ad166fa5ff5d5 Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sun, 22 Jun 2014 12:51:22 -0400
Subject: [PATCH 08/11] updated init...
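
Both interfaces are now importable. A minimal sketch of the intended
usage (the key value is a placeholder, not a real key):

    from urlquery import URLQuery

    uq = URLQuery(apikey='YOUR_API_KEY')  # placeholder key
    rep = uq.reputation('example.com')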
--- urlquery/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/urlquery/__init__.py b/urlquery/__init__.py index b69959f..259c650 100644 --- a/urlquery/__init__.py +++ b/urlquery/__init__.py @@ -1 +1,2 @@ from api import * +from ooapi import URLQuery From b1abaf14b7888b187c48cf35b1ec0287f1b33ced Mon Sep 17 00:00:00 2001 From: Stefan Edwards Date: Sun, 22 Jun 2014 12:56:53 -0400 Subject: [PATCH 09/11] some more pep8 love --- urlquery/api.py | 7 +++---- urlquery/ooapi.py | 34 +++++++++++++++------------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/urlquery/api.py b/urlquery/api.py index c87d25c..92927a9 100644 --- a/urlquery/api.py +++ b/urlquery/api.py @@ -26,6 +26,7 @@ __result_types = ['reports', 'url_list'] __url_matchings = ['url_host', 'url_path'] + def __set_default_values(gzip=False, apikey=None): to_return = {} @@ -47,9 +48,6 @@ def __query(query, gzip=False, apikey=None): r = requests.post(base_url, data=json.dumps(query)) return r.json() -# XXX: Kinda messy to have `gzip` passed into each URL feed... -# almost need an "initial" setup function, kinda like -# an initializer... def urlfeed(feed='unfiltered', interval='hour', timestamp=None, gzip=False, apikey=None): @@ -140,7 +138,8 @@ def submit(url, useragent=None, referer=None, priority='low', :param access_level: Set accessibility of the report * *public*: URL is publicly available on the site (default) - * *nonpublic*: Shared with other security organizations/researchers. + * *nonpublic*: Shared with other security organizations or + researchers. * *private*: Only submitting key has access. :param callback_url: Results are POSTed back to the provided diff --git a/urlquery/ooapi.py b/urlquery/ooapi.py index 544ecea..af3369a 100644 --- a/urlquery/ooapi.py +++ b/urlquery/ooapi.py @@ -41,7 +41,6 @@ def __init__(self, base_url=None, gzip_default=False, apikey=None): else: self.apikey = '' - def query(self, query, gzip=False, apikey=None): if query.get('error') is not None: return query @@ -57,7 +56,6 @@ def query(self, query, gzip=False, apikey=None): r = requests.post(base_url, data=json.dumps(query)) return r.json() - def urlfeed(self, feed='unfiltered', interval='hour', timestamp=None, gzip=False, apikey=None): """ @@ -67,9 +65,9 @@ def urlfeed(self, feed='unfiltered', interval='hour', timestamp=None, :param feed: Currently there are two distinct feed: - * *unfiltered*: contains all URL received by the service, as - with other API calls some restrictions to the feed might - apply depending. (default) + * *unfiltered*: contains all URL received by the service, + as with other API calls some restrictions to the feed + might apply depending. 
(default)
+            * *flagged*: contains URLs flagged by some detection by
                 urlquery, it will not contain data triggered by IDS
@@ -92,9 +90,11 @@ def urlfeed(self, feed='unfiltered', interval='hour', timestamp=None,
             {
                 "start_time" : string,
                 "end_time" : string,
-                "feed" : [URLs] Array of URL objects (see README)
+                "feed" : [URLs] Array of URL objects
             }

+        For more information on "feed", please see the README
+
         """
         query = {'method': 'urlfeed'}
         if feed not in self._feed_type:
@@ -124,7 +126,6 @@ def urlfeed(self, feed='unfiltered', interval='hour', timestamp=None,
         query['timestamp'] = timestamp
         return self.query(query, gzip, apikey)

-
     def submit(self, url, useragent=None, referer=None, priority='low',
                access_level='public', callback_url=None, submit_vt=False,
                save_only_alerted=False, gzip=False, apikey=None):
@@ -150,7 +149,8 @@ def submit(self, url, useragent=None, referer=None, priority='low',

         :param access_level: Set accessibility of the report
             * *public*: URL is publicly available on the site (default)
-            * *nonpublic*: Shared with other security organizations/researchers.
+            * *nonpublic*: Shared with other security organizations or
+                researchers.
             * *private*: Only submitting key has access.

         :param callback_url: Results are POSTed back to the provided
@@ -215,7 +215,6 @@ def submit(self, url, useragent=None, referer=None, priority='low',
             query['save_only_alerted'] = True
         return self.query(query, gzip, apikey)

-
     def user_agent_list(self, gzip=False, apikey=None):
         """
         Returns a list of accepted user agent strings. These might
@@ -226,12 +225,12 @@ def user_agent_list(self, gzip=False, apikey=None):
         query = {'method': 'user_agent_list'}
         return self.query(query, gzip, apikey)

-
     def mass_submit(self, urls, useragent=None, referer=None,
                     access_level='public', priority='low', callback_url=None,
                     gzip=False, apikey=None):
         """
-        See submit for details. All URLs will be queued with the same settings.
+        See submit for details. All URLs will be queued with the same
+        settings.

         :return:

@@ -257,7 +256,6 @@ def mass_submit(self, urls, useragent=None, referer=None,
         query['callback_url'] = callback_url
         return self.query(query, gzip, apikey)

-
     def queue_status(self, queue_id, gzip=False, apikey=None):
         """
         Polls the current status of a queued URL. Normal processing time
@@ -271,7 +269,6 @@ def queue_status(self, queue_id, gzip=False, apikey=None):
         query['queue_id'] = queue_id
         return self.query(query, gzip, apikey)

-
     def report(self, report_id, recent_limit=0, include_details=False,
                include_screenshot=False, include_domain_graph=False,
                gzip=False, apikey=None):
@@ -298,7 +295,8 @@ def report(self, report_id, recent_limit=0, include_details=False,
             Default: False

         :param include_domain_graph: A domain graph is included in the
-            report as a base64. The mime type of the image is also included.
+            report as a base64. The mime type of the image is also
+            included.
Default: False
@@ -328,7 +326,6 @@ def report(self, report_id, recent_limit=0, include_details=False,
             query['include_domain_graph'] = True
         return self.query(query, gzip, apikey)

-
     def report_list(self, timestamp=None, limit=50, gzip=False, apikey=None):
         """
         Returns a list of reports created from the given timestamp, if it’s
@@ -361,12 +361,12 @@ def report_list(self, timestamp=None, limit=50, gzip=False, apikey=None):
                 timestamp = time.mktime(parse(timestamp).utctimetuple())
             except:
                 query.update({'error':
-                              'Unable to convert time to timestamp: ' + str(time)})
+                              'Unable to convert time to timestamp: ' +
+                              str(time)})
         query['timestamp'] = timestamp
         query['limit'] = limit
         return self.query(query, gzip, apikey)

-
     def search(self, q, search_type='string', result_type='reports',
                url_matching='url_host', date_from=None, deep=False,
                gzip=False, apikey=None):
         """
         Search in the database
@@ -398,7 +396,7 @@ def search(self, q, search_type='string', result_type='reports',
             * *url_host*: match against host (default)
             * *url_path*: match against path


-        :param date_from: Unix epoch timestamp for starting searching point.
+        :param date_from: Unix epoch timestamp for starting search point.
             Default: If None, set to datetime.now()


@@ -441,7 +438,6 @@ def search(self, q, search_type='string', result_type='reports',
             query['deep'] = True
         return self.query(query, gzip, apikey)

-
     def reputation(self, q, gzip=False, apikey=None):
         """
         Searches a reputation list of URLs detected over the last month.

From dfd284d46babdc0c3298cb964894997bb496fb9b Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sun, 22 Jun 2014 13:03:04 -0400
Subject: [PATCH 10/11] ooapi note

---
 urlquery/ooapi.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/urlquery/ooapi.py b/urlquery/ooapi.py
index af3369a..1970fc8 100644
--- a/urlquery/ooapi.py
+++ b/urlquery/ooapi.py
@@ -15,6 +15,10 @@
 base_url = 'https://uqapi.net/v3/json'
 gzip_default = False

+# XXX: Would be nice if the below would return objects rather than JSON...
+# dunno how well that would work with the whole flow of everything, but
+# it might be interesting.
+

 class URLQuery(object):
     __slots__ = ["_feed_type", "_intervals", "_priorities", "_search_types",

From 9dfd218ac6de28e53a85a5c20d13fe0773f7d489 Mon Sep 17 00:00:00 2001
From: Stefan Edwards
Date: Sun, 22 Jun 2014 13:05:14 -0400
Subject: [PATCH 11/11] removed note that started the ooapi thought

---
 urlquery/api.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/urlquery/api.py b/urlquery/api.py
index 92927a9..7140377 100644
--- a/urlquery/api.py
+++ b/urlquery/api.py
@@ -15,9 +15,6 @@
 base_url = 'https://uqapi.net/v3/json'
 gzip_default = False

-# XXX: the sheer number of globals makes me wonder if
-# this wouldn't be better as a class...
-
 __feed_type = ['unfiltered', 'flagged']
 __intervals = ['hour', 'day']
 __priorities = ['urlfeed', 'low', 'medium', 'high']
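
A minimal end-to-end sketch of the API as it stands after this series
(illustrative only; the key is a placeholder and the calls POST to the
live uqapi.net endpoint):

    from urlquery import URLQuery

    uq = URLQuery(apikey='YOUR_API_KEY')  # placeholder key
    status = uq.submit('http://example.com/', priority='low')
    # QUEUE_STATUS comes back as parsed JSON; poll until 'status' is 'done'
    status = uq.queue_status(status.get('queue_id'))
    # once done, fetch the full report by its report_id
    report = uq.report(status.get('report_id'))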