Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
gouline authored May 26, 2020
1 parent b73a9de commit 5ba0b6f
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 41 deletions.
Empty file added setup.cfg
Empty file.
3 changes: 2 additions & 1 deletion setup.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from setuptools import setup

with open('README.md') as f:
long_description = f.read()
long_description = f.read()

setup(
name="pipelinewise-tap-google-analytics",
Expand All @@ -20,6 +20,7 @@
"pipelinewise-singer-python==1.*",
"google-api-python-client==1.7.9",
"oauth2client==4.1.3",
"backoff==1.3.2"
],
entry_points="""
[console_scripts]
Expand Down
90 changes: 50 additions & 40 deletions tap_google_analytics/defaults/default_report_definition.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
[
{ "name" : "website_overview",
"dimensions" :
[
{
"name": "website_overview",
"dimensions": [
"ga:date"
],
"metrics" :
[
"metrics": [
"ga:users",
"ga:newUsers",
"ga:sessions",
Expand All @@ -18,16 +17,15 @@
"ga:exitRate"
]
},
{ "name" : "traffic_sources",
"dimensions" :
[
{
"name": "traffic_sources",
"dimensions": [
"ga:date",
"ga:source",
"ga:medium",
"ga:socialNetwork"
],
"metrics" :
[
"metrics": [
"ga:users",
"ga:newUsers",
"ga:sessions",
Expand All @@ -40,15 +38,14 @@
"ga:exitRate"
]
},
{ "name" : "pages",
"dimensions" :
[
{
"name": "pages",
"dimensions": [
"ga:date",
"ga:hostname",
"ga:pagePath"
],
"metrics" :
[
"metrics": [
"ga:pageviews",
"ga:uniquePageviews",
"ga:avgTimeOnPage",
Expand All @@ -59,9 +56,9 @@
"ga:exitRate"
]
},
{ "name" : "locations",
"dimensions" :
[
{
"name": "locations",
"dimensions": [
"ga:date",
"ga:continent",
"ga:subContinent",
Expand All @@ -70,8 +67,7 @@
"ga:metro",
"ga:city"
],
"metrics" :
[
"metrics": [
"ga:users",
"ga:newUsers",
"ga:sessions",
Expand All @@ -84,46 +80,60 @@
"ga:exitRate"
]
},
{ "name" : "monthly_active_users",
"dimensions" :
[
{
"name": "monthly_active_users",
"dimensions": [
"ga:date"
],
"metrics" :
[
"metrics": [
"ga:30dayUsers"
]
},
{ "name" : "weekly_active_users",
"dimensions" :
[
{
"name": "four_weekly_active_users",
"dimensions": [
"ga:date"
],
"metrics" :
[
"metrics": [
"ga:28dayUsers"
]
},
{
"name": "two_weekly_active_users",
"dimensions": [
"ga:date"
],
"metrics": [
"ga:14dayUsers"
]
},
{
"name": "weekly_active_users",
"dimensions": [
"ga:date"
],
"metrics": [
"ga:7dayUsers"
]
},
{ "name" : "daily_active_users",
"dimensions" :
[
{
"name": "daily_active_users",
"dimensions": [
"ga:date"
],
"metrics" :
[
"metrics": [
"ga:1dayUsers"
]
},
{ "name" : "devices",
"dimensions" :
[
{
"name": "devices",
"dimensions": [
"ga:date",
"ga:deviceCategory",
"ga:operatingSystem",
"ga:browser"
],
"metrics" :
[
"metrics": [
"ga:users",
"ga:newUsers",
"ga:sessions",
Expand Down
7 changes: 7 additions & 0 deletions tap_google_analytics/ga_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def initialize_credentials(self, config):

def initialize_analyticsreporting(self):
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
Expand All @@ -100,10 +101,13 @@ def fetch_metadata(self):
"""
Fetch the valid (dimensions, metrics) for the Analytics Reporting API
and their data types.
Returns:
A map of (dimensions, metrics) hashes
Each available dimension can be found in dimensions with its data type
as the value. e.g. dimensions['ga:userType'] == STRING
Each available metric can be found in metrics with its data type
as the value. e.g. metrics['ga:sessions'] == INTEGER
"""
Expand Down Expand Up @@ -229,6 +233,7 @@ def generate_report_definition(self, stream):
giveup=is_fatal_error)
def query_api(self, report_definition, pageToken=None):
"""Queries the Analytics Reporting API V4.
Returns:
The Analytics Reporting API V4 response.
"""
Expand All @@ -249,8 +254,10 @@ def query_api(self, report_definition, pageToken=None):

def process_response(self, response):
"""Processes the Analytics Reporting API V4 response.
Args:
response: An Analytics Reporting API V4 response.
Returns: (nextPageToken, results)
nextPageToken: The next Page Token
If it is not None then the maximum pageSize has been reached
Expand Down
8 changes: 8 additions & 0 deletions tap_google_analytics/reports_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def __init__(self, config, reports_definition):
def generate_catalog(self):
"""
Generate the catalog based on the reports definition
Assumptions:
+ All streams and attributes are automatically included
+ All dimensions are also defined as keys
Expand All @@ -27,18 +28,25 @@ def generate_catalog(self):
to the schema.
This is important for defining the date range the records are for,
especially when 'ga:date' is not part of the requested Dimensions.
If 'ga:date' has not been added as one of the Dimensions, then the
{start_date, end_date} attributes are also added as keys.
For example, if a user requests to see user stats by device or by source,
the {start_date, end_date} can be used as part of the key uniquely
identifying the generated stats.
That way we can properly identify and update rows over overlapping
runs of the tap.
+ The available (dimensions, metrics) and their data type are dynamically
fetched using the GAClient.
We use those lists to validate the dimension or metric names requested
We also use those lists to set the data type for those attributes and
cast the values accordingly (in case of integer or numeric values)
Returns:
A valid Singer.io Catalog.
"""
Expand Down

0 comments on commit 5ba0b6f

Please sign in to comment.