# google-drive-list-shared.py

#!/usr/bin/env python

from __future__ import print_function
import time
import ast

from apiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools

from datetime import date
from csv import DictWriter

# Set this to an email address to add an extra report column flagging files
# shared with that address; leave it as False to skip that column.
email_to_audit = False

# OAuth scope handed to the client-secrets flow during authorization.
# NOTE(review): Google's docs spell the metadata-only scope as
# 'drive.metadata.readonly' -- confirm this spelling is still accepted.
SCOPES = 'https://www.googleapis.com/auth/drive.readonly.metadata'

# The report file name is stamped with today's date in ISO format.
output_filename = 'drive_audit_results-{}.csv'.format(date.today().isoformat())

# Look for previously saved credentials in storage.json first.
store = file.Storage('storage.json')
creds = store.get()

# Run the interactive OAuth flow (using client_id.json) when nothing usable
# was loaded. The store is passed to run_flow as well -- presumably so the
# fresh credentials are saved for the next run; confirm against oauth2client.
have_usable_creds = creds and not creds.invalid
if not have_usable_creds:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store)

# Build the Drive v3 client on top of an HTTP object authorized with creds.
authorized_http = creds.authorize(Http())
service = discovery.build('drive', 'v3', http=authorized_http)
# Field mask shared by every page request, so only the attributes used by
# the report (plus the paging token) are asked for.
page_fields = "nextPageToken, files(name, shared, permissions, webViewLink)"

# First page: requested without a page token.
results = service.files().list(pageSize=1000, fields=page_fields).execute()
items = results.get('files', [])
token = results.get('nextPageToken')

# Keep requesting pages for as long as the previous response carried a
# nextPageToken, accumulating every page's files into items.
while token is not None:
    results = service.files().list(
        pageSize=1000, pageToken=token, fields=page_fields).execute()
    items += results.get('files', [])
    token = results.get('nextPageToken')
    print(f'Loaded {len(items)} files so far')

# `items` is already a list of plain Python dicts (it was built from the
# 'files' entries of each list response), so it can be used directly.
# Converting it to a string and re-parsing it with ast.literal_eval only
# copied the whole listing for no benefit.
items_dict = items

print(f"You have {len(items_dict)} files in Google Drive\n")

def _build_output_row(item, audited_email=False):
    """Return the CSV row dict for one Drive file entry.

    item is one file dict from the Drive listing. audited_email is an
    email address to look for among the file's permissions, or a falsy
    value to leave the 'shared_with_audited_email' column out.
    """
    permissions = item.get('permissions', [])
    # Identify each permission by email, falling back to the display name
    # and finally to the permission type (e.g. 'anyone' for link sharing).
    shared_with = [
        p.get('emailAddress', p.get('displayName', p.get('type', '')))
        for p in permissions
    ]
    row = {
        'name': item.get('name', ''),
        'link': item.get('webViewLink', ''),
        'shared': item.get('shared', ''),
        'shared_with': ','.join(shared_with),
        # Check the permission type itself; a substring test on the joined
        # text would also flag addresses such as 'anyone@example.com'.
        'shared_publicly': any(p.get('type') == 'anyone' for p in permissions),
    }
    if audited_email:
        # Compare whole entries (ignoring case) so that 'bob@x.com' does not
        # match 'jimbob@x.com' as a substring test on the joined text would.
        wanted = audited_email.casefold()
        row['shared_with_audited_email'] = any(
            entry.casefold() == wanted for entry in shared_with
        )
    return row


# newline='' is what the csv module requires of files it writes (otherwise
# blank lines appear between rows on Windows); the explicit UTF-8 encoding
# keeps non-ASCII file names from failing under a narrow locale default.
with open(output_filename, 'w', newline='', encoding='utf-8') as output_file:
    # Every file gets a row; the 'shared' and 'shared_publicly' columns let
    # the reader filter for the files that are actually shared.
    fieldnames = ['name', 'link', 'shared', 'shared_publicly', 'shared_with']
    if email_to_audit:
        fieldnames.append('shared_with_audited_email')
    writer = DictWriter(output_file, fieldnames=fieldnames)
    writer.writeheader()
    for item in items_dict:
        writer.writerow(_build_output_row(item, email_to_audit))