diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9cc1375 --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Whatsapp extractor +*/* +md5sum.txt diff --git a/README.md b/README.md index 2f9c7d4..95cae2a 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,56 @@ -# WhatsApp Google Drive Extractor -Allows WhatsApp users on Android to extract their backed up WhatsApp data from Google Drive. +WhatsApp Google Drive Extractor +=============================== -###### BRANCH UPDATES: -v1.0 - Initial release. -v1.1 - Added Python 3 support. +Allows WhatsApp users on Android to extract their backed up WhatsApp data +from Google Drive. -###### PREREQUISITES: - 1. O/S: Windows Vista, Windows 7, Windows 8, Windows 10, Mac OS X or Linux - 2. 
Python 2.x or 3.x - If not installed: https://www.python.org/downloads/ - 3. Android device with WhatsApp installed and the Google Drive backup feature enabled - 4. Google services device id (if you want to reduce the risk of being logged out of Google) - Search Google Play for "device id" for plenty of apps that can reveal this information - 5. Google account login credentials (username and password) +Prerequisites +------------- + 1. [Python 3][PYTHON] + 2. Android device with WhatsApp installed and the Google Drive backup + feature enabled. + 3. The device's Android ID (if you want to reduce the risk of being logged + out of Google). Run `adb shell settings get secure android_id` or search Google Play for "device id" for plenty of apps + that can reveal this information. + 4. Google account login credentials (username and password). Create and use an App password when using 2-factor authentication: https://myaccount.google.com/apppasswords -###### INSTRUCTIONS: - 1. Extract "WhatsApp-GD-Extractor-master.zip". - 2. Edit the [auth] section in "settings.cfg". - 3. Run python WhatsAppGDExtract.py from your command console. - 4. Read the usage examples that are displayed. - 5. Run any of the examples. - -###### TROUBLESHOOTING: - 1. Check you have the required imports installed (configparser and requests). - I.E.: pip install configparser requests +Instructions +------------ + 1. Extract `WhatsApp-GD-Extractor-master.zip`. + 2. Install dependencies: Run `python3 -m pip install -r requirements.txt` + from your command console. Make sure gpsoauth is the latest version. + 3. Edit the `[auth]` section in `settings.cfg`. + 4. Run `python3 WhatsAppGDExtract.py` from your command console. + 5. Read the usage examples that are displayed. + 6. Run any of the examples. -###### CREDITS: - AUTHOR: TripCode +If downloading is interrupted, the files that were received successfully +won't be re-downloaded when running the tool one more time. 
After +downloading, you may verify the integrity of the downloaded files using +`md5sum --check md5sum.txt` on Linux or [md5summer][MD5SUMMER] on Windows. + + +Troubleshooting +--------------- + + 1. Check that you have the required imports installed: `python3 -m pip + install gpsoauth` + 2. If you have `Error:Need Browser`, go to this url to solve the issue: + https://accounts.google.com/b/0/DisplayUnlockCaptcha + + +Credits +------- + +Author: TripCode + +Contributors: DrDeath1122 from XDA for the multi-threading backbone part, +YuriCosta for reverse engineering the new restore system + + +[MD5SUMMER]: http://md5summer.org/ +[PYTHON]: https://www.python.org/downloads/ diff --git a/WhatsAppGDExtract.py b/WhatsAppGDExtract.py index 808572e..6b406dd 100755 --- a/WhatsAppGDExtract.py +++ b/WhatsAppGDExtract.py @@ -1,193 +1,260 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from configparser import ConfigParser +""" +usage: python3 {} help|info|list|sync + + help Show this help. + info Show WhatsApp backups. + list Show WhatsApp backup files. + sync Download all WhatsApp backups. +""" + +from base64 import b64decode +from getpass import getpass +from multiprocessing.pool import ThreadPool +from textwrap import dedent +import configparser +import gpsoauth +import hashlib import json import os -import re import requests import sys +import traceback + +def human_size(size): + for s in ["B", "kiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]: + if abs(size) < 1024: + break + size = int(size / 1024) + return "{}{}".format(size, s) + +def have_file(file, size, md5): + """ + Determine whether the named file's contents have the given size and hash. 
+ """ + if not os.path.exists(file) or size != os.path.getsize(file): + return False + + digest = hashlib.md5() + with open(file, "br") as input: + while True: + b = input.read(8 * 1024) + if not b: + break + digest.update(b) + + return md5 == digest.digest() + +def download_file(file, stream): + """ + Download a file from the given stream. + """ + os.makedirs(os.path.dirname(file), exist_ok=True) + with open(file, "bw") as dest: + for chunk in stream.iter_content(chunk_size=None): + dest.write(chunk) -def getGoogleAccountTokenFromAuth(): - payload = {'Email':gmail, 'Passwd':passw, 'app':client_pkg, 'client_sig':client_sig, 'parentAndroidId':devid} - request = requests.post('https://android.clients.google.com/auth', data=payload) - token = re.search('Token=(.*?)\n', request.text) - if token: - return token.group(1) - else: - quit(request.text) - -def getGoogleDriveToken(token): - payload = {'Token':token, 'app':pkg, 'client_sig':sig, 'device':devid, 'google_play_services_version':client_ver, 'service':'oauth2:https://www.googleapis.com/auth/drive.appdata https://www.googleapis.com/auth/drive.file', 'has_permission':'1'} - request = requests.post('https://android.clients.google.com/auth', data=payload) - token = re.search('Auth=(.*?)\n', request.text) - if token: - return token.group(1) - else: - quit(request.text) - -def rawGoogleDriveRequest(bearer, url): - headers = {'Authorization': 'Bearer '+bearer} - request = requests.get(url, headers=headers) - return request.text - -def downloadFileGoogleDrive(bearer, url, local): - if not os.path.exists(os.path.dirname(local)): - os.makedirs(os.path.dirname(local)) - if os.path.isfile(local): - os.remove(local) - headers = {'Authorization': 'Bearer '+bearer} - request = requests.get(url, headers=headers, stream=True) - request.raw.decode_content = True - if request.status_code == 200: - with open(local, 'wb') as asset: - for chunk in request.iter_content(1024): - asset.write(chunk) - print('Downloaded: "'+local+'".') - 
-def gDriveFileMap(): - global bearer - data = rawGoogleDriveRequest(bearer, 'https://www.googleapis.com/drive/v2/files') - jres = json.loads(data) - backups = [] - for result in jres['items']: +class WaBackup: + """ + Provide access to WhatsApp backups stored in Google drive. + """ + def __init__(self, gmail, password, android_id): + token = gpsoauth.perform_master_login(gmail, password, android_id) + if "Token" not in token: + quit(token) + self.auth = gpsoauth.perform_oauth( + gmail, + token["Token"], + android_id, + "oauth2:https://www.googleapis.com/auth/drive.appdata", + "com.whatsapp", + "38a0f7d505fe18fec64fbf343ecaaaf310dbd799", + ) + + def get(self, path, params=None, **kwargs): try: - if result['title'] == 'gdrive_file_map': - backups.append((result['description'], rawGoogleDriveRequest(bearer, result['downloadUrl']))) - except: - pass - if len(backups) == 0: - quit('Unable to locate google drive file map for: '+pkg) - return backups + response = requests.get( + "https://backup.googleapis.com/v1/{}".format(path), + headers={"Authorization": "Bearer {}".format(self.auth["Auth"])}, + params=params, + **kwargs, + ) + response.raise_for_status() + except requests.exceptions.HTTPError as errh: + print ("\n\nHttp Error:",errh) + except requests.exceptions.ConnectionError as errc: + print ("\n\nError Connecting:",errc) + except requests.exceptions.Timeout as errt: + print ("\n\nTimeout Error:",errt) + except requests.exceptions.RequestException as err: + print ("\n\nOOps: Something Else",err) + return response + + def get_page(self, path, page_token=None): + return self.get( + path, + None if page_token is None else {"pageToken": page_token}, + ).json() + + def list_path(self, path): + last_component = path.split("/")[-1] + page_token = None + while True: + page = self.get_page(path, page_token) + for item in page[last_component]: + yield item + if "nextPageToken" not in page: + break + page_token = page["nextPageToken"] + + def backups(self): + return 
self.list_path("clients/wa/backups") + + def backup_files(self, backup): + return self.list_path("{}/files".format(backup["name"])) + + def fetch(self, file): + name = os.path.sep.join(file["name"].split("/")[3:]) + md5Hash = b64decode(file["md5Hash"], validate=True) + if not have_file(name, int(file["sizeBytes"]), md5Hash): + download_file( + name, + self.get(file["name"].replace("%", "%25").replace("+", "%2B"), {"alt": "media"}, stream=True) + ) + + return name, int(file["sizeBytes"]), md5Hash + + def fetch_all(self, backup, cksums): + num_files = 0 + total_size = 0 + with ThreadPool(10) as pool: + downloads = pool.imap_unordered( + lambda file: self.fetch(file), + self.backup_files(backup) + ) + for name, size, md5Hash in downloads: + num_files += 1 + total_size += size + print( + "\rProgress: {:7.3f}% {:60}".format( + 100 * total_size / int(backup["sizeBytes"]), + os.path.basename(name)[-60:] + ), + end="", + flush=True, + ) + + cksums.write("{md5Hash} *{name}\n".format( + name=name, + md5Hash=md5Hash.hex(), + )) + + print("\n{} files ({})".format(num_files, human_size(total_size))) + def getConfigs(): - global gmail, passw, devid, pkg, sig, client_pkg, client_sig, client_ver - config = ConfigParser() + config = configparser.ConfigParser() try: - config.read('settings.cfg') - gmail = config.get('auth', 'gmail') - passw = config.get('auth', 'passw') - devid = config.get('auth', 'devid') - pkg = config.get('app', 'pkg') - sig = config.get('app', 'sig') - client_pkg = config.get('client', 'pkg') - client_sig = config.get('client', 'sig') - client_ver = config.get('client', 'ver') - except(ConfigParser.NoSectionError, ConfigParser.NoOptionError): - quit('The "settings.cfg" file is missing or corrupt!') - -def jsonPrint(data): - print(json.dumps(json.loads(data), indent=4, sort_keys=True)) - -def localFileLog(md5): - logfile = 'logs'+os.path.sep+'files.log' - if not os.path.exists(os.path.dirname(logfile)): - os.makedirs(os.path.dirname(logfile)) - with 
open(logfile, 'a') as log: - log.write(md5+'\n') - -def localFileList(): - logfile = 'logs'+os.path.sep+'files.log' - if os.path.isfile(logfile): - flist = open(logfile, 'r') - return [line.split('\n') for line in flist.readlines()] - else: - open(logfile, 'w') - return localFileList() + config.read("settings.cfg") + gmail = config.get("auth", "gmail") + password = config.get("auth", "password", fallback="") + if not password: + try: + password = getpass("Enter your password for {}: ".format(gmail)) + except KeyboardInterrupt: + quit("\nCancelled!") + android_id = config.get("auth", "android_id") + return { + "android_id": android_id, + "gmail": gmail, + "password": password, + } + except (configparser.NoSectionError, configparser.NoOptionError): + quit("The 'settings.cfg' file is missing or corrupt!") def createSettingsFile(): - with open('settings.cfg', 'w') as cfg: - cfg.write('[auth]\ngmail = alias@gmail.com\npassw = yourpassword\ndevid = 0000000000000000\n\n[app]\npkg = com.whatsapp\nsig = 38a0f7d505fe18fec64fbf343ecaaaf310dbd799\n\n[client]\npkg = com.google.android.gms\nsig = 38918a453d07199354f8b19af05ec6562ced5788\nver = 9877000') - -def getSingleFile(data, asset): - data = json.loads(data) - for entries in data: - if entries['f'] == asset: - return entries['f'], entries['m'], entries['r'], entries['s'] - -def getMultipleFiles(data, folder): - files = localFileList() - data = json.loads(data) - for entries in data: - if any(entries['m'] in lists for lists in files) == False or 'database' in entries['f'].lower(): - local = folder+os.path.sep+entries['f'].replace("/", os.path.sep) - if os.path.isfile(local) and 'database' not in local.lower(): - quit('Skipped: "'+local+'".') - else: - downloadFileGoogleDrive(bearer, 'https://www.googleapis.com/drive/v2/files/'+entries['r']+'?alt=media', local) - localFileLog(entries['m']) - -def runMain(mode, asset, bID): - global bearer - if os.path.isfile('settings.cfg') == False: + with open("settings.cfg", "w") as cfg: + 
cfg.write(dedent(""" + [auth] + gmail = alias@gmail.com + # Optional. The account password or app password when using 2FA. + # You will be prompted if omitted. + password = yourpassword + # The result of "adb shell settings get secure android_id". + android_id = 0000000000000000 + """).lstrip()) + +def backup_info(backup): + metadata = json.loads(backup["metadata"]) + for size in "backupSize", "chatdbSize", "mediaSize", "videoSize": + metadata[size] = human_size(int(metadata[size])) + print("Backup {} Size:({}) Upload Time:{}".format(backup["name"].split("/")[-1], metadata["backupSize"], backup["updateTime"])) + print(" WhatsApp version : {}".format(metadata["versionOfAppWhenBackup"])) + try: + print(" Password protected: {}".format(metadata["passwordProtectedBackupEnabled"])) + except: + pass + print(" Messages : {} ({})".format(metadata["numOfMessages"], metadata["chatdbSize"])) + print(" Media files : {} ({})".format(metadata["numOfMediaFiles"], metadata["mediaSize"])) + print(" Photos : {}".format(metadata["numOfPhotos"])) + print(" Videos : included={} ({})".format(metadata["includeVideosInBackup"], metadata["videoSize"])) + +def main(args): + if len(args) != 2 or args[1] not in ("info", "list", "sync"): + quit(__doc__.format(args[0])) + + if not os.path.isfile("settings.cfg"): createSettingsFile() - getConfigs() - bearer = getGoogleDriveToken(getGoogleAccountTokenFromAuth()) - drives = gDriveFileMap() - if mode == 'info': - for i, drive in enumerate(drives): - if len(drives) > 1: - print("Backup: "+str(i)) - jsonPrint(drive[0]) - elif mode == 'list': - for i, drive in enumerate(drives): - if len(drives) > 1: - print("Backup: "+str(i)) - jsonPrint(drive[1]) - elif mode == 'pull': - try: - drive = drives[bID] - except IndexError: - quit("Invalid backup ID: " + str(bID)) - target = getSingleFile(drive[1], asset) - try: - f = target[0] - m = target[1] - r = target[2] - s = target[3] - except TypeError: - quit('Unable to locate: "'+asset+'".') - local = 
'WhatsApp'+os.path.sep+f.replace("/", os.path.sep) - if os.path.isfile(local) and 'database' not in local.lower(): - quit('Skipped: "'+local+'".') - else: - downloadFileGoogleDrive(bearer, 'https://www.googleapis.com/drive/v2/files/'+r+'?alt=media', local) - localFileLog(m) - elif mode == 'sync': - for i, drive in enumerate(drives): - folder = 'WhatsApp' - if len(drives) > 1: - print('Backup: '+str(i)) - folder = 'WhatsApp-' + str(i) - getMultipleFiles(drive[1], folder) - -def main(): - args = len(sys.argv) - if args < 2 or str(sys.argv[1]) == '-help' or str(sys.argv[1]) == 'help': - print('\nUsage: '+str(sys.argv[0])+' -help|-vers|-info|-list|-sync|-pull file [backupID]\n\nExamples:\n') - print('python '+str(sys.argv[0])+' -help (this help screen)') - print('python '+str(sys.argv[0])+' -vers (version information)') - print('python '+str(sys.argv[0])+' -info (google drive app settings)') - print('python '+str(sys.argv[0])+' -list (list all availabe files)') - print('python '+str(sys.argv[0])+' -sync (sync all files locally)') - print('python '+str(sys.argv[0])+' -pull "Databases/msgstore.db.crypt12" [backupID] (download)\n') - elif str(sys.argv[1]) == '-info' or str(sys.argv[1]) == 'info': - runMain('info', 'settings', 0) - elif str(sys.argv[1]) == '-list' or str(sys.argv[1]) == 'list': - runMain('list', 'all', 0) - elif str(sys.argv[1]) == '-sync' or str(sys.argv[1]) == 'sync': - runMain('sync', 'all', 0) - elif str(sys.argv[1]) == '-vers' or str(sys.argv[1]) == 'vers': - print('\nWhatsAppGDExtract Version 1.1 Copyright (C) 2016 by TripCode\n') - elif args < 3: - quit('\nUsage: python '+str(sys.argv[0])+' -help|-vers|-info|-list|-sync|-pull file [backupID]\n') - elif str(sys.argv[1]) == '-pull' or str(sys.argv[1]) == 'pull': - try: - bID = int(sys.argv[3]) - except (IndexError, ValueError): - bID = 0 - runMain('pull', str(sys.argv[2]), bID) - else: - quit('\nUsage: python '+str(sys.argv[0])+' -help|-vers|-info|-list|-sync|-pull file [backupID]\n') + wa_backup = 
WaBackup(**getConfigs()) + backups = wa_backup.backups() + + if args[1] == "info": + for backup in backups: + answer = input("\nDo you want {}? [y/n] : ".format(backup["name"].split("/")[-1])) + if not answer or answer[0].lower() != 'y': + continue + backup_info(backup) + + elif args[1] == "list": + for backup in backups: + answer = input("\nDo you want {}? [y/n] : ".format(backup["name"].split("/")[-1])) + if not answer or answer[0].lower() != 'y': + continue + num_files = 0 + total_size = 0 + for file in wa_backup.backup_files(backup): + try: + num_files += 1 + total_size += int(file["sizeBytes"]) + print(os.path.sep.join(file["name"].split("/")[3:])) + except: + print("\n#####\n\nWarning: Unexpected error in file: {}\n\nDetail: {}\n\nException: {}\n\n#####\n".format( + os.path.sep.join(file["name"].split("/")[3:]), + json.dumps(file, indent=4, sort_keys=True), + traceback.format_exc() + )) + input("Press the key to continue...") + continue + print("{} files ({})".format(num_files, human_size(total_size))) + + elif args[1] == "sync": + with open("md5sum.txt", "w", encoding="utf-8", buffering=1) as cksums: + for backup in backups: + try: + answer = input("\nDo you want {}? 
[y/n] : ".format(backup["name"].split("/")[-1])) + if not answer or answer[0].lower() != 'y': + continue + print("Backup Size:{} Upload Time: {}".format(human_size(int(backup["sizeBytes"])), backup["updateTime"])) + wa_backup.fetch_all(backup, cksums) + except Exception as err: + print("\n#####\n\nWarning: Unexpected error in backup: {} (Size:{} Upload Time: {})\n\nException: {}\n\n#####\n".format( + backup["name"].split("/")[-1], + human_size(int(backup["sizeBytes"])), + backup["updateTime"], + traceback.format_exc() + )) + input("Press the key to continue...") if __name__ == "__main__": - main() + main(sys.argv) diff --git a/logs/files.log b/logs/files.log deleted file mode 100644 index 8b13789..0000000 --- a/logs/files.log +++ /dev/null @@ -1 +0,0 @@ - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..dbe1a4e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +gpsoauth==1.1.1 +urllib3<2 diff --git a/settings.cfg b/settings.cfg index 400d974..936b55b 100644 --- a/settings.cfg +++ b/settings.cfg @@ -1,13 +1,9 @@ [auth] gmail = alias@gmail.com -passw = yourpassword -devid = 0000000000000000 -[app] -pkg = com.whatsapp -sig = 38a0f7d505fe18fec64fbf343ecaaaf310dbd799 +# The account app password or plain text password +# You will be prompted if omitted. +password = -[client] -pkg = com.google.android.gms -sig = 38918a453d07199354f8b19af05ec6562ced5788 -ver = 9877000 \ No newline at end of file +# The result of "adb shell settings get secure android_id". +android_id = 0000000000000000