Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

analytics: refactor into a module #2826

Merged
merged 46 commits into from Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
ef1f038
analytics: refactor into a module
Nov 20, 2019
b618d03
analytics: return OS when system info not supported
Nov 26, 2019
7d3cbd7
analytics: collect system info under the same module
Nov 26, 2019
64d102c
analytics: move user_id module to a method
Nov 26, 2019
2e7a6d3
:nail_care: sort methods
Nov 26, 2019
5cb437b
analytics: move to single file
Nov 26, 2019
610d1ee
analytics: go back to daemon implementation
Nov 26, 2019
d958870
analytics: exit_code -> return_code
Nov 26, 2019
34d3004
:face_palm: fix wrong module
Nov 26, 2019
5261ed7
tests: analytics.find_or_create_user_id
Nov 26, 2019
467b110
py2: FileNotFoundError -> IOError
Nov 27, 2019
48b4cba
:nail_care: change naming and docstring
Nov 27, 2019
6bc2403
:nail_care: black
Nov 27, 2019
f995204
tests: functional and unit tests for analytics
Nov 27, 2019
2a5cac5
:nail_care: deepsource
Nov 27, 2019
e9d56a9
:nail_care: sort imports
Nov 27, 2019
787ea9c
tests: set temporary global config
Nov 27, 2019
e69fffa
py2 compat issues :shrug:
Nov 28, 2019
656f986
:nail_care: correct wording "disenabled" -> "disabled"
Nov 28, 2019
946dfc1
analytics: send report without loading it to memory
Dec 2, 2019
5596015
analytics: document why tmp_global_config is needed
Dec 2, 2019
2c8e149
analytics: use fspath instead of str
Dec 3, 2019
ca485db
analytics: define report schema and use it on tests
Dec 3, 2019
e0d47a1
:nail_care: formatting
Dec 3, 2019
8b5ca25
analytics: move report schema to tests
Dec 3, 2019
8a32b28
analytics: collect and send on daemon
Dec 3, 2019
624cbb2
:nail_care: more specific comment about analytics
Dec 3, 2019
285620d
tests: mock analytics.collect while testing daemon
Dec 3, 2019
02eaa9a
py35: support for fspath(pathlib.Path())
Dec 4, 2019
ede0103
py2: use convert_to_unicode instead of str
Dec 5, 2019
7a3aae1
tests: add unit test for analytics.system_info
Dec 5, 2019
05a6868
tests: isolate global config from analytics tests
Dec 5, 2019
71ead3f
analytics: use a tempfile for inter-process communication
Dec 5, 2019
7e8bdbd
remove pathlib / fspath changes related to the patch
Dec 6, 2019
bc2471c
tests: adjust scm_class schema
Dec 6, 2019
c82d9dc
compat: bring back unicode literals
Dec 6, 2019
f704e54
tests: stringify tmp_global_config since it doesnt return a pathlike …
Dec 6, 2019
da064e2
analytics: remove the report after sending it
Dec 6, 2019
5a80334
tests: use str, builtin_str in schema
Dec 6, 2019
2145daa
analytics: define private methods
Dec 6, 2019
c50bd17
analytics: collect execution info only when available
Dec 8, 2019
146e75b
analytics: raise error when collecting a not supported os
Dec 8, 2019
c9f958d
analytics: AttributeError -> KeyError
Dec 8, 2019
3244f10
:nail_care: add dot to the end of the comment
Dec 8, 2019
6e4c3d6
tests: require keys on analytics report schema
Dec 8, 2019
5dd6300
:nail_care: black
Dec 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
313 changes: 94 additions & 219 deletions dvc/analytics.py
Original file line number Diff line number Diff line change
@@ -1,256 +1,131 @@
"""Collect and send usage analytics"""
from __future__ import unicode_literals
efiop marked this conversation as resolved.
Show resolved Hide resolved

import errno
import json
import logging
import os
import platform
import requests
import sys
import tempfile
import uuid

from dvc import __version__
from dvc.utils import env2bool
from dvc.utils.compat import str
import distro

from dvc import __version__
from dvc.config import Config, to_bool
from dvc.daemon import daemon
from dvc.exceptions import NotDvcRepoError
from dvc.lock import Lock, LockError
from dvc.repo import Repo
from dvc.scm import SCM
from dvc.utils import env2bool, is_binary, makedirs
efiop marked this conversation as resolved.
Show resolved Hide resolved

logger = logging.getLogger(__name__)


class Analytics(object):
"""Class for collecting and sending usage analytics.

Args:
info (dict): optional existing analytics report.
def collect_and_send_report(arguments=None, exit_code=None):
This conversation was marked as resolved.
Show resolved Hide resolved
"""
Query the system to fill a report and send it on a detached process.

URL = "https://analytics.dvc.org"
TIMEOUT_POST = 5

USER_ID_FILE = "user_id"

PARAM_DVC_VERSION = "dvc_version"
PARAM_USER_ID = "user_id"
PARAM_SYSTEM_INFO = "system_info"

PARAM_OS = "os"

PARAM_WINDOWS_VERSION_MAJOR = "windows_version_major"
PARAM_WINDOWS_VERSION_MINOR = "windows_version_minor"
PARAM_WINDOWS_VERSION_BUILD = "windows_version_build"
PARAM_WINDOWS_VERSION_SERVICE_PACK = "windows_version_service_pack"

PARAM_MAC_VERSION = "mac_version"

PARAM_LINUX_DISTRO = "linux_distro"
PARAM_LINUX_DISTRO_VERSION = "linux_distro_version"
PARAM_LINUX_DISTRO_LIKE = "linux_distro_like"

PARAM_SCM_CLASS = "scm_class"
PARAM_IS_BINARY = "is_binary"
PARAM_CMD_CLASS = "cmd_class"
PARAM_CMD_RETURN_CODE = "cmd_return_code"

def __init__(self, info=None):
from dvc.config import Config
from dvc.lock import Lock

if info is None:
info = {}

self.info = info
A temporary file is used as a means of communication between the
current and detached processes.
"""
report = {
"cmd_class": arguments.func.__name__,
"cmd_return_code": exit_code,
"dvc_version": __version__,
"is_binary": is_binary(),
"scm_class": scm_in_use(),
"system_info": system_info(),
"user_id": find_or_create_user_id(),
}

cdir = Config.get_global_config_dir()
try:
os.makedirs(cdir)
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj:
json.dump(report, fobj)
daemon(["analytics", fobj.name])

self.user_id_file = os.path.join(cdir, self.USER_ID_FILE)
self.user_id_file_lock = Lock(self.user_id_file + ".lock")

@staticmethod
def load(path):
"""Loads analytics report from json file specified by path.
def is_enabled():
if env2bool("DVC_TEST"):
return False

Args:
path (str): path to json file with analytics report.
"""
with open(path, "r") as fobj:
analytics = Analytics(info=json.load(fobj))
os.unlink(path)
return analytics
enabled = to_bool(
Config(validate=False)
.config.get(Config.SECTION_CORE, {})
.get(Config.SECTION_CORE_ANALYTICS, "true")
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks ridiculous that we need to make this that hard. Why can't we just:

enabled = Config().config['core']['analytics']

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, in its current form we can only rely on `core` being present if the config comes through the validator, since the validator sets the default values.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get what's wrong with my approach. It uses a validator, which will create the core section and a default for analytics.


def _write_user_id(self):
import uuid
logger.debug("Analytics is {}enabled.".format("" if enabled else "dis"))
efiop marked this conversation as resolved.
Show resolved Hide resolved
This conversation was marked as resolved.
Show resolved Hide resolved

with open(self.user_id_file, "w+") as fobj:
user_id = str(uuid.uuid4())
info = {self.PARAM_USER_ID: user_id}
json.dump(info, fobj)
return user_id
return enabled

def _read_user_id(self):
if not os.path.exists(self.user_id_file):
return None

with open(self.user_id_file, "r") as fobj:
try:
info = json.load(fobj)
except ValueError as exc:
logger.debug("Failed to load user_id: {}".format(exc))
return None

return info[self.PARAM_USER_ID]
def send(path):
url = "https://analytics.dvc.org"

def _get_user_id(self):
from dvc.lock import LockError
with open(path) as fobj:
report = json.load(fobj)

try:
with self.user_id_file_lock:
user_id = self._read_user_id()
if user_id is None:
user_id = self._write_user_id()
return user_id
except LockError:
msg = "Failed to acquire '{}'"
logger.debug(msg.format(self.user_id_file_lock.lockfile))

def _collect_windows(self):
import sys
requests.post(url, json=report, timeout=5)
This conversation was marked as resolved.
Show resolved Hide resolved

version = sys.getwindowsversion() # pylint: disable=no-member
info = {}
info[self.PARAM_OS] = "windows"
info[self.PARAM_WINDOWS_VERSION_MAJOR] = version.major
info[self.PARAM_WINDOWS_VERSION_MINOR] = version.minor
info[self.PARAM_WINDOWS_VERSION_BUILD] = version.build
info[self.PARAM_WINDOWS_VERSION_SERVICE_PACK] = version.service_pack
return info

def _collect_darwin(self):
import platform

info = {}
info[self.PARAM_OS] = "mac"
info[self.PARAM_MAC_VERSION] = platform.mac_ver()[0]
return info

def _collect_linux(self):
    """Build the system-info entry reported for Linux.

    Returns:
        dict: "linux" under ``self.PARAM_OS`` plus the values of
        ``distro.id()``, ``distro.version()`` and ``distro.like()`` under
        their respective ``PARAM_LINUX_*`` keys.
    """
    import distro

    return {
        self.PARAM_OS: "linux",
        self.PARAM_LINUX_DISTRO: distro.id(),
        self.PARAM_LINUX_DISTRO_VERSION: distro.version(),
        self.PARAM_LINUX_DISTRO_LIKE: distro.like(),
    }

def _collect_system_info(self):
import platform

system = platform.system()

if system == "Windows":
return self._collect_windows()
def scm_in_use():
    """Return the class name of the SCM object for the current repo.

    Returns:
        str: ``type(scm).__name__`` for the SCM built on the root returned
        by ``Repo.find_root()``, or None when NotDvcRepoError is raised
        while locating the root or building the SCM object.
    """
    try:
        root = Repo.find_root()
        scm = SCM(root_dir=root)
    except NotDvcRepoError:
        return None
    return type(scm).__name__

if system == "Darwin":
return self._collect_darwin()

if system == "Linux":
return self._collect_linux()

raise NotImplementedError

def collect(self):
    """Fill ``self.info`` with the environment part of the report.

    Stores ``__version__``, the result of ``is_binary()``, the user ID and
    the system info. The SCM class name is stored only when looking up the
    repo root and building the SCM object does not raise NotDvcRepoError.
    """
    from dvc.scm import SCM
    from dvc.utils import is_binary
    from dvc.repo import Repo
    from dvc.exceptions import NotDvcRepoError

    report = self.info
    report[self.PARAM_DVC_VERSION] = __version__
    report[self.PARAM_IS_BINARY] = is_binary()
    report[self.PARAM_USER_ID] = self._get_user_id()
    report[self.PARAM_SYSTEM_INFO] = self._collect_system_info()

    try:
        root_dir = Repo.find_root()
        scm_name = type(SCM(root_dir=root_dir)).__name__
    except NotDvcRepoError:
        # Outside of a DVC repo the SCM entry is simply left out.
        return
    report[self.PARAM_SCM_CLASS] = scm_name

def collect_cmd(self, args, ret):
    """Record information about a CLI command in ``self.info``.

    Args:
        args: parsed CLI arguments, or None. The command class name is
            recorded only when ``args`` has a ``func`` attribute.
        ret (int): return code of the command, or None to skip recording it.
    """
    from dvc.command.daemon import CmdDaemonAnalytics

    assert ret is None or isinstance(ret, int)

    if ret is not None:
        self.info[self.PARAM_CMD_RETURN_CODE] = ret

    if args is None or not hasattr(args, "func"):
        return

    # The analytics daemon command itself must never be reported.
    assert args.func != CmdDaemonAnalytics
    self.info[self.PARAM_CMD_CLASS] = args.func.__name__

def dump(self):
    """Write the analytics report to a temporary JSON file.

    The file is created with ``delete=False``, so it is not removed when
    it is closed here.

    Returns:
        str: path to the temporary file that contains the analytics report.
    """
    import tempfile

    report_file = tempfile.NamedTemporaryFile(delete=False, mode="w")
    with report_file:
        json.dump(self.info, report_file)
    return report_file.name
def system_info():
system = platform.system()

@staticmethod
def is_enabled(cmd=None):
from dvc.config import Config, to_bool
from dvc.command.daemon import CmdDaemonBase
if system == "Windows":
version = sys.getwindowsversion()

if env2bool("DVC_TEST"):
return False
return {
"os": "windows",
"windows_version_build": version.build,
"windows_version_major": version.major,
"windows_version_minor": version.minor,
"windows_version_service_pack": version.service_pack,
}

if isinstance(cmd, CmdDaemonBase):
return False
if system == "Darwin":
return {"os": "mac", "mac_version": platform.mac_ver()[0]}

core = Config(validate=False).config.get(Config.SECTION_CORE, {})
enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true"))
logger.debug(
"Analytics is {}.".format("enabled" if enabled else "disabled")
)
return enabled
if system == "Linux":
return {
"os": "linux",
"linux_distro": distro.id(),
"linux_distro_like": distro.like(),
"linux_distro_version": distro.version(),
}

@staticmethod
def send_cmd(cmd, args, ret):
"""Collect and send analytics for CLI command.
return {"os": system.lower()}
This conversation was marked as resolved.
Show resolved Hide resolved

Args:
args (list): parsed args for the CLI command.
ret (int): return value of the CLI command.
"""
from dvc.daemon import daemon

if not Analytics.is_enabled(cmd):
return
def find_or_create_user_id():
"""
The user's ID is stored on a file under the global config directory.

analytics = Analytics()
analytics.collect_cmd(args, ret)
daemon(["analytics", analytics.dump()])
The file should contain a JSON with a "user_id" key:

def send(self):
"""Collect and send analytics."""
import requests
{"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"}

if not self.is_enabled():
return
IDs are generated randomly with UUID.
"""
config_dir = Config.get_global_config_dir()
fname = config_dir / "user_id"
lockfile = fname.with_suffix(".lock")

self.collect()
try:
with Lock(lockfile):
try:
user_id = json.load(fname.read_text())["user_id"]
except (FileNotFoundError, json.JSONDecodeError, AttributeError):
user_id = str(uuid.uuid4())
makedirs(fname.parent, exist_ok=True)
fname.write_text(json.dumps({"user_id": user_id}))

logger.debug("Sending analytics: {}".format(self.info))
return user_id

try:
requests.post(self.URL, json=self.info, timeout=self.TIMEOUT_POST)
except requests.exceptions.RequestException as exc:
logger.debug("Failed to send analytics: {}".format(str(exc)))
except LockError:
logger.debug("Failed to acquire {lock}".format(lockfile))
9 changes: 5 additions & 4 deletions dvc/command/daemon.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import unicode_literals

import os

from dvc.command.base import CmdBaseNoRepo
from dvc.command.base import fix_subparsers

Expand All @@ -10,7 +12,6 @@ class CmdDaemonBase(CmdBaseNoRepo):

class CmdDaemonUpdater(CmdDaemonBase):
def run(self):
import os
from dvc.repo import Repo
from dvc.updater import Updater

Expand All @@ -24,10 +25,10 @@ def run(self):

class CmdDaemonAnalytics(CmdDaemonBase):
    """Daemon command that sends an analytics report stored in a file."""

    def run(self):
        """Send the report at ``self.args.target`` and delete that file.

        Returns:
            int: always 0.
        """
        from dvc import analytics

        try:
            analytics.send(self.args.target)
        finally:
            # ``os.path`` has no ``remove``; the removal call is ``os.remove``.
            # Running it in ``finally`` keeps a failed send from leaving the
            # report file behind.
            os.remove(self.args.target)

        return 0

Expand Down
Loading