Skip to content

Commit 8890daf

Browse files
authored
Merge pull request #2826 from mroutis/refactor-analytics
analytics: refactor into a module
2 parents 24fc8be + 5dd6300 commit 8890daf

File tree

7 files changed

+253
-277
lines changed

7 files changed

+253
-277
lines changed

dvc/analytics.py

Lines changed: 119 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -1,256 +1,167 @@
1-
"""Collect and send usage analytics"""
2-
from __future__ import unicode_literals
3-
4-
import errno
51
import json
62
import logging
73
import os
4+
import platform
5+
import requests
6+
import sys
7+
import tempfile
8+
import uuid
9+
10+
import distro
811

912
from dvc import __version__
10-
from dvc.utils import env2bool
11-
from dvc.utils.compat import str
13+
from dvc.config import Config, to_bool
14+
from dvc.daemon import daemon
15+
from dvc.exceptions import NotDvcRepoError
16+
from dvc.lock import Lock, LockError
17+
from dvc.repo import Repo
18+
from dvc.scm import SCM
19+
from dvc.utils import env2bool, is_binary, makedirs
20+
from dvc.utils.compat import str, FileNotFoundError
1221

1322

1423
logger = logging.getLogger(__name__)
1524

1625

17-
class Analytics(object):
18-
"""Class for collecting and sending usage analytics.
19-
20-
Args:
21-
info (dict): optional existing analytics report.
26+
def collect_and_send_report(args=None, return_code=None):
2227
"""
28+
Collect information from the runtime/environment and the command
29+
being executed into a report and send it over the network.
2330
24-
URL = "https://analytics.dvc.org"
25-
TIMEOUT_POST = 5
31+
To prevent analytics from blocking the execution of the main thread,
32+
sending the report is done in a separate process.
2633
27-
USER_ID_FILE = "user_id"
34+
The inter-process communication happens through a file containing the
35+
report as JSON, where the _collector_ generates it and the _sender_
36+
removes it after sending it.
37+
"""
38+
report = _runtime_info()
2839

29-
PARAM_DVC_VERSION = "dvc_version"
30-
PARAM_USER_ID = "user_id"
31-
PARAM_SYSTEM_INFO = "system_info"
40+
# Include command execution information in the report only when available.
41+
if args and hasattr(args, "func"):
42+
report.update({"cmd_class": args.func.__name__})
3243

33-
PARAM_OS = "os"
44+
if return_code is not None:
45+
report.update({"cmd_return_code": return_code})
3446

35-
PARAM_WINDOWS_VERSION_MAJOR = "windows_version_major"
36-
PARAM_WINDOWS_VERSION_MINOR = "windows_version_minor"
37-
PARAM_WINDOWS_VERSION_BUILD = "windows_version_build"
38-
PARAM_WINDOWS_VERSION_SERVICE_PACK = "windows_version_service_pack"
47+
with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj:
48+
json.dump(report, fobj)
49+
daemon(["analytics", fobj.name])
3950

40-
PARAM_MAC_VERSION = "mac_version"
4151

42-
PARAM_LINUX_DISTRO = "linux_distro"
43-
PARAM_LINUX_DISTRO_VERSION = "linux_distro_version"
44-
PARAM_LINUX_DISTRO_LIKE = "linux_distro_like"
52+
def is_enabled():
53+
if env2bool("DVC_TEST"):
54+
return False
4555

46-
PARAM_SCM_CLASS = "scm_class"
47-
PARAM_IS_BINARY = "is_binary"
48-
PARAM_CMD_CLASS = "cmd_class"
49-
PARAM_CMD_RETURN_CODE = "cmd_return_code"
56+
enabled = to_bool(
57+
Config(validate=False)
58+
.config.get(Config.SECTION_CORE, {})
59+
.get(Config.SECTION_CORE_ANALYTICS, "true")
60+
)
5061

51-
def __init__(self, info=None):
52-
from dvc.config import Config
53-
from dvc.lock import Lock
62+
logger.debug("Analytics is {}abled.".format("en" if enabled else "dis"))
5463

55-
if info is None:
56-
info = {}
64+
return enabled
5765

58-
self.info = info
5966

60-
cdir = Config.get_global_config_dir()
61-
try:
62-
os.makedirs(cdir)
63-
except OSError as exc:
64-
if exc.errno != errno.EEXIST:
65-
raise
67+
def send(report):
68+
"""
69+
Side effect: Removes the report after sending it.
6670
67-
self.user_id_file = os.path.join(cdir, self.USER_ID_FILE)
68-
self.user_id_file_lock = Lock(self.user_id_file + ".lock")
71+
The report is generated and stored in a temporary file, see:
72+
`collect_and_send_report`. Sending happens on another process,
73+
hence the need to remove that file afterwards.
74+
"""
75+
url = "https://analytics.dvc.org"
76+
headers = {"content-type": "application/json"}
6977

70-
@staticmethod
71-
def load(path):
72-
"""Loads analytics report from json file specified by path.
78+
with open(report, "rb") as fobj:
79+
requests.post(url, data=fobj, headers=headers, timeout=5)
7380

74-
Args:
75-
path (str): path to json file with analytics report.
76-
"""
77-
with open(path, "r") as fobj:
78-
analytics = Analytics(info=json.load(fobj))
79-
os.unlink(path)
80-
return analytics
81+
os.remove(report)
8182

82-
def _write_user_id(self):
83-
import uuid
8483

85-
with open(self.user_id_file, "w+") as fobj:
86-
user_id = str(uuid.uuid4())
87-
info = {self.PARAM_USER_ID: user_id}
88-
json.dump(info, fobj)
89-
return user_id
84+
def _scm_in_use():
85+
try:
86+
scm = SCM(root_dir=Repo.find_root())
87+
return type(scm).__name__
88+
except NotDvcRepoError:
89+
pass
9090

91-
def _read_user_id(self):
92-
if not os.path.exists(self.user_id_file):
93-
return None
9491

95-
with open(self.user_id_file, "r") as fobj:
96-
try:
97-
info = json.load(fobj)
98-
except ValueError as exc:
99-
logger.debug("Failed to load user_id: {}".format(exc))
100-
return None
101-
102-
return info[self.PARAM_USER_ID]
103-
104-
def _get_user_id(self):
105-
from dvc.lock import LockError
92+
def _runtime_info():
93+
"""
94+
Gather information from the environment where DVC runs to fill a report.
95+
"""
96+
return {
97+
"dvc_version": __version__,
98+
"is_binary": is_binary(),
99+
"scm_class": _scm_in_use(),
100+
"system_info": _system_info(),
101+
"user_id": _find_or_create_user_id(),
102+
}
106103

107-
try:
108-
with self.user_id_file_lock:
109-
user_id = self._read_user_id()
110-
if user_id is None:
111-
user_id = self._write_user_id()
112-
return user_id
113-
except LockError:
114-
msg = "Failed to acquire '{}'"
115-
logger.debug(msg.format(self.user_id_file_lock.lockfile))
116-
117-
def _collect_windows(self):
118-
import sys
119104

120-
version = sys.getwindowsversion() # pylint: disable=no-member
121-
info = {}
122-
info[self.PARAM_OS] = "windows"
123-
info[self.PARAM_WINDOWS_VERSION_MAJOR] = version.major
124-
info[self.PARAM_WINDOWS_VERSION_MINOR] = version.minor
125-
info[self.PARAM_WINDOWS_VERSION_BUILD] = version.build
126-
info[self.PARAM_WINDOWS_VERSION_SERVICE_PACK] = version.service_pack
127-
return info
128-
129-
def _collect_darwin(self):
130-
import platform
131-
132-
info = {}
133-
info[self.PARAM_OS] = "mac"
134-
info[self.PARAM_MAC_VERSION] = platform.mac_ver()[0]
135-
return info
136-
137-
def _collect_linux(self):
138-
import distro
139-
140-
info = {}
141-
info[self.PARAM_OS] = "linux"
142-
info[self.PARAM_LINUX_DISTRO] = distro.id()
143-
info[self.PARAM_LINUX_DISTRO_VERSION] = distro.version()
144-
info[self.PARAM_LINUX_DISTRO_LIKE] = distro.like()
145-
return info
146-
147-
def _collect_system_info(self):
148-
import platform
149-
150-
system = platform.system()
105+
def _system_info():
106+
system = platform.system()
151107

152-
if system == "Windows":
153-
return self._collect_windows()
108+
if system == "Windows":
109+
version = sys.getwindowsversion()
154110

155-
if system == "Darwin":
156-
return self._collect_darwin()
157-
158-
if system == "Linux":
159-
return self._collect_linux()
160-
161-
raise NotImplementedError
162-
163-
def collect(self):
164-
"""Collect analytics report."""
165-
from dvc.scm import SCM
166-
from dvc.utils import is_binary
167-
from dvc.repo import Repo
168-
from dvc.exceptions import NotDvcRepoError
169-
170-
self.info[self.PARAM_DVC_VERSION] = __version__
171-
self.info[self.PARAM_IS_BINARY] = is_binary()
172-
self.info[self.PARAM_USER_ID] = self._get_user_id()
173-
174-
self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info()
175-
176-
try:
177-
scm = SCM(root_dir=Repo.find_root())
178-
self.info[self.PARAM_SCM_CLASS] = type(scm).__name__
179-
except NotDvcRepoError:
180-
pass
181-
182-
def collect_cmd(self, args, ret):
183-
"""Collect analytics info from a CLI command."""
184-
from dvc.command.daemon import CmdDaemonAnalytics
185-
186-
assert isinstance(ret, int) or ret is None
187-
188-
if ret is not None:
189-
self.info[self.PARAM_CMD_RETURN_CODE] = ret
190-
191-
if args is not None and hasattr(args, "func"):
192-
assert args.func != CmdDaemonAnalytics
193-
self.info[self.PARAM_CMD_CLASS] = args.func.__name__
194-
195-
def dump(self):
196-
"""Save analytics report to a temporary file.
197-
198-
Returns:
199-
str: path to the temporary file that contains the analytics report.
200-
"""
201-
import tempfile
111+
return {
112+
"os": "windows",
113+
"windows_version_build": version.build,
114+
"windows_version_major": version.major,
115+
"windows_version_minor": version.minor,
116+
"windows_version_service_pack": version.service_pack,
117+
}
202118

203-
with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj:
204-
json.dump(self.info, fobj)
205-
return fobj.name
119+
if system == "Darwin":
120+
return {"os": "mac", "mac_version": platform.mac_ver()[0]}
206121

207-
@staticmethod
208-
def is_enabled(cmd=None):
209-
from dvc.config import Config, to_bool
210-
from dvc.command.daemon import CmdDaemonBase
122+
if system == "Linux":
123+
return {
124+
"os": "linux",
125+
"linux_distro": distro.id(),
126+
"linux_distro_like": distro.like(),
127+
"linux_distro_version": distro.version(),
128+
}
211129

212-
if env2bool("DVC_TEST"):
213-
return False
130+
# We don't collect data for any other system.
131+
raise NotImplementedError
214132

215-
if isinstance(cmd, CmdDaemonBase):
216-
return False
217133

218-
core = Config(validate=False).config.get(Config.SECTION_CORE, {})
219-
enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true"))
220-
logger.debug(
221-
"Analytics is {}.".format("enabled" if enabled else "disabled")
222-
)
223-
return enabled
134+
def _find_or_create_user_id():
135+
"""
136+
The user's ID is stored in a file under the global config directory.
224137
225-
@staticmethod
226-
def send_cmd(cmd, args, ret):
227-
"""Collect and send analytics for CLI command.
138+
The file should contain a JSON object with a "user_id" key:
228139
229-
Args:
230-
args (list): parsed args for the CLI command.
231-
ret (int): return value of the CLI command.
232-
"""
233-
from dvc.daemon import daemon
140+
{"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"}
234141
235-
if not Analytics.is_enabled(cmd):
236-
return
142+
IDs are generated randomly with UUID.
143+
"""
144+
config_dir = Config.get_global_config_dir()
145+
fname = os.path.join(config_dir, "user_id")
146+
lockfile = os.path.join(config_dir, "user_id.lock")
237147

238-
analytics = Analytics()
239-
analytics.collect_cmd(args, ret)
240-
daemon(["analytics", analytics.dump()])
148+
# Since the `fname` and `lockfile` are under the global config,
149+
# we need to make sure that directory already exists.
150+
makedirs(config_dir, exist_ok=True)
241151

242-
def send(self):
243-
"""Collect and send analytics."""
244-
import requests
152+
try:
153+
with Lock(lockfile):
154+
try:
155+
with open(fname, "r") as fobj:
156+
user_id = json.load(fobj)["user_id"]
245157

246-
if not self.is_enabled():
247-
return
158+
except (FileNotFoundError, ValueError, KeyError):
159+
user_id = str(uuid.uuid4())
248160

249-
self.collect()
161+
with open(fname, "w") as fobj:
162+
json.dump({"user_id": user_id}, fobj)
250163

251-
logger.debug("Sending analytics: {}".format(self.info))
164+
return user_id
252165

253-
try:
254-
requests.post(self.URL, json=self.info, timeout=self.TIMEOUT_POST)
255-
except requests.exceptions.RequestException as exc:
256-
logger.debug("Failed to send analytics: {}".format(str(exc)))
166+
except LockError:
167+
logger.debug("Failed to acquire {lockfile}".format(lockfile=lockfile))

dvc/command/daemon.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@ def run(self):
2424

2525
class CmdDaemonAnalytics(CmdDaemonBase):
2626
def run(self):
27-
from dvc.analytics import Analytics
27+
from dvc import analytics
2828

29-
analytics = Analytics.load(self.args.target)
30-
analytics.send()
29+
analytics.send(self.args.target)
3130

3231
return 0
3332

dvc/config.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313

1414
from dvc.exceptions import DvcException
1515
from dvc.exceptions import NotDvcRepoError
16-
from dvc.utils.compat import open
17-
from dvc.utils.compat import str
16+
from dvc.utils.compat import open, str
1817

1918
logger = logging.getLogger(__name__)
2019

0 commit comments

Comments
 (0)