|
1 | | -"""Collect and send usage analytics""" |
2 | | -from __future__ import unicode_literals |
3 | | - |
4 | | -import errno |
5 | 1 | import json |
6 | 2 | import logging |
7 | 3 | import os |
| 4 | +import platform |
| 5 | +import requests |
| 6 | +import sys |
| 7 | +import tempfile |
| 8 | +import uuid |
| 9 | + |
| 10 | +import distro |
8 | 11 |
|
9 | 12 | from dvc import __version__ |
10 | | -from dvc.utils import env2bool |
11 | | -from dvc.utils.compat import str |
| 13 | +from dvc.config import Config, to_bool |
| 14 | +from dvc.daemon import daemon |
| 15 | +from dvc.exceptions import NotDvcRepoError |
| 16 | +from dvc.lock import Lock, LockError |
| 17 | +from dvc.repo import Repo |
| 18 | +from dvc.scm import SCM |
| 19 | +from dvc.utils import env2bool, is_binary, makedirs |
| 20 | +from dvc.utils.compat import str, FileNotFoundError |
12 | 21 |
|
13 | 22 |
|
14 | 23 | logger = logging.getLogger(__name__) |
15 | 24 |
|
16 | 25 |
|
17 | | -class Analytics(object): |
18 | | - """Class for collecting and sending usage analytics. |
19 | | -
|
20 | | - Args: |
21 | | - info (dict): optional existing analytics report. |
| 26 | +def collect_and_send_report(args=None, return_code=None): |
22 | 27 | """ |
| 28 | + Collect information from the runtime/environment and the command |
| 29 | + being executed into a report and send it over the network. |
23 | 30 |
|
24 | | - URL = "https://analytics.dvc.org" |
25 | | - TIMEOUT_POST = 5 |
| 31 | + To prevent analytics from blocking the execution of the main thread, |
| 32 | + sending the report is done in a separate process. |
26 | 33 |
|
27 | | - USER_ID_FILE = "user_id" |
| 34 | + The inter-process communication happens through a file containing the |
| 35 | + report as JSON, where the _collector_ generates it and the _sender_ |
| 36 | + removes it after sending it. |
| 37 | + """ |
| 38 | + report = _runtime_info() |
28 | 39 |
|
29 | | - PARAM_DVC_VERSION = "dvc_version" |
30 | | - PARAM_USER_ID = "user_id" |
31 | | - PARAM_SYSTEM_INFO = "system_info" |
| 40 | + # Include command execution information on the report only when available. |
| 41 | + if args and hasattr(args, "func"): |
| 42 | + report.update({"cmd_class": args.func.__name__}) |
32 | 43 |
|
33 | | - PARAM_OS = "os" |
| 44 | + if return_code is not None: |
| 45 | + report.update({"cmd_return_code": return_code}) |
34 | 46 |
|
35 | | - PARAM_WINDOWS_VERSION_MAJOR = "windows_version_major" |
36 | | - PARAM_WINDOWS_VERSION_MINOR = "windows_version_minor" |
37 | | - PARAM_WINDOWS_VERSION_BUILD = "windows_version_build" |
38 | | - PARAM_WINDOWS_VERSION_SERVICE_PACK = "windows_version_service_pack" |
| 47 | + with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: |
| 48 | + json.dump(report, fobj) |
| 49 | + daemon(["analytics", fobj.name]) |
39 | 50 |
|
40 | | - PARAM_MAC_VERSION = "mac_version" |
41 | 51 |
|
42 | | - PARAM_LINUX_DISTRO = "linux_distro" |
43 | | - PARAM_LINUX_DISTRO_VERSION = "linux_distro_version" |
44 | | - PARAM_LINUX_DISTRO_LIKE = "linux_distro_like" |
| 52 | +def is_enabled(): |
| 53 | + if env2bool("DVC_TEST"): |
| 54 | + return False |
45 | 55 |
|
46 | | - PARAM_SCM_CLASS = "scm_class" |
47 | | - PARAM_IS_BINARY = "is_binary" |
48 | | - PARAM_CMD_CLASS = "cmd_class" |
49 | | - PARAM_CMD_RETURN_CODE = "cmd_return_code" |
| 56 | + enabled = to_bool( |
| 57 | + Config(validate=False) |
| 58 | + .config.get(Config.SECTION_CORE, {}) |
| 59 | + .get(Config.SECTION_CORE_ANALYTICS, "true") |
| 60 | + ) |
50 | 61 |
|
51 | | - def __init__(self, info=None): |
52 | | - from dvc.config import Config |
53 | | - from dvc.lock import Lock |
| 62 | + logger.debug("Analytics is {}abled.".format("en" if enabled else "dis")) |
54 | 63 |
|
55 | | - if info is None: |
56 | | - info = {} |
| 64 | + return enabled |
57 | 65 |
|
58 | | - self.info = info |
59 | 66 |
|
60 | | - cdir = Config.get_global_config_dir() |
61 | | - try: |
62 | | - os.makedirs(cdir) |
63 | | - except OSError as exc: |
64 | | - if exc.errno != errno.EEXIST: |
65 | | - raise |
| 67 | +def send(report): |
| 68 | + """ |
| 69 | + Side effect: Removes the report after sending it. |
66 | 70 |
|
67 | | - self.user_id_file = os.path.join(cdir, self.USER_ID_FILE) |
68 | | - self.user_id_file_lock = Lock(self.user_id_file + ".lock") |
| 71 | + The report is generated and stored in a temporary file (see |
| 72 | + `collect_and_send_report`). Sending happens in another process, |
| 73 | + hence the need to remove that file afterwards. |
| 74 | + """ |
| 75 | + url = "https://analytics.dvc.org" |
| 76 | + headers = {"content-type": "application/json"} |
69 | 77 |
|
70 | | - @staticmethod |
71 | | - def load(path): |
72 | | - """Loads analytics report from json file specified by path. |
| 78 | + with open(report, "rb") as fobj: |
| 79 | + requests.post(url, data=fobj, headers=headers, timeout=5) |
73 | 80 |
|
74 | | - Args: |
75 | | - path (str): path to json file with analytics report. |
76 | | - """ |
77 | | - with open(path, "r") as fobj: |
78 | | - analytics = Analytics(info=json.load(fobj)) |
79 | | - os.unlink(path) |
80 | | - return analytics |
| 81 | + os.remove(report) |
81 | 82 |
|
82 | | - def _write_user_id(self): |
83 | | - import uuid |
84 | 83 |
|
85 | | - with open(self.user_id_file, "w+") as fobj: |
86 | | - user_id = str(uuid.uuid4()) |
87 | | - info = {self.PARAM_USER_ID: user_id} |
88 | | - json.dump(info, fobj) |
89 | | - return user_id |
| 84 | +def _scm_in_use(): |
| 85 | + try: |
| 86 | + scm = SCM(root_dir=Repo.find_root()) |
| 87 | + return type(scm).__name__ |
| 88 | + except NotDvcRepoError: |
| 89 | + pass |
90 | 90 |
|
91 | | - def _read_user_id(self): |
92 | | - if not os.path.exists(self.user_id_file): |
93 | | - return None |
94 | 91 |
|
95 | | - with open(self.user_id_file, "r") as fobj: |
96 | | - try: |
97 | | - info = json.load(fobj) |
98 | | - except ValueError as exc: |
99 | | - logger.debug("Failed to load user_id: {}".format(exc)) |
100 | | - return None |
101 | | - |
102 | | - return info[self.PARAM_USER_ID] |
103 | | - |
104 | | - def _get_user_id(self): |
105 | | - from dvc.lock import LockError |
| 92 | +def _runtime_info(): |
| 93 | + """ |
| 94 | + Gather information from the environment where DVC runs to fill a report. |
| 95 | + """ |
| 96 | + return { |
| 97 | + "dvc_version": __version__, |
| 98 | + "is_binary": is_binary(), |
| 99 | + "scm_class": _scm_in_use(), |
| 100 | + "system_info": _system_info(), |
| 101 | + "user_id": _find_or_create_user_id(), |
| 102 | + } |
106 | 103 |
|
107 | | - try: |
108 | | - with self.user_id_file_lock: |
109 | | - user_id = self._read_user_id() |
110 | | - if user_id is None: |
111 | | - user_id = self._write_user_id() |
112 | | - return user_id |
113 | | - except LockError: |
114 | | - msg = "Failed to acquire '{}'" |
115 | | - logger.debug(msg.format(self.user_id_file_lock.lockfile)) |
116 | | - |
117 | | - def _collect_windows(self): |
118 | | - import sys |
119 | 104 |
|
120 | | - version = sys.getwindowsversion() # pylint: disable=no-member |
121 | | - info = {} |
122 | | - info[self.PARAM_OS] = "windows" |
123 | | - info[self.PARAM_WINDOWS_VERSION_MAJOR] = version.major |
124 | | - info[self.PARAM_WINDOWS_VERSION_MINOR] = version.minor |
125 | | - info[self.PARAM_WINDOWS_VERSION_BUILD] = version.build |
126 | | - info[self.PARAM_WINDOWS_VERSION_SERVICE_PACK] = version.service_pack |
127 | | - return info |
128 | | - |
129 | | - def _collect_darwin(self): |
130 | | - import platform |
131 | | - |
132 | | - info = {} |
133 | | - info[self.PARAM_OS] = "mac" |
134 | | - info[self.PARAM_MAC_VERSION] = platform.mac_ver()[0] |
135 | | - return info |
136 | | - |
137 | | - def _collect_linux(self): |
138 | | - import distro |
139 | | - |
140 | | - info = {} |
141 | | - info[self.PARAM_OS] = "linux" |
142 | | - info[self.PARAM_LINUX_DISTRO] = distro.id() |
143 | | - info[self.PARAM_LINUX_DISTRO_VERSION] = distro.version() |
144 | | - info[self.PARAM_LINUX_DISTRO_LIKE] = distro.like() |
145 | | - return info |
146 | | - |
147 | | - def _collect_system_info(self): |
148 | | - import platform |
149 | | - |
150 | | - system = platform.system() |
| 105 | +def _system_info(): |
| 106 | + system = platform.system() |
151 | 107 |
|
152 | | - if system == "Windows": |
153 | | - return self._collect_windows() |
| 108 | + if system == "Windows": |
| 109 | + version = sys.getwindowsversion() |
154 | 110 |
|
155 | | - if system == "Darwin": |
156 | | - return self._collect_darwin() |
157 | | - |
158 | | - if system == "Linux": |
159 | | - return self._collect_linux() |
160 | | - |
161 | | - raise NotImplementedError |
162 | | - |
163 | | - def collect(self): |
164 | | - """Collect analytics report.""" |
165 | | - from dvc.scm import SCM |
166 | | - from dvc.utils import is_binary |
167 | | - from dvc.repo import Repo |
168 | | - from dvc.exceptions import NotDvcRepoError |
169 | | - |
170 | | - self.info[self.PARAM_DVC_VERSION] = __version__ |
171 | | - self.info[self.PARAM_IS_BINARY] = is_binary() |
172 | | - self.info[self.PARAM_USER_ID] = self._get_user_id() |
173 | | - |
174 | | - self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info() |
175 | | - |
176 | | - try: |
177 | | - scm = SCM(root_dir=Repo.find_root()) |
178 | | - self.info[self.PARAM_SCM_CLASS] = type(scm).__name__ |
179 | | - except NotDvcRepoError: |
180 | | - pass |
181 | | - |
182 | | - def collect_cmd(self, args, ret): |
183 | | - """Collect analytics info from a CLI command.""" |
184 | | - from dvc.command.daemon import CmdDaemonAnalytics |
185 | | - |
186 | | - assert isinstance(ret, int) or ret is None |
187 | | - |
188 | | - if ret is not None: |
189 | | - self.info[self.PARAM_CMD_RETURN_CODE] = ret |
190 | | - |
191 | | - if args is not None and hasattr(args, "func"): |
192 | | - assert args.func != CmdDaemonAnalytics |
193 | | - self.info[self.PARAM_CMD_CLASS] = args.func.__name__ |
194 | | - |
195 | | - def dump(self): |
196 | | - """Save analytics report to a temporary file. |
197 | | -
|
198 | | - Returns: |
199 | | - str: path to the temporary file that contains the analytics report. |
200 | | - """ |
201 | | - import tempfile |
| 111 | + return { |
| 112 | + "os": "windows", |
| 113 | + "windows_version_build": version.build, |
| 114 | + "windows_version_major": version.major, |
| 115 | + "windows_version_minor": version.minor, |
| 116 | + "windows_version_service_pack": version.service_pack, |
| 117 | + } |
202 | 118 |
|
203 | | - with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: |
204 | | - json.dump(self.info, fobj) |
205 | | - return fobj.name |
| 119 | + if system == "Darwin": |
| 120 | + return {"os": "mac", "mac_version": platform.mac_ver()[0]} |
206 | 121 |
|
207 | | - @staticmethod |
208 | | - def is_enabled(cmd=None): |
209 | | - from dvc.config import Config, to_bool |
210 | | - from dvc.command.daemon import CmdDaemonBase |
| 122 | + if system == "Linux": |
| 123 | + return { |
| 124 | + "os": "linux", |
| 125 | + "linux_distro": distro.id(), |
| 126 | + "linux_distro_like": distro.like(), |
| 127 | + "linux_distro_version": distro.version(), |
| 128 | + } |
211 | 129 |
|
212 | | - if env2bool("DVC_TEST"): |
213 | | - return False |
| 130 | + # We don't collect data for any other system. |
| 131 | + raise NotImplementedError |
214 | 132 |
|
215 | | - if isinstance(cmd, CmdDaemonBase): |
216 | | - return False |
217 | 133 |
|
218 | | - core = Config(validate=False).config.get(Config.SECTION_CORE, {}) |
219 | | - enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true")) |
220 | | - logger.debug( |
221 | | - "Analytics is {}.".format("enabled" if enabled else "disabled") |
222 | | - ) |
223 | | - return enabled |
| 134 | +def _find_or_create_user_id(): |
| 135 | + """ |
| 136 | + The user's ID is stored in a file under the global config directory. |
224 | 137 |
|
225 | | - @staticmethod |
226 | | - def send_cmd(cmd, args, ret): |
227 | | - """Collect and send analytics for CLI command. |
| 138 | + The file should contain a JSON object with a "user_id" key: |
228 | 139 |
|
229 | | - Args: |
230 | | - args (list): parsed args for the CLI command. |
231 | | - ret (int): return value of the CLI command. |
232 | | - """ |
233 | | - from dvc.daemon import daemon |
| 140 | + {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"} |
234 | 141 |
|
235 | | - if not Analytics.is_enabled(cmd): |
236 | | - return |
| 142 | + IDs are generated randomly with UUID. |
| 143 | + """ |
| 144 | + config_dir = Config.get_global_config_dir() |
| 145 | + fname = os.path.join(config_dir, "user_id") |
| 146 | + lockfile = os.path.join(config_dir, "user_id.lock") |
237 | 147 |
|
238 | | - analytics = Analytics() |
239 | | - analytics.collect_cmd(args, ret) |
240 | | - daemon(["analytics", analytics.dump()]) |
| 148 | + # Since `fname` and `lockfile` live under the global config directory, |
| 149 | + # we need to make sure that directory already exists. |
| 150 | + makedirs(config_dir, exist_ok=True) |
241 | 151 |
|
242 | | - def send(self): |
243 | | - """Collect and send analytics.""" |
244 | | - import requests |
| 152 | + try: |
| 153 | + with Lock(lockfile): |
| 154 | + try: |
| 155 | + with open(fname, "r") as fobj: |
| 156 | + user_id = json.load(fobj)["user_id"] |
245 | 157 |
|
246 | | - if not self.is_enabled(): |
247 | | - return |
| 158 | + except (FileNotFoundError, ValueError, KeyError): |
| 159 | + user_id = str(uuid.uuid4()) |
248 | 160 |
|
249 | | - self.collect() |
| 161 | + with open(fname, "w") as fobj: |
| 162 | + json.dump({"user_id": user_id}, fobj) |
250 | 163 |
|
251 | | - logger.debug("Sending analytics: {}".format(self.info)) |
| 164 | + return user_id |
252 | 165 |
|
253 | | - try: |
254 | | - requests.post(self.URL, json=self.info, timeout=self.TIMEOUT_POST) |
255 | | - except requests.exceptions.RequestException as exc: |
256 | | - logger.debug("Failed to send analytics: {}".format(str(exc))) |
| 166 | + except LockError: |
| 167 | + logger.debug("Failed to acquire {lockfile}".format(lockfile=lockfile)) |
0 commit comments