-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PARO-720] ENH Model sharing helpers #315
Changes from 8 commits
83e8b1b
873d177
a666b69
9ac4a02
8c28769
612a2fe
dd5a8e8
cb59bc9
df1a9f3
15e9f16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
from collections import namedtuple | ||
import logging | ||
|
||
from civis import APIClient | ||
from civis.ml._model import _PRED_TEMPLATES | ||
|
||
# Names exported as the public API of this module.
__all__ = ['list_models', 'put_models_shares_groups',
           'put_models_shares_users', 'delete_models_shares_groups',
           'delete_models_shares_users']
log = logging.getLogger(__name__)

# Sentinel used as the default value of the ``author`` argument
# of ``list_models``.
SENTINEL = namedtuple('Sentinel', [])()
|
@@ -58,3 +62,261 @@ def list_models(job_type="train", author=SENTINEL, client=None, **kwargs): | |
author=author, | ||
**kwargs) | ||
return models | ||
|
||
|
||
def put_models_shares_users(id, user_ids, permission_level,
                            client=None,
                            share_email_body='DEFAULT',
                            send_shared_email='DEFAULT'):
    """Grant users a permission level on a model job.

    Works for both training and scoring jobs. For a training job,
    "read" permission is enough to let a user score the model.

    Parameters
    ----------
    id : integer
        The ID of the job to share.
    user_ids : list
        One or more user IDs to share the job with.
    permission_level : string
        One of "read", "write", or "manage".
    client : :class:`civis.APIClient`, optional
        API client to use. If omitted, a :class:`civis.APIClient` is
        created from the :envvar:`CIVIS_API_KEY`.
    share_email_body : string, optional
        Custom body text for the e-mail sent on a share.
    send_shared_email : boolean, optional
        Whether to e-mail the recipients of the share.

    Returns
    -------
    readers : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    writers : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    owners : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    total_user_shares : integer
        Total number of users shared with (for owners), or the number
        of visible users shared with (for writers and readers).
    total_group_shares : integer
        Total number of groups shared with (for owners), or the number
        of visible groups shared with (for writers and readers).
    """
    # Forward only the e-mail options the caller actually set; options
    # left at the 'DEFAULT' placeholder are not passed on at all.
    email_options = (('send_shared_email', send_shared_email),
                     ('share_email_body', share_email_body))
    overrides = {name: value for name, value in email_options
                 if value != 'DEFAULT'}
    return _share_model(id, user_ids, permission_level, entity_type='users',
                        client=client, **overrides)
|
||
|
||
def put_models_shares_groups(id, group_ids, permission_level,
                             client=None,
                             share_email_body='DEFAULT',
                             send_shared_email='DEFAULT'):
    """Grant groups a permission level on a model job.

    Works for both training and scoring jobs. For a training job,
    "read" permission is enough to let group members score the model.

    Parameters
    ----------
    id : integer
        The ID of the job to share.
    group_ids : list
        One or more group IDs to share the job with.
    permission_level : string
        One of "read", "write", or "manage".
    client : :class:`civis.APIClient`, optional
        API client to use. If omitted, a :class:`civis.APIClient` is
        created from the :envvar:`CIVIS_API_KEY`.
    share_email_body : string, optional
        Custom body text for the e-mail sent on a share.
    send_shared_email : boolean, optional
        Whether to e-mail the recipients of the share.

    Returns
    -------
    readers : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    writers : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    owners : dict::
        - users : list::
            - id : integer
            - name : string
        - groups : list::
            - id : integer
            - name : string
    total_user_shares : integer
        Total number of users shared with (for owners), or the number
        of visible users shared with (for writers and readers).
    total_group_shares : integer
        Total number of groups shared with (for owners), or the number
        of visible groups shared with (for writers and readers).
    """
    # Forward only the e-mail options the caller actually set; options
    # left at the 'DEFAULT' placeholder are not passed on at all.
    email_options = (('send_shared_email', send_shared_email),
                     ('share_email_body', share_email_body))
    overrides = {name: value for name, value in email_options
                 if value != 'DEFAULT'}
    return _share_model(id, group_ids, permission_level, entity_type='groups',
                        client=client, **overrides)
|
||
|
||
def _share_model(job_id, entity_ids, permission_level, entity_type,
                 client=None, **kwargs):
    """Share a container job and all run outputs with requested entities

    Parameters
    ----------
    job_id : integer
        The ID of the container job to share.
    entity_ids : list
        IDs of the users or groups to share with.
    permission_level : string
        Permission level to grant on the job. Run outputs receive the
        same level, except as noted in the comments below.
    entity_type : string
        Either "users" or "groups".
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object is created.
    **kwargs
        Passed through to the container job's share endpoint only.

    Returns
    -------
    The response from sharing the container job itself. Responses from
    sharing the individual run outputs are discarded.

    Raises
    ------
    ValueError
        If ``entity_type`` is not "groups" or "users".
    """
    client = client or APIClient()
    if entity_type not in ['groups', 'users']:
        raise ValueError("'entity_type' must be one of ['groups', 'users']. "
                         "Got '{0}'.".format(entity_type))

    log.debug("Sharing object %d with %s %s at permission level %s.",
              job_id, entity_type, entity_ids, permission_level)
    share_job = getattr(client.scripts,
                        "put_containers_shares_" + entity_type)
    result = share_job(job_id, entity_ids, permission_level, **kwargs)

    # Shareable run output types, mapped to the name of the client
    # attribute which holds their sharing endpoints.
    endpoint_groups = {'File': 'files',
                       'Project': 'projects',
                       'JSONValue': 'json_values'}
    endpoint_name = "put_shares_" + entity_type

    # CivisML relies on several run outputs attached to each model run.
    # Go through and share all outputs on each run.
    runs = client.scripts.list_containers_runs(job_id, iterator=True)
    for run in runs:
        log.debug("Sharing outputs on %d, run %s.", job_id, run.id)
        outputs = client.scripts.list_containers_runs_outputs(job_id, run.id)
        for _output in outputs:
            object_type = _output['object_type']
            if object_type not in endpoint_groups:
                # Not expected for CivisML runs; record it rather than
                # skipping silently.
                log.debug("Not sharing run output of unsupported type %s "
                          "on %d, run %s.", object_type, job_id, run.id)
                continue

            if (object_type == 'File' and _output['name'] == 'log.txt' and
                    permission_level == 'read'):
                # Require "write" level permission or higher to view
                # the debug logs.
                log.debug("Not sharing log.txt on %d, run %s at "
                          "permission level %s.",
                          job_id, run.id, permission_level)
                continue

            if object_type == 'Project' and permission_level == 'read':
                # Users must be able to add to projects to use the model:
                # the training job keeps a "scoring jobs" project as a run
                # output, and scoring jobs add themselves to that project.
                # Adding to a project needs "write" permission, so "read"
                # on the model is upgraded to "write" on the project.
                obj_permission = 'write'
            else:
                obj_permission = permission_level

            share_output = getattr(
                getattr(client, endpoint_groups[object_type]), endpoint_name)
            # Don't send share emails for any of the run outputs.
            share_output(_output['object_id'], entity_ids, obj_permission,
                         send_shared_email=False)

    return result
|
||
|
||
def delete_models_shares_users(id, user_id, client=None):
    """Remove a single user's permissions on a model job.

    Works for both training and scoring jobs.

    Parameters
    ----------
    id : integer
        The ID of the shared job.
    user_id : integer
        The ID of the user whose permissions are revoked.
    client : :class:`civis.APIClient`, optional
        API client to use. If omitted, a :class:`civis.APIClient` is
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    None
        Response code 204: success
    """
    response = _unshare_model(id, user_id,
                              entity_type='users', client=client)
    return response
|
||
|
||
def delete_models_shares_groups(id, group_id, client=None):
    """Remove a single group's permissions on a model job.

    Works for both training and scoring jobs.

    Parameters
    ----------
    id : integer
        The ID of the shared job.
    group_id : integer
        The ID of the group whose permissions are revoked.
    client : :class:`civis.APIClient`, optional
        API client to use. If omitted, a :class:`civis.APIClient` is
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    None
        Response code 204: success
    """
    response = _unshare_model(id, group_id,
                              entity_type='groups', client=client)
    return response
|
||
|
||
def _unshare_model(job_id, entity_id, entity_type, client=None):
    """Revoke permissions on a container job and all run outputs
    for the requested entity (singular)

    Parameters
    ----------
    job_id : integer
        The ID of the container job.
    entity_id : integer
        ID of the single user or group whose permissions are revoked.
    entity_type : string
        Either "users" or "groups".
    client : :class:`civis.APIClient`, optional
        If not provided, an :class:`civis.APIClient` object is created.

    Returns
    -------
    The response from revoking permissions on the container job itself.
    Responses from the individual run outputs are discarded.

    Raises
    ------
    ValueError
        If ``entity_type`` is not "groups" or "users".
    """
    client = client or APIClient()
    if entity_type not in ['groups', 'users']:
        raise ValueError("'entity_type' must be one of ['groups', 'users']. "
                         "Got '{0}'.".format(entity_type))

    log.debug("Revoking permissions on object %d for %s %s.",
              job_id, entity_type, entity_id)
    unshare_job = getattr(client.scripts,
                          "delete_containers_shares_" + entity_type)
    result = unshare_job(job_id, entity_id)

    # Run output types with share endpoints, mapped to the name of the
    # client attribute which holds those endpoints.
    endpoint_groups = {'File': 'files',
                       'Project': 'projects',
                       'JSONValue': 'json_values'}
    endpoint_name = "delete_shares_" + entity_type

    # CivisML relies on several run outputs attached to each model run.
    # Go through and revoke permissions for outputs on each run.
    runs = client.scripts.list_containers_runs(job_id, iterator=True)
    for run in runs:
        log.debug("Unsharing outputs on %d, run %s.", job_id, run.id)
        outputs = client.scripts.list_containers_runs_outputs(job_id, run.id)
        for _output in outputs:
            object_type = _output['object_type']
            if object_type not in endpoint_groups:
                # Record skipped outputs rather than ignoring them silently.
                log.debug("Not unsharing run output of unsupported type %s "
                          "on %d, run %s.", object_type, job_id, run.id)
                continue
            unshare_output = getattr(
                getattr(client, endpoint_groups[object_type]), endpoint_name)
            unshare_output(_output['object_id'], entity_id)

    return result
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm confused by the type annotation here. It looks like "users" and "groups" are lists of dicts, right? Should this say `list[dict]` to specify that explicitly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I copy-pasted this directly from the corresponding doc strings for other sharing endpoints. I'll take a look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `readers` value is a single dictionary. It contains keys `users` and `groups`, each of which is a list of dictionaries. In Python notation, I think you're right that the types of `users` and `groups` would be `list[dict]`, but I think that what's shown here is also correct, and consistent with the type notation used in the other API endpoints.