Skip to content

Commit

Permalink
Fix thread-safety when fetching user secret in Google Colab (#1953)
Browse files Browse the repository at this point in the history
* Fix thread-safety when fetching user secret

* unused var

* request secret only once

* Update src/huggingface_hub/utils/_token.py

Co-authored-by: Omar Sanseviero <osanseviero@gmail.com>

---------

Co-authored-by: Omar Sanseviero <osanseviero@gmail.com>
  • Loading branch information
Wauplin and osanseviero committed Jan 5, 2024
1 parent 9c90f47 commit 939a0fa
Showing 1 changed file with 61 additions and 44 deletions.
105 changes: 61 additions & 44 deletions src/huggingface_hub/utils/_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
import os
import warnings
from pathlib import Path
from threading import Lock
from typing import Optional

from .. import constants
from ._runtime import is_google_colab


# Module-level state used when looking up the `HF_TOKEN` secret in Google Colab.

# NOTE(review): flag from the earlier, lock-free lookup path; the
# `_IS_GOOGLE_COLAB_CHECKED` / `_GOOGLE_COLAB_SECRET` pair below appears to
# supersede it -- confirm whether it is still needed before relying on it.
_CHECK_GOOGLE_COLAB_SECRET = True
# Set to True by `_get_token_from_google_colab` once the vault lookup has been attempted.
_IS_GOOGLE_COLAB_CHECKED = False
# Held by `_get_token_from_google_colab` around the vault lookup and the two globals around it.
_GOOGLE_COLAB_SECRET_LOCK = Lock()
# Result of the vault lookup as stored by `_get_token_from_google_colab`; None until a token is found.
_GOOGLE_COLAB_SECRET: Optional[str] = None


def get_token() -> Optional[str]:
Expand All @@ -42,52 +45,66 @@ def get_token() -> Optional[str]:


def _get_token_from_google_colab() -> Optional[str]:
    """Get token from Google Colab secrets vault using `google.colab.userdata.get(...)`.

    Token is read from the vault only once per session and then stored in a global variable to avoid re-requesting
    access to the vault.

    Returns:
        The cleaned `HF_TOKEN` secret, or `None` when not running in Google Colab, when `google.colab` cannot be
        imported, or when the secret could not be read (access refused, secret missing, or any other Colab error).
    """
    global _GOOGLE_COLAB_SECRET
    global _IS_GOOGLE_COLAB_CHECKED

    if not is_google_colab():
        return None

    # `google.colab.userdata` is not thread-safe
    # This can lead to a deadlock if multiple threads try to access it at the same time
    # (typically when using `snapshot_download`)
    # => use a lock
    # See https://github.com/huggingface/huggingface_hub/issues/1952 for more details.
    with _GOOGLE_COLAB_SECRET_LOCK:
        if _IS_GOOGLE_COLAB_CHECKED:  # request access only once
            return _GOOGLE_COLAB_SECRET

        try:
            from google.colab import userdata
            from google.colab.errors import Error as ColabError
        except ImportError:
            # Nothing was looked up, so the "checked" flag is deliberately left untouched.
            return None

        try:
            token = userdata.get("HF_TOKEN")
            _GOOGLE_COLAB_SECRET = _clean_token(token)
        except userdata.NotebookAccessError:
            # Means the user has a secret called `HF_TOKEN`, got a popup "please grant access to HF_TOKEN" and
            # refused it => warn user but ignore error => do not re-request access to user
            warnings.warn(
                "\nAccess to the secret `HF_TOKEN` has not been granted on this notebook."
                "\nYou will not be requested again."
                "\nPlease restart the session if you want to be prompted again."
            )
            _GOOGLE_COLAB_SECRET = None
        except userdata.SecretNotFoundError:
            # Means the user did not define a `HF_TOKEN` secret => warn
            warnings.warn(
                "\nThe secret `HF_TOKEN` does not exist in your Colab secrets."
                "\nTo authenticate with the Hugging Face Hub, create a token in your settings tab "
                "(https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session."
                "\nYou will be able to reuse this secret in all of your notebooks."
                "\nPlease note that authentication is recommended but still optional to access public models or datasets."
            )
            _GOOGLE_COLAB_SECRET = None
        except ColabError as e:
            # Something happened but we don't know what => recommend to open a GitHub issue
            warnings.warn(
                f"\nError while fetching `HF_TOKEN` secret value from your vault: '{str(e)}'."
                "\nYou are not authenticated with the Hugging Face Hub in this notebook."
                "\nIf the error persists, please let us know by opening an issue on GitHub "
                "(https://github.com/huggingface/huggingface_hub/issues/new)."
            )
            _GOOGLE_COLAB_SECRET = None

        # Whatever the outcome, remember that the vault was queried so the user is prompted at most once.
        _IS_GOOGLE_COLAB_CHECKED = True
        return _GOOGLE_COLAB_SECRET


def _get_token_from_environment() -> Optional[str]:
Expand Down

0 comments on commit 939a0fa

Please sign in to comment.