diff --git a/docs/source/cn/guides/hf_file_system.md b/docs/source/cn/guides/hf_file_system.md index a077da701b..96ba6db1ea 100644 --- a/docs/source/cn/guides/hf_file_system.md +++ b/docs/source/cn/guides/hf_file_system.md @@ -11,26 +11,25 @@ ## 使用方法 ```python ->>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem() +>>> from huggingface_hub import hffs >>> # 列出目录中的所有文件 ->>> fs.ls("datasets/my-username/my-dataset-repo/data", detail=False) +>>> hffs.ls("datasets/my-username/my-dataset-repo/data", detail=False) ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # 列出仓库中的所有 ".csv" 文件 ->>> fs.glob("datasets/my-username/my-dataset-repo/**/*.csv") +>>> hffs.glob("datasets/my-username/my-dataset-repo/**/*.csv") ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # 读取远程文件 ->>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: ... train_data = f.readlines() >>> # 远程文件内容读取为字符串 ->>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") +>>> train_data = hffs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") >>> # 写入远程文件 ->>> with fs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: ... f.write("text,label") ... f.write("Fantastic movie!,good") ``` @@ -109,7 +108,7 @@ hf://[][@]/ ```python >>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem(token=token) +>>> hffs = HfFileSystem(token=token) ``` 如果您以这种方式登录,请注意在共享源代码时不要意外泄露令牌! diff --git a/docs/source/de/guides/hf_file_system.md b/docs/source/de/guides/hf_file_system.md index e33cc97cd4..ed08282df4 100644 --- a/docs/source/de/guides/hf_file_system.md +++ b/docs/source/de/guides/hf_file_system.md @@ -9,26 +9,25 @@ Zusätzlich zur [`HfApi`] bietet die `huggingface_hub` Bibliothek [`HfFileSystem ## Verwendung ```python ->>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem() +>>> from huggingface_hub import hffs >>> # Alle Dateien in einem Verzeichnis auflisten ->>> fs.ls("datasets/my-username/my-dataset-repo/data", detail=False) +>>> hffs.ls("datasets/my-username/my-dataset-repo/data", detail=False) ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # Alle ".csv"-Dateien in einem Repo auflisten ->>> fs.glob("datasets/my-username/my-dataset-repo/**.csv") +>>> hffs.glob("datasets/my-username/my-dataset-repo/**.csv") ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # Eine entfernte Datei lesen ->>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: ... train_data = f.readlines() >>> # Den Inhalt einer entfernten Datei als Zeichenkette / String lesen ->>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") +>>> train_data = hffs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") >>> # Eine entfernte Datei schreiben ->>> with fs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: ... f.write("text,label") ... f.write("Fantastic movie!,good") ``` @@ -104,7 +103,7 @@ Es ist auch möglich, sich programmatisch anzumelden, indem Sie Ihr `token` als ```python >>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem(token=token) +>>> hffs = HfFileSystem(token=token) ``` Wenn Sie sich auf diese Weise anmelden, seien Sie vorsichtig, das Token nicht versehentlich zu veröffentlichen, wenn Sie Ihren Quellcode teilen! diff --git a/docs/source/en/guides/hf_file_system.md b/docs/source/en/guides/hf_file_system.md index 60526e34c4..6211a7ba66 100644 --- a/docs/source/en/guides/hf_file_system.md +++ b/docs/source/en/guides/hf_file_system.md @@ -14,26 +14,25 @@ In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSys ## Usage ```python ->>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem() +>>> from huggingface_hub import hffs >>> # List all files in a directory ->>> fs.ls("datasets/my-username/my-dataset-repo/data", detail=False) +>>> hffs.ls("datasets/my-username/my-dataset-repo/data", detail=False) ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # List all ".csv" files in a repo ->>> fs.glob("datasets/my-username/my-dataset-repo/**/*.csv") +>>> hffs.glob("datasets/my-username/my-dataset-repo/**/*.csv") ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # Read a remote file ->>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: ... train_data = f.readlines() >>> # Read the content of a remote file as a string ->>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") +>>> train_data = hffs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") >>> # Write a remote file ->>> with fs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: ... f.write("text,label") ... f.write("Fantastic movie!,good") ``` @@ -112,7 +111,7 @@ It is also possible to log in programmatically by passing your `token` as an arg ```python >>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem(token=token) +>>> hffs = HfFileSystem(token=token) ``` If you log in this way, be careful not to accidentally leak the token when sharing your source code! diff --git a/docs/source/ko/guides/hf_file_system.md b/docs/source/ko/guides/hf_file_system.md index 74a02f71f5..798d1a3c70 100644 --- a/docs/source/ko/guides/hf_file_system.md +++ b/docs/source/ko/guides/hf_file_system.md @@ -9,26 +9,25 @@ rendered properly in your Markdown viewer. ## 사용법[[usage]] ```python ->>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem() +>>> from huggingface_hub import hffs >>> # 디렉터리의 모든 파일 나열하기 ->>> fs.ls("datasets/my-username/my-dataset-repo/data", detail=False) +>>> hffs.ls("datasets/my-username/my-dataset-repo/data", detail=False) ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # 저장소(repo)에서 ".csv" 파일 모두 나열하기 ->>> fs.glob("datasets/my-username/my-dataset-repo/**.csv") +>>> hffs.glob("datasets/my-username/my-dataset-repo/**.csv") ['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] >>> # 원격 파일 읽기 ->>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: ... train_data = f.readlines() >>> # 문자열로 원격 파일의 내용 읽기 ->>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") +>>> train_data = hffs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") >>> # 원격 파일 쓰기 ->>> with fs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: +>>> with hffs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: ... f.write("text,label") ... f.write("Fantastic movie!,good") ``` @@ -103,7 +102,7 @@ hf://[][@]/ ```python >>> from huggingface_hub import HfFileSystem ->>> fs = HfFileSystem(token=token) +>>> hffs = HfFileSystem(token=token) ``` 이렇게 로그인하는 경우 소스 코드를 공유할 때 토큰이 실수로 누출되지 않도록 주의해야 합니다! diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index d495589cd6..062326af93 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -296,6 +296,7 @@ "HfFileSystemFile", "HfFileSystemResolvedPath", "HfFileSystemStreamFile", + "hffs", ], "hub_mixin": [ "ModelHubMixin", @@ -883,6 +884,7 @@ "hf_hub_download", "hf_hub_url", "hf_raise_for_status", + "hffs", "inspect_job", "inspect_scheduled_job", "interpreter_login", @@ -1312,6 +1314,7 @@ def __dir__(): HfFileSystemFile, # noqa: F401 HfFileSystemResolvedPath, # noqa: F401 HfFileSystemStreamFile, # noqa: F401 + hffs, # noqa: F401 ) from .hub_mixin import ( ModelHubMixin, # noqa: F401 diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index 614eb6cc15..ab419912ff 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -133,22 +133,26 @@ class HfFileSystem(fsspec.AbstractFileSystem, metaclass=_Cached): Usage: ```python - >>> from huggingface_hub import HfFileSystem - - >>> fs = HfFileSystem() + >>> from huggingface_hub import hffs >>> # List files - >>> fs.glob("my-username/my-model/*.bin") + >>> hffs.glob("my-username/my-model/*.bin") ['my-username/my-model/pytorch_model.bin'] - >>> fs.ls("datasets/my-username/my-dataset", detail=False) + >>> hffs.ls("datasets/my-username/my-dataset", detail=False) ['datasets/my-username/my-dataset/.gitattributes', 'datasets/my-username/my-dataset/README.md', 'datasets/my-username/my-dataset/data.json'] >>> # Read/write files - >>> with fs.open("my-username/my-model/pytorch_model.bin") as f: + >>> with hffs.open("my-username/my-model/pytorch_model.bin") as f: ... data = f.read() - >>> with fs.open("my-username/my-model/pytorch_model.bin", "wb") as f: + >>> with hffs.open("my-username/my-model/pytorch_model.bin", "wb") as f: ... f.write(data) ``` + + Specify a token for authentication: + ```python + >>> from huggingface_hub import HfFileSystem + >>> hffs = HfFileSystem(token=token) + ``` """ root_marker = "" @@ -1255,3 +1259,6 @@ def make_instance(cls, args, kwargs, instance_state): for attr, state_value in instance_state.items(): setattr(fs, attr, state_value) return fs + + +hffs = HfFileSystem()