diff --git a/libs/core/langchain_core/utils/loading.py b/libs/core/langchain_core/utils/loading.py index 9e3f83ec70f5a..10ab94bcc4e61 100644 --- a/libs/core/langchain_core/utils/loading.py +++ b/libs/core/langchain_core/utils/loading.py @@ -9,16 +9,28 @@ import requests +from langchain_core._api.deprecation import deprecated + DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master") +LANGCHAINHUB_REPO = "https://raw.githubusercontent.com/hwchase17/langchain-hub/" URL_BASE = os.environ.get( "LANGCHAIN_HUB_URL_BASE", - "https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/", + LANGCHAINHUB_REPO + "{ref}/", ) HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)") T = TypeVar("T") +@deprecated( + since="0.1.30", + removal="0.2", + message=( + "Using the hwchase17/langchain-hub " + "repo for prompts is deprecated. Please use " + "https://smith.langchain.com/hub instead." + ), +) def try_load_from_hub( path: Union[str, Path], loader: Callable[[str], T], @@ -43,6 +55,8 @@ def try_load_from_hub( # Instead, use PurePosixPath to ensure that forward slashes are used as the # path separator, regardless of the operating system. full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__()) + if not full_url.startswith(LANGCHAINHUB_REPO): + raise ValueError(f"Invalid hub path: {path}") r = requests.get(full_url, timeout=5) if r.status_code != 200: diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index 3ecb233f5ff48..1cf584cae401d 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -2214,6 +2214,25 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "responses" +version = "0.25.0" +description = "A utility library for mocking out the `requests` Python library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "responses-0.25.0-py3-none-any.whl", hash = "sha256:2f0b9c2b6437db4b528619a77e5d565e4ec2a9532162ac1a131a83529db7be1a"}, + {file = "responses-0.25.0.tar.gz", hash = "sha256:01ae6a02b4f34e39bffceb0fc6786b67a25eae919c6368d05eabc8d9576c2a66"}, +] + +[package.dependencies] +pyyaml = "*" +requests = ">=2.30.0,<3.0" +urllib3 = ">=1.25.10,<3.0" + +[package.extras] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"] + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -2796,4 +2815,4 @@ extended-testing = ["jinja2"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "8fe07123109b62d7210542d8aff20df6df00819e5b0f36bc12f02206c5161c43" +content-hash = "de97591989f083b89c7a7bc6dabba87e29e13fddc812450d5196d564b2c02ce1" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 6fcc4a721f12d..045602969585d 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -58,6 +58,7 @@ pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" grandalf = "^0.8" pytest-profiling = "^1.7.0" +responses = "^0.25.0" [tool.poetry.group.test_integration] diff --git a/libs/community/tests/unit_tests/utilities/test_loading.py b/libs/core/tests/unit_tests/utils/test_loading.py similarity index 91% rename from libs/community/tests/unit_tests/utilities/test_loading.py rename to libs/core/tests/unit_tests/utils/test_loading.py index 961bcbd8733dd..89678f32dcfac 100644 --- a/libs/community/tests/unit_tests/utilities/test_loading.py +++ b/libs/core/tests/unit_tests/utils/test_loading.py @@ -9,6 +9,7 @@ import pytest import responses + from langchain_core.utils.loading import DEFAULT_REF, URL_BASE, try_load_from_hub @@ -94,3 +95,12 @@ def test_failed_request(mocked_responses: responses.RequestsMock) -> None: with pytest.raises(ValueError, 
match=re.compile("Could not find file at .*")): try_load_from_hub(f"lc://{path}", loader, "chains", {"json"}) loader.assert_not_called() + + +def test_path_traversal() -> None: + """Test that a path traversal attack is prevented.""" + path = "lc://chains/../../../../../../../../../it.json" + loader = Mock() + + with pytest.raises(ValueError): + try_load_from_hub(path, loader, "chains", {"json"})