|
22 | 22 | from urllib.parse import urlparse
|
23 | 23 |
|
24 | 24 | import requests
|
| 25 | +from langchain_core._api.deprecation import deprecated |
25 | 26 | from langchain_core.documents import Document
|
26 | 27 | from langchain_core.utils import get_from_dict_or_env
|
27 | 28 |
|
|
# Name the logger after the module's dotted import path, not its file path.
# A ``__file__``-based name (e.g. "/site-packages/.../pdf.py") sits outside the
# package's logger hierarchy, so level/handler configuration applied to parent
# loggers would never reach it.
logger = logging.getLogger(__name__)
|
48 | 49 |
|
49 | 50 |
|
| 51 | +@deprecated( |
| 52 | + since="0.3.22", |
| 53 | + removal="1.0", |
| 54 | + alternative_import="langchain_unstructured.UnstructuredPDFLoader", |
| 55 | +) |
50 | 56 | class UnstructuredPDFLoader(UnstructuredFileLoader):
|
51 | 57 | """Load `PDF` files using `Unstructured`.
|
52 | 58 |
|
@@ -174,6 +180,11 @@ def source(self) -> str:
|
174 | 180 | return self.web_path if self.web_path is not None else self.file_path
|
175 | 181 |
|
176 | 182 |
|
| 183 | +@deprecated( |
| 184 | + since="0.3.22", |
| 185 | + removal="1.0", |
| 186 | + alternative_import="langchain_unstructured.UnstructuredPDFLoader", |
| 187 | +) |
177 | 188 | class OnlinePDFLoader(BasePDFLoader):
|
178 | 189 | """Load online `PDF`."""
|
179 | 190 |
|
@@ -423,6 +434,11 @@ def lazy_load(
|
423 | 434 | yield from self.parser.parse(blob)
|
424 | 435 |
|
425 | 436 |
|
| 437 | +@deprecated( |
| 438 | + since="0.3.22", |
| 439 | + removal="1.0", |
| 440 | + alternative="langchain_community.document_loaders.generic.GenericLoader", |
| 441 | +) |
426 | 442 | class PyPDFDirectoryLoader(BaseLoader):
|
427 | 443 | """Load and parse a directory of PDF files using 'pypdf' library.
|
428 | 444 |
|
@@ -1422,4 +1438,10 @@ def lazy_load(self) -> Iterator[Document]:
|
1422 | 1438 |
|
1423 | 1439 |
|
1424 | 1440 | # Legacy: only for backwards compatibility. Use PyPDFLoader instead
|
1425 |
| -PagedPDFSplitter = PyPDFLoader |
@deprecated(since="0.0.30", removal="1.0", alternative="PyPDFLoader")
class PagedPDFSplitter(PyPDFLoader):
    """Legacy name for ``PyPDFLoader``, kept only for backwards compatibility.

    This subclass adds no attributes or methods of its own; everything is
    inherited from ``PyPDFLoader``. It is marked deprecated, with
    ``PyPDFLoader`` named as the alternative.
    """
0 commit comments