-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Render entire PDFs instead of single pages #840
Changes from all commits
b7d9678
99ea466
a3c0023
cd2706c
6910e05
f63398a
eaef837
3c4fe71
60e07cc
cf46813
5e745a9
1bec0b3
f8f300f
820e4b6
4395e39
e0b036b
9d138a3
47f9a17
786e5eb
4d62561
0de7037
ee51b9e
ae1d95f
78448e9
f1cd17a
d4d9db0
23e6aba
3acccb8
17fd596
5f11ce6
b8202fd
de97cff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,14 +2,17 @@ | |
target-version = "py38" | ||
select = ["E", "F", "I", "UP"] | ||
ignore = ["E501", "E701"] # line too long, multiple statements on one line | ||
src = ["app/backend"] | ||
src = ["app/backend", "scripts"] | ||
|
||
[tool.ruff.isort] | ||
known-local-folder = ["scripts"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI @mattmsft: this known-local-folder addition fixed the auto-import sorting that was lumping scripts together with third-party imports in the prepdocslib files. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
|
||
[tool.black] | ||
line-length = 120 | ||
|
||
[tool.pytest.ini_options] | ||
addopts = "-ra" | ||
pythonpath = ["app/backend"] | ||
pythonpath = ["app/backend", "scripts"] | ||
|
||
[tool.coverage.paths] | ||
source = ["scripts", "app"] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import os | ||
import sys | ||
from tempfile import NamedTemporaryFile | ||
|
||
import pytest | ||
from conftest import MockAzureCredential | ||
|
||
from scripts.prepdocslib.blobmanager import BlobManager | ||
from scripts.prepdocslib.listfilestrategy import File | ||
|
||
|
||
@pytest.fixture
def blob_manager(monkeypatch):
    """Build a verbose BlobManager from the storage settings in the environment.

    The endpoint is derived from ``AZURE_STORAGE_ACCOUNT`` and the container
    from ``AZURE_STORAGE_CONTAINER``; both must already be set when this
    fixture runs. A ``MockAzureCredential`` stands in for a real credential.
    """
    storage_account = os.environ["AZURE_STORAGE_ACCOUNT"]
    blob_endpoint = f"https://{storage_account}.blob.core.windows.net"
    mock_credential = MockAzureCredential()
    container_name = os.environ["AZURE_STORAGE_CONTAINER"]
    return BlobManager(
        endpoint=blob_endpoint,
        credential=mock_credential,
        container=container_name,
        verbose=True,
    )
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10 or higher")
async def test_upload_and_remove(monkeypatch, mock_env, blob_manager):
    """Upload a temporary PDF, then remove it by path.

    The ContainerClient methods are replaced with mocks that assert:
    * ``upload_blob`` receives the file's base name as the blob name;
    * ``list_blob_names`` is queried with the file's stem as the prefix;
    * ``delete_blob`` is only ever called for the uploaded file, even though
      the listing also returns an unrelated ``dontdelete.pdf``.
    """
    with NamedTemporaryFile(suffix=".pdf") as temp_file:
        f = File(temp_file.file)
        # Use basename rather than splitting on "/tmp/": the temp directory is
        # platform- and environment-dependent (macOS, Windows, custom TMPDIR).
        filename = os.path.basename(f.content.name)

        # Set up mocks used by upload_blob
        async def mock_exists(*args, **kwargs):
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)

        async def mock_upload_blob(self, name, *args, **kwargs):
            assert name == filename
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.upload_blob", mock_upload_blob)

        await blob_manager.upload_blob(f)

        # Set up mocks used by remove_blob
        def mock_list_blob_names(*args, **kwargs):
            # The prefix is the file name without its extension.
            assert kwargs.get("name_starts_with") == os.path.splitext(filename)[0]

            class AsyncBlobItemsIterator:
                """Minimal async iterator yielding the listed blob names."""

                def __init__(self, file):
                    # "dontdelete.pdf" must survive: mock_delete_blob rejects it.
                    self.files = [file, "dontdelete.pdf"]

                def __aiter__(self):
                    return self

                async def __anext__(self):
                    if self.files:
                        return self.files.pop()
                    raise StopAsyncIteration

            return AsyncBlobItemsIterator(filename)

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.list_blob_names", mock_list_blob_names)

        async def mock_delete_blob(self, name, *args, **kwargs):
            assert name == filename
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.delete_blob", mock_delete_blob)

        await blob_manager.remove_blob(f.content.name)
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10 or higher")
async def test_upload_and_remove_all(monkeypatch, mock_env, blob_manager):
    """Upload a temporary PDF, then remove every blob via ``remove_blob()``.

    The ContainerClient methods are replaced with mocks that assert:
    * ``upload_blob`` receives the file's base name as the blob name;
    * ``list_blob_names`` is called without a ``name_starts_with`` prefix;
    * ``delete_blob`` is called for the single listed blob.
    """
    with NamedTemporaryFile(suffix=".pdf") as temp_file:
        f = File(temp_file.file)
        # Use basename rather than splitting on "/tmp/": the temp directory is
        # platform- and environment-dependent (macOS, Windows, custom TMPDIR).
        filename = os.path.basename(f.content.name)

        # Set up mocks used by upload_blob
        async def mock_exists(*args, **kwargs):
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)

        async def mock_upload_blob(self, name, *args, **kwargs):
            assert name == filename
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.upload_blob", mock_upload_blob)

        await blob_manager.upload_blob(f)

        # Set up mocks used by remove_blob
        def mock_list_blob_names(*args, **kwargs):
            # Removing everything must not narrow the listing by prefix.
            assert kwargs.get("name_starts_with") is None

            class AsyncBlobItemsIterator:
                """Minimal async iterator yielding the listed blob names."""

                def __init__(self, file):
                    self.files = [file]

                def __aiter__(self):
                    return self

                async def __anext__(self):
                    if self.files:
                        return self.files.pop()
                    raise StopAsyncIteration

            return AsyncBlobItemsIterator(filename)

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.list_blob_names", mock_list_blob_names)

        async def mock_delete_blob(self, name, *args, **kwargs):
            assert name == filename
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.delete_blob", mock_delete_blob)

        await blob_manager.remove_blob()
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10 or higher")
async def test_create_container_upon_upload(monkeypatch, mock_env, blob_manager):
    """Upload a temporary PDF while the container is reported as missing.

    ``ContainerClient.exists`` is mocked to return False and
    ``create_container`` is mocked as a no-op, so the upload path must cope
    with a non-existent container. ``upload_blob`` is mocked to assert that
    the blob name is the file's base name.
    """
    with NamedTemporaryFile(suffix=".pdf") as temp_file:
        f = File(temp_file.file)
        # Use basename rather than splitting on "/tmp/": the temp directory is
        # platform- and environment-dependent (macOS, Windows, custom TMPDIR).
        filename = os.path.basename(f.content.name)

        # Set up mocks used by upload_blob
        async def mock_exists(*args, **kwargs):
            return False

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)

        async def mock_create_container(*args, **kwargs):
            return

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.create_container", mock_create_container)

        async def mock_upload_blob(self, name, *args, **kwargs):
            assert name == filename
            return True

        monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.upload_blob", mock_upload_blob)

        await blob_manager.upload_blob(f)
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10 or higher")
async def test_dont_remove_if_no_container(monkeypatch, mock_env, blob_manager):
    """Check that ``remove_blob()`` deletes nothing when the container does not exist.

    ``ContainerClient.exists`` is mocked to return False, and ``delete_blob``
    is mocked to fail the test if it is ever invoked.
    """

    async def mock_exists(*args, **kwargs):
        return False

    monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)

    async def mock_delete_blob(*args, **kwargs):
        # pytest.fail is used instead of `assert False`: the assert statement
        # is stripped under `python -O`, and an AssertionError could be
        # swallowed by a broad `except Exception` in the code under test.
        pytest.fail("delete_blob() shouldn't have been called")

    monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.delete_blob", mock_delete_blob)

    await blob_manager.remove_blob()
|
||
|
||
def test_sourcepage_from_file_page():
    """Check the source-page label for a PDF and an HTML file at page index 0.

    The PDF gets a ``#page=1`` suffix; the HTML name is returned unchanged.
    """
    pdf_sourcepage = BlobManager.sourcepage_from_file_page("test.pdf", 0)
    assert pdf_sourcepage == "test.pdf#page=1"

    html_sourcepage = BlobManager.sourcepage_from_file_page("test.html", 0)
    assert html_sourcepage == "test.html"
|
||
|
||
def test_blob_name_from_file_name():
    """Check that the blob name is the file name with its directory part dropped."""
    pdf_blob_name = BlobManager.blob_name_from_file_name("tmp/test.pdf")
    assert pdf_blob_name == "test.pdf"

    html_blob_name = BlobManager.blob_name_from_file_name("tmp/test.html")
    assert html_blob_name == "test.html"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this will still work for folks who didn't re-run prepdocs after this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep! I just did another manual test to make sure; it's working with an old env with individual pages.