Skip to content

Commit b6fd13a

Browse files
committed
update testcase for better readability
1 parent b1c8bad commit b6fd13a

File tree

1 file changed

+71
-46
lines changed

1 file changed

+71
-46
lines changed

tests/system/large/blob/test_function.py

Lines changed: 71 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -287,19 +287,38 @@ def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection:
287287
assert actual.dtype == dtypes.BYTES_DTYPE
288288

289289

290-
@pytest.fixture(scope="session")
291-
def pdf_uris() -> list[str]:
292-
return [
293-
"uri: gs://bigframes_blob_test/pdfs/test-protected.pdf",
294-
"uri: gs://bigframes_blob_test/pdfs/sample-local-pdf.pdf",
295-
]
296-
297-
298-
@pytest.mark.parametrize("verbose", (True, False))
290+
@pytest.mark.parametrize(
291+
"verbose, expected",
292+
[
293+
(
294+
True,
295+
pd.Series(
296+
[
297+
{"status": "File has not been decrypted", "content": ""},
298+
{
299+
"status": "",
300+
"content": "Sample PDF This is a testing file. Some dummy messages are used for testing purposes. ",
301+
},
302+
]
303+
),
304+
),
305+
(
306+
False,
307+
pd.Series(
308+
[
309+
"",
310+
"Sample PDF This is a testing file. Some dummy messages are used for testing purposes. ",
311+
],
312+
name="pdf",
313+
),
314+
),
315+
],
316+
)
299317
def test_blob_pdf_extract(
300318
pdf_mm_df: bpd.DataFrame,
301319
verbose: bool,
302320
bq_connection: str,
321+
expected: pd.Series,
303322
):
304323
bigframes.options.experiments.blob = True
305324

@@ -310,57 +329,63 @@ def test_blob_pdf_extract(
310329
.to_pandas()
311330
)
312331

313-
extract_data = [
314-
{"status": "File has not been decrypted", "content": ""},
315-
{
316-
"status": "",
317-
"content": "Sample PDF This is a testing file. Some dummy messages are used for testing purposes. ",
318-
},
319-
]
320-
if verbose:
321-
expected = pd.Series(extract_data)
322-
else:
323-
content_values = [item["content"] for item in extract_data]
324-
expected = pd.Series(content_values)
325-
326332
pd.testing.assert_series_equal(
327333
actual,
328334
expected,
329335
check_dtype=False,
330336
check_index=False,
331-
check_names=False,
332337
)
333338

334339

335-
@pytest.mark.parametrize("verbose", (True, False))
336-
def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, verbose: bool, bq_connection: str):
340+
@pytest.mark.parametrize(
341+
"verbose, expected",
342+
[
343+
(
344+
True,
345+
pd.Series(
346+
[
347+
{"status": "File has not been decrypted", "content": []},
348+
{
349+
"status": "",
350+
"content": [
351+
"Sample PDF This is a testing file. Some ",
352+
"dummy messages are used for testing ",
353+
"purposes. ",
354+
],
355+
},
356+
]
357+
),
358+
),
359+
(
360+
False,
361+
pd.Series(
362+
[
363+
pd.NA,
364+
"Sample PDF This is a testing file. Some ",
365+
"dummy messages are used for testing ",
366+
"purposes. ",
367+
],
368+
),
369+
),
370+
],
371+
)
372+
def test_blob_pdf_chunk(
373+
pdf_mm_df: bpd.DataFrame, verbose: bool, bq_connection: str, expected: pd.Series
374+
):
337375
bigframes.options.experiments.blob = True
338376

339-
actual = pdf_mm_df["pdf"].blob.pdf_chunk(
340-
connection=bq_connection, chunk_size=50, overlap_size=10, verbose=verbose
377+
actual = (
378+
pdf_mm_df["pdf"]
379+
.blob.pdf_chunk(
380+
connection=bq_connection, chunk_size=50, overlap_size=10, verbose=verbose
381+
)
382+
.explode()
383+
.to_pandas()
341384
)
342385

343-
chunk_data = [
344-
{"status": "File has not been decrypted", "content": []},
345-
{
346-
"status": "",
347-
"content": [
348-
"Sample PDF This is a testing file. Some ",
349-
"dummy messages are used for testing ",
350-
"purposes. ",
351-
],
352-
},
353-
]
354-
if verbose:
355-
actual = actual.explode()
356-
expected = pd.Series(chunk_data)
357-
else:
358-
content_values = [item["content"] for item in chunk_data]
359-
expected = pd.Series(content_values)
360386
pd.testing.assert_series_equal(
361-
actual.to_pandas(),
387+
actual,
362388
expected,
363389
check_dtype=False,
364390
check_index=False,
365-
check_names=False,
366391
)

0 commit comments

Comments
 (0)