@@ -560,9 +560,9 @@ def pdf_extract(
560560 self ,
561561 * ,
562562 connection : Optional [str ] = None ,
563- max_batching_rows : int = 8192 ,
564- container_cpu : Union [float , int ] = 0.33 ,
565- container_memory : str = "512Mi " ,
563+ max_batching_rows : int = 1 ,
564+ container_cpu : Union [float , int ] = 2 ,
565+ container_memory : str = "1Gi " ,
566566 ) -> bigframes .series .Series :
567567 """Extracts text from PDF URLs and saves the text as string.
568568
@@ -574,10 +574,10 @@ def pdf_extract(
574574 connection (str or None, default None): BQ connection used for
575575 function internet transactions, and the output blob if "dst"
576576 is str. If None, uses default connection of the session.
577- max_batching_rows (int, default 8,192 ): Max number of rows per batch
577+ max_batching_rows (int, default 1 ): Max number of rows per batch
578578 sent to Cloud Run to execute the function.
579- container_cpu (int or float, default 0.33 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
580- container_memory (str, default "512Mi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
579+ container_cpu (int or float, default 2 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
580+ container_memory (str, default "1Gi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
581581
582582 Returns:
583583 bigframes.series.Series: contains all text from a PDF file
@@ -604,11 +604,11 @@ def pdf_chunk(
604604 self ,
605605 * ,
606606 connection : Optional [str ] = None ,
607- chunk_size : int = 1000 ,
607+ chunk_size : int = 2000 ,
608608 overlap_size : int = 200 ,
609- max_batching_rows : int = 8192 ,
610- container_cpu : Union [float , int ] = 0.33 ,
611- container_memory : str = "512Mi " ,
609+ max_batching_rows : int = 1 ,
610+ container_cpu : Union [float , int ] = 2 ,
611+ container_memory : str = "1Gi " ,
612612 ) -> bigframes .series .Series :
613613 """Extracts and chunks text from PDF URLs and saves the text as
614614 arrays of strings.
@@ -620,15 +620,15 @@ def pdf_chunk(
620620 connection (str or None, default None): BQ connection used for
621621 function internet transactions, and the output blob if "dst"
622622 is str. If None, uses default connection of the session.
623- chunk_size (int, default 1000 ): the desired size of each text chunk
623+ chunk_size (int, default 2000 ): the desired size of each text chunk
624624 (number of characters).
625625 overlap_size (int, default 200): the number of overlapping characters
626626 between consecutive chunks. This helps to ensure context is
627627 preserved across chunk boundaries.
628- max_batching_rows (int, default 8,192 ): Max number of rows per batch
628+ max_batching_rows (int, default 1 ): Max number of rows per batch
629629 sent to Cloud Run to execute the function.
630- container_cpu (int or float, default 0.33 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
631- container_memory (str, default "512Mi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
630+ container_cpu (int or float, default 2 ): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
631+ container_memory (str, default "1Gi "): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
632632
633633 Returns:
634634 bigframes.series.Series: Series of array[str], where each string is a
0 commit comments