diff --git a/.flake8 b/.flake8 index 2e4387498..87f6e408c 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b8edda51c..a3da1b0d4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 + digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 +# created: 2023-08-02T10:53:29.114535628Z diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index 41bff0b53..b2016d119 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 4d6a1d0f6..0cb0d0dd0 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2018 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index f8137d0ae..8e39a2cc4 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh index f52514257..6f3972140 100755 --- a/.kokoro/populate-secrets.sh +++ b/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC. +# Copyright 2023 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 1c4d62370..9eafe0be3 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/release.sh b/.kokoro/release.sh index c6a7c9460..078fc1c20 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 6ae81b743..cb8bbaa2e 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -38,3 +38,12 @@ env_vars: { key: "SECRET_MANAGER_KEYS" value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } + +# Store the packages we uploaded to PyPI. 
That way, we have a record of exactly +# what we published, which we can use to generate SBOMs and attestations. +action { + define_artifacts { + regex: "github/python-bigquery/**/*.tar.gz" + strip_prefix: "github/python-bigquery" + } +} diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 66a2172a7..029bd342d 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.12.7 \ - --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ - --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 +certifi==2023.7.22 \ + --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ + --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ @@ -113,28 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==39.0.1 \ - --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ - --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ - --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ - --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ - --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ - --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ - --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ - --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ - --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ - --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ - --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ - --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ - --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ - --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ - --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ - --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ - --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ - --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ - --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ - --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ - --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 +cryptography==41.0.3 \ + --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ + --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ + --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ + --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ + --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ + 
--hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ + --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ + --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ + --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ + --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ + --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ + --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ + --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ + --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ + --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ + --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ + --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ + --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ + --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ + --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ + --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ + --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ + --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de # via # gcp-releasetool # secretstorage @@ -394,9 +396,9 @@ pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.13.0 \ - --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \ - --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 +pygments==2.15.0 \ + --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ + --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 # via # readme-renderer # rich @@ -419,9 +421,9 @@ readme-renderer==37.3 \ --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine -requests==2.28.1 \ - --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ - --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 +requests==2.31.0 \ + --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ + --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 # via # gcp-releasetool # google-api-core diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh index ba3a707b0..63ac41dfa 100755 --- a/.kokoro/test-samples-against-head.sh +++ b/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 2c6500cae..5a0f5fab6 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 11c042d34..50b35a48c 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index f39236e94..d85b1f267 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2017 Google Inc. +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 4af6cdc26..59a7cf3a9 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5405cc8ff..19409cbd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 6.1.0 hooks: - id: flake8 diff --git a/.trampolinerc b/.trampolinerc index 0eee72ab6..a7dfeb42c 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Template for .trampolinerc - # Add required env vars here. 
required_envvars+=( ) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbde01f4..cf64e2222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,55 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19) + + +### Bug Fixes + +* Updates typing in function definitions ([#1613](https://github.com/googleapis/python-bigquery/issues/1613)) ([db755ce](https://github.com/googleapis/python-bigquery/commit/db755ce5d2ae21e458f33f02cf63d2e5fbc45cf5)) + +## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27) + + +### Bug Fixes + +* Type annotations include Optional when None is accepted ([#1554](https://github.com/googleapis/python-bigquery/issues/1554)) ([6c1ab80](https://github.com/googleapis/python-bigquery/commit/6c1ab802b09124ba837d6d5358962e3fce2d4a2c)) + +## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21) + + +### Bug Fixes + +* Updates tests based on revised hacker_news tables ([#1591](https://github.com/googleapis/python-bigquery/issues/1591)) ([d73cf49](https://github.com/googleapis/python-bigquery/commit/d73cf495b8dfa032a43dc1d58599d0691aaa0efb)) + +## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09) + + +### Documentation + +* Add/reformat return types for cloud RAD docs ([#1582](https://github.com/googleapis/python-bigquery/issues/1582)) ([6efdce1](https://github.com/googleapis/python-bigquery/commit/6efdce13cc3b25d37d22a856f2308daed569e637)) + +## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01) + + +### Features + +* Add remote function options to routines ([#1558](https://github.com/googleapis/python-bigquery/issues/1558)) ([84ad11d](https://github.com/googleapis/python-bigquery/commit/84ad11d00d99d279e4e6e0fa4ca60e59575b1dad)) + + +### Bug Fixes + +* Filter None values from OpenTelemetry attributes ([#1567](https://github.com/googleapis/python-bigquery/issues/1567)) ([9ea2e21](https://github.com/googleapis/python-bigquery/commit/9ea2e21c35783782993d1ad2d3b910bbe9981ce2)) +* Handle case when expirationMs is None ([#1553](https://github.com/googleapis/python-bigquery/issues/1553)) ([fa6e13d](https://github.com/googleapis/python-bigquery/commit/fa6e13d5006caadb36899b4e2a24ca82b7f11b17)) +* Raise most recent exception when not able to fetch query job after starting the job ([#1362](https://github.com/googleapis/python-bigquery/issues/1362)) ([09cc1df](https://github.com/googleapis/python-bigquery/commit/09cc1df6babaf90ea0b0a6fd926f8013822a31ed)) + +## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18) + + +### Features + +* Add date, datetime, time, timestamp dtype to to_dataframe ([#1547](https://github.com/googleapis/python-bigquery/issues/1547)) ([64e913d](https://github.com/googleapis/python-bigquery/commit/64e913d73832f6363466cbea5ace2337c86fa58b)) + ## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) diff --git a/MANIFEST.in b/MANIFEST.in index e783f4c62..e0a667053 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
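The 3.10.0 entry above adds `date_dtype`, `datetime_dtype`, `time_dtype`, and `timestamp_dtype` arguments to `to_dataframe` (the full parameter docs appear in the `job/query.py` and `table.py` hunks further down). A minimal sketch of how they can be used, assuming an authenticated client, a pandas version that provides `pandas.ArrowDtype`, and an illustrative query:

```python
import pandas
import pyarrow
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials are configured

# Keep DATE and TIMESTAMP columns as Arrow-backed dtypes instead of the
# library defaults (db_dtypes.DateDtype() and datetime64[ns, UTC]).
df = client.query(
    "SELECT CURRENT_DATE() AS d, CURRENT_TIMESTAMP() AS ts"
).to_dataframe(
    date_dtype=pandas.ArrowDtype(pyarrow.date32()),
    timestamp_dtype=pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")),
)
print(df.dtypes)
```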
diff --git a/docs/conf.py b/docs/conf.py index 5c83fd79e..d0468e25a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/design/query-retries.md b/docs/design/query-retries.md index 1bac82f5c..08d75302b 100644 --- a/docs/design/query-retries.md +++ b/docs/design/query-retries.md @@ -73,7 +73,7 @@ value, the client library uses the jobs.insert REST API to start a query job. Before it issues this request, it sets a job ID. This job ID remains constant across API retries. -If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. +If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. Note: `jobs.get` requires the location of the query. It will fail with 404 if the location is not specified and the job is not in the US multi-region. #### Retrying the jobs.query API via the retry parameter diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ebd5b3109..40e3a1578 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -93,6 +93,7 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType +from google.cloud.bigquery.routine import RemoteFunctionOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.standard_sql import StandardSqlDataType @@ -154,6 +155,7 @@ "Routine", "RoutineArgument", "RoutineReference", + "RemoteFunctionOptions", # Shared helpers "SchemaField", "PolicyTagList", diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 33fc72261..09daaa2a2 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -64,7 +64,7 @@ def query_jobs_insert( job_config: Optional[job.QueryJobConfig], job_id: Optional[str], job_id_prefix: Optional[str], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], @@ -105,7 +105,7 @@ def do_query(): timeout=timeout, ) except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + raise else: return query_job else: @@ -215,7 +215,7 @@ def query_jobs_query( client: "Client", query: str, job_config: Optional[job.QueryJobConfig], - location: str, + location: Optional[str], project: str, retry: retries.Retry, timeout: Optional[float], diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 601aa13df..a14dbec9b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -290,6 +290,10 @@ def default_types_mapper( int_dtype: Union[Any, None] = None, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = 
None, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = None, + timestamp_dtype: Union[Any, None] = None, ): """Create a mapping from pyarrow types to pandas types. @@ -321,13 +325,28 @@ def types_mapper(arrow_data_type): elif ( # If date_as_object is True, we know some DATE columns are # out-of-bounds of what is supported by pandas. - not date_as_object + date_dtype is not None + and not date_as_object and pyarrow.types.is_date(arrow_data_type) ): - return db_dtypes.DateDtype() + return date_dtype - elif pyarrow.types.is_time(arrow_data_type): - return db_dtypes.TimeDtype() + elif ( + datetime_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is None + ): + return datetime_dtype + + elif ( + timestamp_dtype is not None + and pyarrow.types.is_timestamp(arrow_data_type) + and arrow_data_type.tz is not None + ): + return timestamp_dtype + + elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type): + return time_dtype return types_mapper diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d8fbfb69e..11cceea42 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -307,7 +307,7 @@ def close(self): def get_service_account_email( self, - project: str = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> str: @@ -327,7 +327,8 @@ def get_service_account_email( before using ``retry``. Returns: - str: service account email address + str: + service account email address Example: @@ -354,7 +355,7 @@ def get_service_account_email( def list_projects( self, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -416,11 +417,11 @@ def api_request(*args, **kwargs): def list_datasets( self, - project: str = None, + project: Optional[str] = None, include_all: bool = False, - filter: str = None, + filter: Optional[str] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -497,7 +498,9 @@ def api_request(*args, **kwargs): page_size=page_size, ) - def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: + def dataset( + self, dataset_id: str, project: Optional[str] = None + ) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. 
deprecated:: 1.24.0 @@ -889,7 +892,7 @@ def set_iam_policy( self, table: Union[Table, TableReference, TableListItem, str], policy: Policy, - updateMask: str = None, + updateMask: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: @@ -1349,7 +1352,7 @@ def list_models( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1426,7 +1429,7 @@ def list_routines( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1503,7 +1506,7 @@ def list_tables( self, dataset: Union[Dataset, DatasetReference, DatasetListItem, str], max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: Optional[int] = None, @@ -1861,9 +1864,9 @@ def _get_query_results( self, job_id: str, retry: retries.Retry, - project: str = None, + project: Optional[str] = None, timeout_ms: Optional[int] = None, - location: str = None, + location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1932,7 +1935,8 @@ def job_from_resource( resource (Dict): one job resource from API response Returns: - The job instance, constructed via the resource. + Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: + The job instance, constructed via the resource. """ config = resource.get("configuration", {}) if "load" in config: @@ -2037,8 +2041,8 @@ def create_job( def get_job( self, job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: @@ -2064,7 +2068,8 @@ def get_job( before using ``retry``. Returns: - Job instance, based on the resource returned by the API. + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: + Job instance, based on the resource returned by the API. 
""" extra_params = {"projection": "full"} @@ -2100,8 +2105,8 @@ def get_job( def cancel_job( self, job_id: str, - project: str = None, - location: str = None, + project: Optional[str] = None, + location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: @@ -2178,12 +2183,12 @@ def cancel_job( def list_jobs( self, - project: str = None, + project: Optional[str] = None, parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, all_users: bool = None, - state_filter: str = None, + state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, @@ -2294,11 +2299,11 @@ def load_table_from_uri( self, source_uris: Union[str, Sequence[str]], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2383,11 +2388,11 @@ def load_table_from_file( rewind: bool = False, size: Optional[int] = None, num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2491,11 +2496,11 @@ def load_table_from_dataframe( dataframe: "pandas.DataFrame", destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, parquet_compression: str = "snappy", timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: @@ -2748,11 +2753,11 @@ def load_table_from_json( json_rows: Iterable[Dict[str, Any]], destination: Union[Table, TableReference, TableListItem, str], num_retries: int = _DEFAULT_NUM_RETRIES, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: LoadJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. 
@@ -3061,11 +3066,11 @@ def copy_table( Sequence[Union[Table, TableReference, TableListItem, str]], ], destination: Union[Table, TableReference, TableListItem, str], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: CopyJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[CopyJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.CopyJob: @@ -3167,11 +3172,11 @@ def extract_table( self, source: Union[Table, TableReference, TableListItem, Model, ModelReference, str], destination_uris: Union[str, Sequence[str]], - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, - job_config: ExtractJobConfig = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, + job_config: Optional[ExtractJobConfig] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, source_type: str = "Table", @@ -3266,11 +3271,11 @@ def extract_table( def query( self, query: str, - job_config: QueryJobConfig = None, - job_id: str = None, - job_id_prefix: str = None, - location: str = None, - project: str = None, + job_config: Optional[QueryJobConfig] = None, + job_id: Optional[str] = None, + job_id_prefix: Optional[str] = None, + location: Optional[str] = None, + project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, @@ -3560,7 +3565,7 @@ def insert_rows_json( ] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, - template_suffix: str = None, + template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[dict]: @@ -3752,7 +3757,7 @@ def list_rows( table: Union[Table, TableListItem, TableReference, str], selected_fields: Sequence[SchemaField] = None, max_results: Optional[int] = None, - page_token: str = None, + page_token: Optional[str] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, @@ -3954,12 +3959,13 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: "PathType"): + def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]: """Takes a file object or file path that contains json that describes a table schema. Returns: - List of schema field objects. + List[SchemaField]: + List of :class:`~google.cloud.bigquery.schema.SchemaField` objects. """ if isinstance(file_or_path, io.IOBase): return self._schema_from_json_file_object(file_or_path) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 0edd29359..513c32d9c 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -139,7 +139,7 @@ def from_api_repr(cls, resource: dict) -> "DatasetReference": @classmethod def from_string( - cls, dataset_id: str, default_project: str = None + cls, dataset_id: str, default_project: Optional[str] = None ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. 
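The `dataset.py` hunk above only widens the `default_project` annotation to `Optional[str]`; for context, `DatasetReference.from_string` is the helper that the deprecated `Client.dataset()` shown earlier points users toward. Project and dataset IDs below are placeholders:

```python
from google.cloud import bigquery

# A fully qualified ID works on its own ...
ref = bigquery.DatasetReference.from_string("my-project.my_dataset")

# ... or a bare dataset ID can be combined with the (now Optional[str]) default_project.
same_ref = bigquery.DatasetReference.from_string("my_dataset", default_project="my-project")

assert ref == same_ref
assert (ref.project, ref.dataset_id) == ("my-project", "my_dataset")
```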
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e4e3d22fc..553853630 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -90,6 +90,12 @@ class DefaultPandasDTypes(enum.Enum): INT_DTYPE = object() """Specifies default integer dtype""" + DATE_DTYPE = object() + """Specifies default date dtype""" + + TIME_DTYPE = object() + """Specifies default time dtype""" + class DestinationFormat(object): """The exported file format. The default value is :attr:`CSV`. diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 4073e0137..a6267be41 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -703,7 +703,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): self._set_properties(api_response) def exists( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: test for the existence of the job via a GET request @@ -748,7 +751,10 @@ def exists( return True def reload( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ): """API call: refresh job properties via a GET request. @@ -785,7 +791,10 @@ def reload( self._set_properties(api_response) def cancel( - self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + client=None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> bool: """API call: cancel job via a POST request @@ -855,7 +864,7 @@ def _set_future_result(self): def done( self, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, reload: bool = True, ) -> bool: """Checks if the job is complete. @@ -881,7 +890,9 @@ def done( return self.state == _DONE_STATE def result( # type: ignore # (signature complaint) - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: Optional[float] = None, ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e4807cc63..7dddc8278 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -58,6 +58,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import db_dtypes # type: ignore +except ImportError: # pragma: NO COVER + db_dtypes = None + if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. @@ -764,7 +769,6 @@ def __init__(self, job_id, query, client, job_config=None): _helpers._set_sub_prop( self._properties, ["configuration", "query", "query"], query ) - self._query_results = None self._done_timeout = None self._transport_timeout = None @@ -1313,7 +1317,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): raise def _reload_query_results( - self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): """Refresh the cached query results. @@ -1332,6 +1336,15 @@ def _reload_query_results( # the timeout from the futures API is respected. 
See: # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 timeout_ms = None + + # Python_API_core, as part of a major rewrite of the deadline, timeout, + # retry process sets the timeout value as a Python object(). + # Our system does not natively handle that and instead expects + # either none or a numeric value. If passed a Python object, convert to + # None. + if type(self._done_timeout) == object: # pragma: NO COVER + self._done_timeout = None + if self._done_timeout is not None: # Subtract a buffer for context switching, network latency, etc. api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS @@ -1392,7 +1405,7 @@ def result( # type: ignore # (complaints about the overloaded signature) page_size: Optional[int] = None, max_results: Optional[int] = None, retry: "retries.Retry" = DEFAULT_RETRY, - timeout: float = None, + timeout: Optional[float] = None, start_index: Optional[int] = None, job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: @@ -1544,7 +1557,7 @@ def do_get_result(): # that should only exist here in the QueryJob method. def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1621,7 +1634,7 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, @@ -1629,6 +1642,10 @@ def to_dataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1689,7 +1706,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -1699,7 +1716,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -1709,7 +1726,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -1719,7 +1736,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. 
+ ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -1747,6 +1807,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ) # If changing the signature of this method, make sure to apply the same @@ -1756,7 +1820,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 3d0a66ba8..be02c1686 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -87,16 +87,38 @@ def create_span(name, attributes=None, client=None, job_ref=None): def _get_final_span_attributes(attributes=None, client=None, job_ref=None): - final_attributes = {} - final_attributes.update(_default_attributes.copy()) + """Compiles attributes from: client, job_ref, user-provided attributes. + + Attributes from all of these sources are merged together. Note the + attributes are added sequentially based on perceived order of precendence: + i.e. attributes added last may overwrite attributes added earlier. 
+ + Args: + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the final_attributes + + job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the final_attributes. + + Returns: dict + """ + + collected_attributes = _default_attributes.copy() + if client: - client_attributes = _set_client_attributes(client) - final_attributes.update(client_attributes) + collected_attributes.update(_set_client_attributes(client)) if job_ref: - job_attributes = _set_job_attributes(job_ref) - final_attributes.update(job_attributes) + collected_attributes.update(_set_job_attributes(job_ref)) if attributes: - final_attributes.update(attributes) + collected_attributes.update(attributes) + + final_attributes = {k: v for k, v in collected_attributes.items() if v is not None} return final_attributes diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index 7353073c8..e576b0d49 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -20,6 +20,7 @@ from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType +from google.cloud.bigquery.routine.routine import RemoteFunctionOptions __all__ = ( @@ -28,4 +29,5 @@ "RoutineArgument", "RoutineReference", "RoutineType", + "RemoteFunctionOptions", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 3c0919003..ef33d507e 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -67,6 +67,7 @@ class Routine(object): "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", + "remote_function_options": "remoteFunctionOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -297,6 +298,37 @@ def determinism_level(self): def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @property + def remote_function_options(self): + """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]: Configures remote function + options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.RemoteFunctionOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ) + if prop is not None: + return RemoteFunctionOptions.from_api_repr(prop) + + @remote_function_options.setter + def remote_function_options(self, value): + api_repr = value + if isinstance(value, RemoteFunctionOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.RemoteFunctionOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["remote_function_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. 
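The `opentelemetry_tracing.py` hunk above documents the merge order for span attributes and now drops `None`-valued entries before returning. A standalone sketch of that behavior (an illustration only, not the library's private helper):

```python
def merge_span_attributes(*sources):
    """Merge attribute dicts; later sources win, and None values are dropped."""
    collected = {}
    for source in sources:
        if source:
            collected.update(source)
    return {key: value for key, value in collected.items() if value is not None}


defaults = {"db.system": "BigQuery"}             # default attributes
job_attrs = {"job_id": "abc123", "state": None}  # illustrative job attributes
assert merge_span_attributes(defaults, job_attrs) == {
    "db.system": "BigQuery",
    "job_id": "abc123",
}
```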
@@ -505,7 +537,7 @@ def from_api_repr(cls, resource: dict) -> "RoutineReference": @classmethod def from_string( - cls, routine_id: str, default_project: str = None + cls, routine_id: str, default_project: Optional[str] = None ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. @@ -563,3 +595,124 @@ def __str__(self): This is a fully-qualified ID, including the project ID and dataset ID. """ return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id) + + +class RemoteFunctionOptions(object): + """Configuration options for controlling remote BigQuery functions.""" + + _PROPERTY_TO_API_FIELD = { + "endpoint": "endpoint", + "connection": "connection", + "max_batching_rows": "maxBatchingRows", + "user_defined_context": "userDefinedContext", + } + + def __init__( + self, + endpoint=None, + connection=None, + max_batching_rows=None, + user_defined_context=None, + _properties=None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if endpoint is not None: + self.endpoint = endpoint + if connection is not None: + self.connection = connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if user_defined_context is not None: + self.user_defined_context = user_defined_context + + @property + def connection(self): + """string: Fully qualified name of the user-provided connection object which holds the authentication information to send requests to the remote service. + + Format is "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + """ + return _helpers._str_or_none(self._properties.get("connection")) + + @connection.setter + def connection(self, value): + self._properties["connection"] = _helpers._str_or_none(value) + + @property + def endpoint(self): + """string: Endpoint of the user-provided remote service + + Example: "https://us-east1-my_gcf_project.cloudfunctions.net/remote_add" + """ + return _helpers._str_or_none(self._properties.get("endpoint")) + + @endpoint.setter + def endpoint(self, value): + self._properties["endpoint"] = _helpers._str_or_none(value) + + @property + def max_batching_rows(self): + """int64: Max number of rows in each batch sent to the remote service. + + If absent or if 0, BigQuery dynamically decides the number of rows in a batch. + """ + return _helpers._int_or_none(self._properties.get("maxBatchingRows")) + + @max_batching_rows.setter + def max_batching_rows(self, value): + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def user_defined_context(self): + """Dict[str, str]: User-defined context as a set of key/value pairs, + which will be sent as function invocation context together with + batched arguments in the requests to the remote service. The total + number of bytes of keys and values must be less than 8KB. + """ + return self._properties.get("userDefinedContext") + + @user_defined_context.setter + def user_defined_context(self, value): + if not isinstance(value, dict): + raise ValueError("value must be dictionary") + self._properties["userDefinedContext"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "RemoteFunctionOptions": + """Factory: construct remote function options given its API representation. + + Args: + resource (Dict[str, object]): Resource, as returned from the API. + + Returns: + google.cloud.bigquery.routine.RemoteFunctionOptions: + Python object, as parsed from ``resource``. 
+ """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this RemoteFunctionOptions. + + Returns: + Dict[str, object]: Remote function options represented as an API resource. + """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, RemoteFunctionOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "RemoteFunctionOptions({})".format(", ".join(all_properties)) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index ebf34e4cd..20a1bc92f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Any, Dict, Iterable, Union +from typing import Any, Dict, Iterable, Optional, Union from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -124,7 +124,7 @@ def __init__( name: str, field_type: str, mode: str = "NULLABLE", - default_value_expression: str = None, + default_value_expression: Optional[str] = None, description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, fields: Iterable["SchemaField"] = (), policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 93b0da67f..462447d51 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -228,7 +228,7 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str): @classmethod def from_string( - cls, table_id: str, default_project: str = None + cls, table_id: str, default_project: Optional[str] = None ) -> "TableReference": """Construct a table reference from table ID string. 
@@ -687,7 +687,11 @@ def partition_expiration(self, value): if self.time_partitioning is None: self._properties[api_field] = {"type": TimePartitioningType.DAY} - self._properties[api_field]["expirationMs"] = str(value) + + if value is None: + self._properties[api_field]["expirationMs"] = None + else: + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -1741,7 +1745,7 @@ def to_arrow_iterable( # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": @@ -1928,13 +1932,17 @@ def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, + datetime_dtype: Union[Any, None] = None, + time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, + timestamp_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1999,7 +2007,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 int_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) @@ -2009,7 +2017,7 @@ def to_dataframe( Integer types can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 float_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) @@ -2019,7 +2027,7 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 string_dtype (Optional[pandas.Series.dtype, None]): If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to @@ -2029,7 +2037,50 @@ def to_dataframe( type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type - .. versionadded:: 3.7.1 + .. versionadded:: 3.8.0 + + date_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date + type, instead of relying on the default ``db_dtypes.DateDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Date type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type + + .. versionadded:: 3.10.0 + + datetime_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime + type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. 
+ If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type + + .. versionadded:: 3.10.0 + + time_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time + type, instead of relying on the default ``db_dtypes.TimeDtype()``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("object")``. BigQuery Time type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + + .. versionadded:: 3.10.0 + + timestamp_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. + ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp + type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. + If you explicitly set the value to ``None``, then the data type will be + ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery + Datetime type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type + + .. versionadded:: 3.10.0 Returns: pandas.DataFrame: @@ -2059,6 +2110,9 @@ def to_dataframe( if int_dtype is DefaultPandasDTypes.INT_DTYPE: int_dtype = pandas.Int64Dtype() + if time_dtype is DefaultPandasDTypes.TIME_DTYPE: + time_dtype = db_dtypes.TimeDtype() + if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) @@ -2071,6 +2125,24 @@ def to_dataframe( if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) + if ( + date_dtype is not None + and date_dtype is not DefaultPandasDTypes.DATE_DTYPE + and not hasattr(date_dtype, "__from_arrow__") + ): + raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE) + + if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"): + raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE) + + if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"): + raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE) + + if timestamp_dtype is not None and not hasattr( + timestamp_dtype, "__from_arrow__" + ): + raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE) + if dtypes is None: dtypes = {} @@ -2086,25 +2158,29 @@ def to_dataframe( create_bqstorage_client=create_bqstorage_client, ) - # When converting date or timestamp values to nanosecond precision, the result - # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the date_as_object or timestamp_as_object parameter to True, - # if necessary. - date_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be date32 or date64 (plus units). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_date(col.type) - ) + # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error, + # when pyarrow converts date values to nanosecond precision. To avoid the error, we + # set the date_as_object parameter to True, if necessary. 
+ date_as_object = False + if date_dtype is DefaultPandasDTypes.DATE_DTYPE: + date_dtype = db_dtypes.DateDtype() + date_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be date32 or date64 (plus units). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_date(col.type) + ) - timestamp_as_object = not all( - self.__can_cast_timestamp_ns(col) - for col in record_batch - # Type can be datetime and timestamp (plus units and time zone). - # See: https://arrow.apache.org/docs/python/api/datatypes.html - if pyarrow.types.is_timestamp(col.type) - ) + timestamp_as_object = False + if datetime_dtype is None and timestamp_dtype is None: + timestamp_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be datetime and timestamp (plus units and time zone). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if pyarrow.types.is_timestamp(col.type) + ) if len(record_batch) > 0: df = record_batch.to_pandas( @@ -2117,6 +2193,10 @@ def to_dataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, ), ) else: @@ -2150,7 +2230,7 @@ def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, - progress_bar_type: str = None, + progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": @@ -2317,6 +2397,10 @@ def to_dataframe( int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2330,6 +2414,10 @@ def to_dataframe( int_dtype (Any): Ignored. Added for compatibility with RowIterator. float_dtype (Any): Ignored. Added for compatibility with RowIterator. string_dtype (Any): Ignored. Added for compatibility with RowIterator. + date_dtype (Any): Ignored. Added for compatibility with RowIterator. + datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. + time_dtype (Any): Ignored. Added for compatibility with RowIterator. + timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0bc275357..a97ccc0c8 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.9.0" +__version__ = "3.11.4" diff --git a/noxfile.py b/noxfile.py index 8464e4980..3c9ba5eb5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -375,7 +375,9 @@ def lint(session): serious code quality issues. 
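With the wiring above, the object fallbacks are skipped as soon as the caller supplies explicit temporal dtypes, so wide-range values keep microsecond precision instead of degrading to Python objects. A hedged usage sketch; the query is illustrative and ``pandas.ArrowDtype`` requires pandas >= 1.5::

    import pandas
    import pyarrow
    from google.cloud import bigquery

    client = bigquery.Client()
    rows = client.query(
        "SELECT DATE '9999-12-31' AS d, TIMESTAMP '9999-12-31 23:59:59.999999+00' AS ts"
    ).result()

    df = rows.to_dataframe(
        date_dtype=pandas.ArrowDtype(pyarrow.date32()),
        datetime_dtype=pandas.ArrowDtype(pyarrow.timestamp("us")),
        time_dtype=pandas.ArrowDtype(pyarrow.time64("us")),
        timestamp_dtype=pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")),
    )

    # Columns come back as date32[day][pyarrow] and timestamp[us, tz=UTC][pyarrow]
    # instead of falling back to object dtype.
    print(df.dtypes)
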
""" - session.install("flake8", BLACK_VERSION) + # Pin flake8 to 6.0.0 + # See https://github.com/googleapis/python-bigquery/issues/1635 + session.install("flake8==6.0.0", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") @@ -402,7 +404,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docs(session): """Build the docs.""" @@ -425,13 +427,15 @@ def docs(session): ) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.9") def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") session.install( - "sphinx==4.0.2", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "gcp-sphinx-docfx-yaml", + "alabaster", + "recommonmark", ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index e0ec46254..6585a560a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.2 -mock==5.0.1 +pytest==7.4.0 +mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7c6d98192..db17aeddf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,45 +1,45 @@ -attrs==22.2.0 -certifi==2022.12.7 +attrs==23.1.0 +certifi==2023.7.22 cffi==1.15.1 -charset-normalizer==3.1.0 -click==8.1.3 +charset-normalizer==3.2.0 +click==8.1.6 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.1.1 -Fiona==1.9.2 +Fiona==1.9.4.post1 geojson==3.0.1 geopandas===0.10.2; python_version == '3.7' -geopandas==0.12.2; python_version >= '3.8' -google-api-core==2.11.0 -google-auth==2.17.2 -google-cloud-bigquery==3.9.0 -google-cloud-bigquery-storage==2.19.1 -google-cloud-core==2.3.2 +geopandas==0.13.2; python_version >= '3.8' +google-api-core==2.11.1 +google-auth==2.22.0 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 +google-cloud-core==2.3.3 google-crc32c==1.5.0 -google-resumable-media==2.4.1 -googleapis-common-protos==1.59.0 -grpcio==1.53.0 +google-resumable-media==2.5.0 +googleapis-common-protos==1.60.0 +grpcio==1.56.2 idna==3.4 -libcst==0.4.9 -munch==2.5.0 +libcst==1.0.1 +munch==4.0.0 mypy-extensions==1.0.0 -packaging==23.0 +packaging==23.1 pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -proto-plus==1.22.2 -pyarrow==11.0.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 +pandas==2.0.3; python_version >= '3.8' +proto-plus==1.22.3 +pyarrow==12.0.1 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 pycparser==2.21 -pyparsing==3.0.9 +pyparsing==3.1.1 python-dateutil==2.8.2 pytz==2023.3 -PyYAML==6.0 -requests==2.28.2 +PyYAML==6.0.1 +requests==2.31.0 rsa==4.9 Shapely==2.0.1 six==1.16.0 -typing-extensions==4.5.0 -typing-inspect==0.8.0 +typing-extensions==4.7.1 +typing-inspect==0.9.0 urllib3==1.26.15 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 3ed7558d5..514f09705 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.4.0 +mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2d7e464a8..ae61f71ff 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ 
-1,15 +1,15 @@ db-dtypes==1.1.1 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 -ipywidgets==8.0.6 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.7.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 3ed7558d5..514f09705 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ google-cloud-testutils==1.3.3 -pytest==7.2.2 -mock==5.0.1 +pytest==7.4.0 +mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8f14d0dc1..0541486c0 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,16 +1,16 @@ db-dtypes==1.1.1 -google-cloud-bigquery==3.9.0 -google-cloud-bigquery-storage==2.19.1 +google-cloud-bigquery==3.11.4 +google-cloud-bigquery-storage==2.22.0 google-auth-oauthlib==1.0.0 -grpcio==1.53.0 -ipywidgets==8.0.6 +grpcio==1.56.2 +ipywidgets==8.1.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' -ipython==8.12.0; python_version >= '3.9' +ipython==8.14.0; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' -matplotlib==3.7.1; python_version >= '3.8' +matplotlib==3.7.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' -pandas==2.0.0; python_version >= '3.8' -pyarrow==11.0.0 +pandas==2.0.3; python_version >= '3.8' +pyarrow==12.0.1 pytz==2023.3 -typing-extensions==4.5.0 +typing-extensions==4.7.1 diff --git a/samples/tests/test_copy_table.py b/samples/tests/test_copy_table.py index d5a6c121e..3953e3162 100644 --- a/samples/tests/test_copy_table.py +++ b/samples/tests/test_copy_table.py @@ -28,8 +28,6 @@ def test_copy_table( random_table_id: str, client: "bigquery.Client", ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() assert "A copy of the table created." in out diff --git a/samples/tests/test_copy_table_cmek.py b/samples/tests/test_copy_table_cmek.py index 1bdec2f35..7cac15723 100644 --- a/samples/tests/test_copy_table_cmek.py +++ b/samples/tests/test_copy_table_cmek.py @@ -23,8 +23,6 @@ def test_copy_table_cmek( table_with_data_id: str, kms_key_name: str, ) -> None: - pytest.skip("b/210907595: copy fails for shakespeare table") - copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() assert "A copy of the table created" in out diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index 21f6d2a26..0018b421d 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Google Inc. All rights reserved. +# Copyright 2023 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
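The two sample tests above drop the ``b/210907595`` skip, so the copy samples run again. For context, the call they exercise is the standard copy-job flow; the table IDs below are hypothetical::

    from google.cloud import bigquery

    client = bigquery.Client()

    # Start a copy job and wait for it; .result() raises if the job failed.
    job = client.copy_table(
        "my-project.my_dataset.source_table",
        "my-project.my_dataset.destination_table",
    )
    job.result()
    print("A copy of the table created.")
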
diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index 91b59676b..1acc11983 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Google Inc +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,17 +33,17 @@ autoescape=True, ) -README_TMPL = jinja_env.get_template('README.tmpl.rst') +README_TMPL = jinja_env.get_template("README.tmpl.rst") def get_help(file): - return subprocess.check_output(['python', file, '--help']).decode() + return subprocess.check_output(["python", file, "--help"]).decode() def main(): parser = argparse.ArgumentParser() - parser.add_argument('source') - parser.add_argument('--destination', default='README.rst') + parser.add_argument("source") + parser.add_argument("--destination", default="README.rst") args = parser.parse_args() @@ -51,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals['get_help'] = get_help + jinja_env.globals["get_help"] = get_help - with io.open(source, 'r') as f: + with io.open(source, "r") as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -61,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, 'w') as f: + with io.open(destination, "w") as f: f.write(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg index 25892161f..37b63aa49 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 1437328a8..8fd532f4c 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1358,8 +1358,6 @@ def test_extract_table(self): self.assertIn("Bharney Rhubble", got) def test_copy_table(self): - pytest.skip("b/210907595: copy fails for shakespeare table") - # If we create a new table to copy from, the test won't work # because the new rows will be stored in the streaming buffer, # and copy jobs don't read the streaming buffer. @@ -1706,8 +1704,8 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ @@ -1717,27 +1715,28 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): field_name = operator.itemgetter(0) fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - # Since DB API is not thread safe, only a single result stream should be # requested by the BQ storage client, meaning that results should arrive # in the sorted order. 
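The DB-API system tests above move from the old ``bigquery-public-data.hacker_news.comments`` table (columns ``by`` and ``time_ts``) to ``hacker_news.full``, whose event time column is named ``timestamp``. The flow they exercise boils down to the following, with the row limit shrunk for illustration::

    from google.cloud import bigquery
    from google.cloud.bigquery import dbapi

    connection = dbapi.connect(bigquery.Client())
    cursor = connection.cursor()
    cursor.execute(
        """
        SELECT id, `by`, timestamp
        FROM `bigquery-public-data.hacker_news.full`
        ORDER BY `id` ASC
        LIMIT 3
        """
    )
    for row in cursor.fetchall():
        # e.g. (1, 'pg', datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=<UTC>))
        print(row)
    connection.close()
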
+ expected_data = [ [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), + ("by", "pg"), + ("id", 1), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 21, 51, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 2), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)), ], [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), + ("by", "phyllis"), + ("id", 3), + ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)), ], ] + self.assertEqual(fetched_data, expected_data) def test_dbapi_dry_run_query(self): @@ -1769,8 +1768,8 @@ def test_dbapi_connection_does_not_leak_sockets(self): cursor.execute( """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp + FROM `bigquery-public-data.hacker_news.full` ORDER BY `id` ASC LIMIT 100000 """ diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 91305b450..726b68f7c 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -34,6 +34,7 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow") numpy = pytest.importorskip("numpy") bigquery_storage = pytest.importorskip( @@ -739,8 +740,8 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( def test_query_results_to_dataframe(bigquery_client): QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -748,12 +749,12 @@ def test_query_results_to_dataframe(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names # verify the column names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for _, row in df.iterrows(): @@ -765,8 +766,8 @@ def test_query_results_to_dataframe(bigquery_client): def test_query_results_to_dataframe_w_bqstorage(bigquery_client): query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` + SELECT id, `by`, timestamp, dead + FROM `bigquery-public-data.hacker_news.full` LIMIT 10 """ @@ -778,12 +779,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): assert isinstance(df, pandas.DataFrame) assert len(df) == 10 # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] + column_names = ["id", "by", "timestamp", "dead"] assert list(df) == column_names exp_datatypes = { "id": int, - "author": str, - "time_ts": pandas.Timestamp, + "by": str, + "timestamp": pandas.Timestamp, "dead": bool, } for index, row in df.iterrows(): @@ -1109,6 +1110,103 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["string_col"].name == "object" +@pytest.mark.parametrize( + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( + bigquery_client, scalars_extreme_table, max_results +): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. 
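The new system test that follows only opts into Arrow-backed dtypes when the installed pandas actually ships ``pandas.ArrowDtype`` (pandas >= 1.5), and passes ``None`` otherwise so the NumPy/object conversion described in the new docstrings is used. The repeated guard can be read as a tiny helper; the helper itself is hypothetical and only summarizes the pattern::

    import pandas
    import pyarrow

    def arrow_dtype_or_none(arrow_type):
        """Return an Arrow-backed dtype on pandas >= 1.5, else None.

        Passing None keeps the NumPy/object conversion documented for the
        corresponding *_dtype argument, so the same call works on old pandas.
        """
        return pandas.ArrowDtype(arrow_type) if hasattr(pandas, "ArrowDtype") else None

    date_dtype = arrow_dtype_or_none(pyarrow.date32())
    timestamp_dtype = arrow_dtype_or_none(pyarrow.timestamp("us", tz="UTC"))
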
+ schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + + df = bigquery_client.list_rows( + scalars_extreme_table, + max_results=max_results, + selected_fields=schema, + ).to_dataframe( + bool_dtype=pandas.BooleanDtype(), + int_dtype=pandas.Int64Dtype(), + float_dtype=( + pandas.Float64Dtype() + if hasattr(pandas, "Float64Dtype") + else pandas.StringDtype() + ), + string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), + ) + + # These pandas dtypes are handled by the custom dtypes. + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "Float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["string_col"].name == "string" + + assert ( + df.dtypes["date_col"].name == "date32[day][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "datetime64[ns]" + ) + assert ( + df.dtypes["datetime_col"].name == "timestamp[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["timestamp_col"].name == "timestamp[us, tz=UTC][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + assert ( + df.dtypes["time_col"].name == "time64[us][pyarrow]" + if hasattr(pandas, "ArrowDtype") + else "object" + ) + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python bytes objects. 
+ assert df.dtypes["bytes_col"].name == "object" + + def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a9760aa9b..a662e92d4 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -18,6 +18,7 @@ from google.api_core import exceptions import google.api_core.retry +from google.api_core.future import polling import mock import pytest @@ -970,7 +971,7 @@ def test_result_default_wo_state(self): client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - self.assertIs(job.result(), job) + self.assertIs(job.result(retry=polling.DEFAULT_RETRY), job) begin_call = mock.call( method="POST", diff --git a/tests/unit/routine/test_remote_function_options.py b/tests/unit/routine/test_remote_function_options.py new file mode 100644 index 000000000..b476dca1e --- /dev/null +++ b/tests/unit/routine/test_remote_function_options.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +ENDPOINT = "https://some.endpoint" +CONNECTION = "connection_string" +MAX_BATCHING_ROWS = 50 +USER_DEFINED_CONTEXT = { + "foo": "bar", +} + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine import RemoteFunctionOptions + + return RemoteFunctionOptions + + +def test_ctor(target_class): + + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + + +def test_empty_ctor(target_class): + options = target_class() + assert options._properties == {} + options = target_class(_properties=None) + assert options._properties == {} + options = target_class(_properties={}) + assert options._properties == {} + + +def test_ctor_bad_context(target_class): + with pytest.raises(ValueError, match="value must be dictionary"): + target_class(user_defined_context=[1, 2, 3, 4]) + + +def test_from_api_repr(target_class): + resource = { + "endpoint": ENDPOINT, + "connection": CONNECTION, + "maxBatchingRows": MAX_BATCHING_ROWS, + "userDefinedContext": USER_DEFINED_CONTEXT, + "someRandomField": "someValue", + } + options = target_class.from_api_repr(resource) + assert options.endpoint == ENDPOINT + assert options.connection == CONNECTION + assert options.max_batching_rows == MAX_BATCHING_ROWS + assert options.user_defined_context == USER_DEFINED_CONTEXT + assert options._properties["someRandomField"] == "someValue" + + +def test_from_api_repr_w_minimal_resource(target_class): + resource = {} + options = target_class.from_api_repr(resource) + assert options.endpoint is None + assert options.connection is None + assert options.max_batching_rows is None + assert 
options.user_defined_context is None + + +def test_from_api_repr_w_unknown_fields(target_class): + resource = {"thisFieldIsNotInTheProto": "just ignore me"} + options = target_class.from_api_repr(resource) + assert options._properties is resource + + +def test_eq(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + other_options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + assert options == other_options + assert not (options != other_options) + + empty_options = target_class() + assert not (options == empty_options) + assert options != empty_options + + notanarg = object() + assert not (options == notanarg) + assert options != notanarg + + +def test_repr(target_class): + options = target_class( + endpoint=ENDPOINT, + connection=CONNECTION, + max_batching_rows=MAX_BATCHING_ROWS, + user_defined_context=USER_DEFINED_CONTEXT, + ) + actual_repr = repr(options) + assert actual_repr == ( + "RemoteFunctionOptions(connection='connection_string', endpoint='https://some.endpoint', max_batching_rows=50, user_defined_context={'foo': 'bar'})" + ) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 80a3def73..87767200c 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -75,6 +75,13 @@ def test_ctor_w_properties(target_class): description = "A routine description." determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC + options = bigquery.RemoteFunctionOptions( + endpoint="https://some.endpoint", + connection="connection_string", + max_batching_rows=99, + user_defined_context={"foo": "bar"}, + ) + actual_routine = target_class( routine_id, arguments=arguments, @@ -84,6 +91,7 @@ def test_ctor_w_properties(target_class): type_=type_, description=description, determinism_level=determinism_level, + remote_function_options=options, ) ref = RoutineReference.from_string(routine_id) @@ -97,6 +105,18 @@ def test_ctor_w_properties(target_class): assert ( actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) + assert actual_routine.remote_function_options == options + + +def test_ctor_invalid_remote_function_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.RemoteFunctionOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + remote_function_options=object(), + ) def test_from_api_repr(target_class): @@ -126,6 +146,14 @@ def test_from_api_repr(target_class): "someNewField": "someValue", "description": "A routine description.", "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + "remoteFunctionOptions": { + "endpoint": "https://some.endpoint", + "connection": "connection_string", + "maxBatchingRows": 50, + "userDefinedContext": { + "foo": "bar", + }, + }, } actual_routine = target_class.from_api_repr(resource) @@ -160,6 +188,10 @@ def test_from_api_repr(target_class): assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." 
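The routine tests above exercise the new ``RemoteFunctionOptions`` support end to end (constructor, API representation, equality and repr). In user code the options object is attached to a ``Routine`` before it is created. A hedged sketch follows; the endpoint, connection and IDs are placeholders, and the backing Cloud Function plus BigQuery connection are assumed to exist already::

    from google.cloud import bigquery

    client = bigquery.Client()

    options = bigquery.RemoteFunctionOptions(
        endpoint="https://us-central1-my-project.cloudfunctions.net/add_one",
        connection="projects/my-project/locations/us/connections/my-connection",
        max_batching_rows=50,
        user_defined_context={"mode": "add"},
    )

    routine = bigquery.Routine(
        "my-project.my_dataset.add_one",
        type_="SCALAR_FUNCTION",
        arguments=[
            bigquery.RoutineArgument(
                name="x",
                data_type=bigquery.StandardSqlDataType(
                    type_kind=bigquery.StandardSqlTypeNames.INT64
                ),
            )
        ],
        return_type=bigquery.StandardSqlDataType(
            type_kind=bigquery.StandardSqlTypeNames.INT64
        ),
        remote_function_options=options,
    )
    routine = client.create_routine(routine)
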
assert actual_routine.determinism_level == "DETERMINISTIC" + assert actual_routine.remote_function_options.endpoint == "https://some.endpoint" + assert actual_routine.remote_function_options.connection == "connection_string" + assert actual_routine.remote_function_options.max_batching_rows == 50 + assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} def test_from_api_repr_tvf_function(target_class): @@ -261,6 +293,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.type_ is None assert actual_routine.description is None assert actual_routine.determinism_level is None + assert actual_routine.remote_function_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -421,6 +454,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["someNewField"], {"someNewField": "someValue"}, ), + ( + { + "routineType": "SCALAR_FUNCTION", + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ["remote_function_options"], + { + "remoteFunctionOptions": { + "endpoint": "https://some_endpoint", + "connection": "connection_string", + "max_batching_rows": 101, + }, + }, + ), ], ) def test_build_resource(object_under_test, resource, filter_fields, expected): @@ -497,6 +548,12 @@ def test_set_description_w_none(object_under_test): assert object_under_test._properties["description"] is None +def test_set_remote_function_options_w_none(object_under_test): + object_under_test.remote_function_options = None + assert object_under_test.remote_function_options is None + assert object_under_test._properties["remoteFunctionOptions"] is None + + def test_repr(target_class): model = target_class("my-proj.my_dset.my_routine") actual_routine = repr(model) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c155e2bc6..cf0aa4028 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5092,12 +5092,14 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): QueryJob, "_begin", side_effect=job_create_error ) get_job_patcher = mock.patch.object( - client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + client, "get_job", side_effect=DataLoss("we lost your job, sorry") ) with job_begin_patcher, get_job_patcher: - # If get job request fails, the original exception should be raised. - with pytest.raises(Conflict, match="Job already exists."): + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. 
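Because ``remote_function_options`` now participates in ``_build_resource`` (see the ``["remote_function_options"]`` filter case above), the options can also be changed on an existing routine with a partial update. A speculative sketch of that flow, continuing from the routine created in the previous example::

    # Tighten batching on the existing remote function and send a partial update.
    routine.remote_function_options = bigquery.RemoteFunctionOptions(
        endpoint="https://us-central1-my-project.cloudfunctions.net/add_one",
        connection="projects/my-project/locations/us/connections/my-connection",
        max_batching_rows=10,
    )
    routine = client.update_routine(routine, ["remote_function_options"])
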
+ with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 9bdd7b596..a221bc89e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -46,6 +46,7 @@ PYARROW_VERSION = pkg_resources.parse_version("0.0.1") if pyarrow: + import pyarrow import pyarrow.types PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) @@ -1189,6 +1190,25 @@ def test_to_api_repr_w_custom_field(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_unsetting_expiration(self): + from google.cloud.bigquery.table import TimePartitioningType + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.partition_expiration = None + resource = table.to_api_repr() + + exp_resource = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + "timePartitioning": { + "expirationMs": None, + "type": TimePartitioningType.DAY, + }, + } + self.assertEqual(resource, exp_resource) + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -3471,11 +3491,45 @@ def test_to_dataframe_w_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "00:00:00.000000", + "1433836800000000", + ], + [ + "Bharney Rhubble", + "false", + "33", + "454000", + "6.66", + "4567-06-14", + "4567-12-31T00:00:00.000000", + "12:00:00.232413", + "81953424000000000", + ], + [ + "Wylma Phlyntstone", + "true", + "29", + "341000", + "2.0", + "9999-12-31", + "9999-12-31T23:59:59.999999", + "23:59:59.999999", + "253402261199999999", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3492,18 +3546,136 @@ def test_to_dataframe_w_dtypes_mapper(self): else pandas.StringDtype() ), string_dtype=pandas.StringDtype(), + date_dtype=( + pandas.ArrowDtype(pyarrow.date32()) + if hasattr(pandas, "ArrowDtype") + else None + ), + datetime_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + time_dtype=( + pandas.ArrowDtype(pyarrow.time64("us")) + if hasattr(pandas, "ArrowDtype") + else None + ), + timestamp_dtype=( + pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")) + if hasattr(pandas, "ArrowDtype") + else None + ), ) self.assertIsInstance(df, pandas.DataFrame) + + self.assertEqual(list(df.complete), [True, False, True]) self.assertEqual(df.complete.dtype.name, "boolean") + + self.assertEqual(list(df.age), [32, 33, 29]) self.assertEqual(df.age.dtype.name, "Int32") + + self.assertEqual(list(df.seconds), [23000, 454000, 341000]) self.assertEqual(df.seconds.dtype.name, "Int32") + self.assertEqual( - df.miles.dtype.name, - "Float64" if hasattr(pandas, "Float64Dtype") else "string", + list(df.name), ["Phred Phlyntstone", "Bharney Rhubble", "Wylma Phlyntstone"] ) 
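The new ``test_to_api_repr_w_unsetting_expiration`` above pins down the behaviour of the setter change in ``table.py``: assigning ``None`` now serializes an explicit ``null`` for ``expirationMs`` rather than the string ``"None"`` that ``str(value)`` used to produce, so an update can actually clear the expiration. The user-facing pattern, with a hypothetical table ID::

    from google.cloud import bigquery

    table = bigquery.Table("my-project.my_dataset.my_table")
    table.partition_expiration = None

    # The payload now carries an explicit null for expirationMs.
    assert table.to_api_repr()["timePartitioning"] == {
        "expirationMs": None,
        "type": "DAY",
    }
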
self.assertEqual(df.name.dtype.name, "string") + if hasattr(pandas, "Float64Dtype"): + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") + else: + self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) + self.assertEqual(df.miles.dtype.name, "string") + + if hasattr(pandas, "ArrowDtype"): + self.assertEqual( + list(df.date), + [ + datetime.date(1999, 12, 1), + datetime.date(4567, 6, 14), + datetime.date(9999, 12, 31), + ], + ) + self.assertEqual(df.date.dtype.name, "date32[day][pyarrow]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "timestamp[us][pyarrow]") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "time64[us][pyarrow]") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]") + else: + self.assertEqual( + list(df.date), + [ + pandas.Timestamp("1999-12-01 00:00:00"), + pandas.Timestamp("2229-03-27 01:41:45.161793536"), + pandas.Timestamp("1816-03-29 05:56:08.066277376"), + ], + ) + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + + self.assertEqual( + list(df.datetime), + [ + datetime.datetime(1999, 12, 31, 0, 0), + datetime.datetime(4567, 12, 31, 0, 0), + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ) + self.assertEqual(df.datetime.dtype.name, "object") + + self.assertEqual( + list(df.time), + [ + datetime.time(0, 0), + datetime.time(12, 0, 0, 232413), + datetime.time(23, 59, 59, 999999), + ], + ) + self.assertEqual(df.time.dtype.name, "object") + + self.assertEqual( + list(df.timestamp), + [ + datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime( + 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + ], + ) + self.assertEqual(df.timestamp.dtype.name, "object") + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_none_dtypes_mapper(self): from google.cloud.bigquery.schema import SchemaField @@ -3514,11 +3686,23 @@ def test_to_dataframe_w_none_dtypes_mapper(self): SchemaField("age", "INTEGER"), SchemaField("seconds", "INT64"), SchemaField("miles", "FLOAT64"), + SchemaField("date", "DATE"), + SchemaField("datetime", "DATETIME"), + SchemaField("time", "TIME"), + SchemaField("timestamp", "TIMESTAMP"), ] row_data = [ - ["Phred Phlyntstone", "true", "32", "23000", "1.77"], - ["Bharney Rhubble", "false", "33", "454000", "6.66"], - ["Wylma Phlyntstone", "true", "29", "341000", "2.0"], + [ + "Phred Phlyntstone", + "true", + "32", + "23000", + "1.77", + "1999-12-01", + "1999-12-31T00:00:00.000000", + "23:59:59.999999", + "1433836800000000", + ], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3531,6 +3715,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): int_dtype=None, float_dtype=None, string_dtype=None, + date_dtype=None, + datetime_dtype=None, + time_dtype=None, + timestamp_dtype=None, ) 
self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(df.complete.dtype.name, "bool") @@ -3538,6 +3726,10 @@ def test_to_dataframe_w_none_dtypes_mapper(self): self.assertEqual(df.seconds.dtype.name, "int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.date.dtype.name, "datetime64[ns]") + self.assertEqual(df.datetime.dtype.name, "datetime64[ns]") + self.assertEqual(df.time.dtype.name, "object") + self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]") @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_unsupported_dtypes_mapper(self): @@ -3575,6 +3767,26 @@ def test_to_dataframe_w_unsupported_dtypes_mapper(self): create_bqstorage_client=False, string_dtype=numpy.dtype("object"), ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + date_dtype=numpy.dtype("object"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + datetime_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + time_dtype=numpy.dtype("datetime64[us]"), + ) + with self.assertRaises(ValueError): + row_iterator.to_dataframe( + create_bqstorage_client=False, + timestamp_dtype=numpy.dtype("datetime64[us]"), + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_column_dtypes(self):
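All four new ``ValueError`` cases trip the same guard added in ``table.py``: every ``*_dtype`` override has to be a pandas extension dtype that can build itself from Arrow data, i.e. one that exposes ``__from_arrow__``, so plain NumPy dtypes are rejected before any rows are fetched. A quick way to see which dtypes qualify::

    import numpy
    import pandas
    import pyarrow

    # Extension dtypes implement __from_arrow__ and are accepted.
    print(hasattr(pandas.Int64Dtype(), "__from_arrow__"))                          # True
    print(hasattr(pandas.ArrowDtype(pyarrow.timestamp("us")), "__from_arrow__"))   # True, pandas >= 1.5

    # NumPy dtypes do not, so passing e.g. timestamp_dtype=numpy.dtype("datetime64[us]")
    # raises ValueError("timestamp_dtype", ...) as exercised above.
    print(hasattr(numpy.dtype("datetime64[us]"), "__from_arrow__"))                # False
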