diff --git a/docs/reading.rst b/docs/reading.rst index c5e814bf..a1ba0a5e 100644 --- a/docs/reading.rst +++ b/docs/reading.rst @@ -28,7 +28,8 @@ destination DataFrame as well as a preferred column order as follows: 'SELECT * FROM `test_dataset.test_table`', project_id=projectid, index_col='index_column_name', - col_order=['col1', 'col2', 'col3']) + col_order=['col1', 'col2', 'col3'], + columns=['col1', 'col2']) Querying with legacy SQL syntax ------------------------------- diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 26a566d9..9f091d0b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -733,6 +733,7 @@ def read_gbq( project_id=None, index_col=None, col_order=None, + columns=None, reauth=False, auth_local_webserver=True, dialect=None, @@ -774,6 +775,8 @@ def read_gbq( col_order : list(str), optional List of BigQuery column names in the desired order for results DataFrame. + columns : list(str), optional + Alias for ``col_order``. If both are given, a non-empty ``col_order`` takes precedence. reauth : boolean, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. 
@@ -964,6 +967,9 @@ def read_gbq( 'Index column "{0}" does not exist in DataFrame.'.format(index_col) ) + # ``columns`` is an alias for ``col_order``; use it only when col_order is unset or empty + col_order = col_order or columns + # Change the order of columns in the DataFrame based on provided list if col_order is not None: if sorted(col_order) == sorted(final_df.columns): diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 9aac2357..183bcb8a 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -600,6 +600,26 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id): ) assert df["max_year"][0] >= 2000 + def test_columns_and_col_order(self, project_id): + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + columns = ["string_2", "string_1"] + col_order = ["string_3", "string_1", "string_2"] + result_frame = gbq.read_gbq( + query, + project_id=project_id, + columns=columns, + col_order=col_order, + credentials=self.credentials, + dialect="standard", + ) + correct_frame = DataFrame( + {"string_1": ["a"], "string_2": ["b"], "string_3": ["c"]} + )[col_order] + tm.assert_frame_equal(result_frame, correct_frame) + + # Verify that col_order is prioritized over columns + assert sorted(col_order) == sorted(result_frame.columns) + class TestToGBQIntegration(object): @pytest.fixture(autouse=True, scope="function")