Skip to content

Commit

Permalink
feat: add 'columns' as an alias for 'col_order'
Browse files Browse the repository at this point in the history
  • Loading branch information
kiraksi committed Nov 21, 2023
1 parent 4051266 commit 7abf188
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
3 changes: 2 additions & 1 deletion docs/reading.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ destination DataFrame as well as a preferred column order as follows:
'SELECT * FROM `test_dataset.test_table`',
project_id=projectid,
index_col='index_column_name',
col_order=['col1', 'col2', 'col3'])
col_order=['col1', 'col2', 'col3'],
columns=['col1', 'col2'])
Querying with legacy SQL syntax
-------------------------------
Expand Down
6 changes: 6 additions & 0 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ def read_gbq(
project_id=None,
index_col=None,
col_order=None,
columns=None,
reauth=False,
auth_local_webserver=True,
dialect=None,
Expand Down Expand Up @@ -774,6 +775,8 @@ def read_gbq(
col_order : list(str), optional
List of BigQuery column names in the desired order for results
DataFrame.
columns : list(str), optional
List of BigQuery column names in the desired order for results
DataFrame. Alias for ``col_order``; if both are set, ``col_order``
takes precedence.
reauth : boolean, default False
Force Google BigQuery to re-authenticate the user. This is useful
if multiple accounts are used.
Expand Down Expand Up @@ -964,6 +967,9 @@ def read_gbq(
'Index column "{0}" does not exist in DataFrame.'.format(index_col)
)

# ``columns`` is an alias for ``col_order``; ``col_order`` wins when both are set
col_order = col_order or columns

# Change the order of columns in the DataFrame based on provided list
if col_order is not None:
if sorted(col_order) == sorted(final_df.columns):
Expand Down
20 changes: 20 additions & 0 deletions tests/system/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,26 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id):
)
assert df["max_year"][0] >= 2000

def test_columns_and_col_order(self, project_id):
    """``col_order`` takes precedence when ``columns`` is passed as well.

    ``columns`` is an alias for ``col_order``. When both are supplied,
    the returned DataFrame must follow ``col_order`` and ignore
    ``columns``.
    """
    query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
    # Deliberately a different ordering (and a different subset) than
    # ``col_order`` so the two arguments cannot be confused.
    columns = ["string_2", "string_1"]
    col_order = ["string_3", "string_1", "string_2"]
    result_frame = gbq.read_gbq(
        query,
        project_id=project_id,
        columns=columns,
        col_order=col_order,
        credentials=self.credentials,
        dialect="standard",
    )
    correct_frame = DataFrame(
        {"string_1": ["a"], "string_2": ["b"], "string_3": ["c"]}
    )[col_order]
    tm.assert_frame_equal(result_frame, correct_frame)

    # Verify that col_order is prioritized over columns. The comparison
    # is order-sensitive on purpose: comparing sorted lists would pass
    # for any permutation of the same column names.
    assert list(result_frame.columns) == col_order


class TestToGBQIntegration(object):
@pytest.fixture(autouse=True, scope="function")
Expand Down

0 comments on commit 7abf188

Please sign in to comment.