Skip to content

Commit

Permalink
Add an option to process only last rows (#159)
Browse files Browse the repository at this point in the history
* Add an option to process only last rows

* docs

* format

* changelog

* fixes

* adding more logging

* format

* adjusting for index by 1

* format
  • Loading branch information
annagav authored Jul 1, 2024
1 parent 3af84e3 commit af10c0d
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/mitol/google_sheets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ MITOL_GOOGLE_SHEETS_DRIVE_CLIENT_SECRET=<Client secret from step 1>
MITOL_GOOGLE_SHEETS_DRIVE_API_PROJECT_ID=<Project ID from step 2>
MITOL_GOOGLE_SHEETS_PROCESSOR_APP_NAME=<Name of the app processing the request>
MITOL_GOOGLE_SHEETS_ENROLLMENT_CHANGE_SHEET_ID=<Change of enrollment request sheet ID from step 3>
MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM=<Optional: number of rows to process, counted from the bottom of the sheet. Set to 0 to process all rows. Defaults to 30>
```


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<!--
A new scriv changelog fragment.
Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Removed
- A bullet item for the Removed category.
-->

### Added

- Added MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM to control how many rows
  from the bottom of the spreadsheet are processed (0 processes all rows).

<!--
### Changed
- A bullet item for the Changed category.
-->
<!--
### Deprecated
- A bullet item for the Deprecated category.
-->
<!--
### Fixed
- A bullet item for the Fixed category.
-->
<!--
### Security
- A bullet item for the Security category.
-->
7 changes: 7 additions & 0 deletions src/mitol/google_sheets/settings/google_sheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@
"ID of the Google Sheet that contains the enrollment change request worksheets (refunds, transfers, etc)"
),
)
# Number of trailing sheet rows to process (0 means all rows).
# The setting is declared with get_string, so the default is given as a string as well to keep
# the setting's type consistent; callers are expected to cast the value with int().
# NOTE(review): an integer-typed settings helper would be a better fit if one is available here.
MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM = get_string(
    name="MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM",
    default="30",
    description=(
        "Process only the last N rows of data. If set to 0 then process all rows. "
    ),
)
MITOL_GOOGLE_SHEETS_DATE_FORMAT = get_string(
name="MITOL_GOOGLE_SHEETS_DATE_FORMAT",
default="%m/%d/%Y",
Expand Down
35 changes: 31 additions & 4 deletions src/mitol/google_sheets/sheet_handler_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def worksheet(self):

def get_enumerated_rows(self):
"""
Yields enumerated data rows of a spreadsheet (excluding header row(s))
Yields enumerated data rows of a spreadsheet (excluding header row(s)).
Yields:
Tuple[int, List[str]]: Row index (according to the Google Sheet, NOT zero-indexed) paired with the list
Expand Down Expand Up @@ -146,9 +146,21 @@ def update_sheet_from_results(self, grouped_row_results):
processed_row_results = grouped_row_results.get(ResultType.PROCESSED, [])
if processed_row_results:
self.update_completed_rows(processed_row_results)
log.warning(
"Successfully processed rows in %s (%s): %s",
self.sheet_metadata.sheet_name,
self.sheet_metadata.worksheet_name,
[row_result.row_index for row_result in processed_row_results],
)
failed_row_results = grouped_row_results.get(ResultType.FAILED, [])
if failed_row_results:
self.update_row_errors(failed_row_results)
log.warning(
"Processed rows with errors in %s (%s): %s",
self.sheet_metadata.sheet_name,
self.sheet_metadata.worksheet_name,
[row_result.row_index for row_result in failed_row_results],
)
out_of_sync_row_results = grouped_row_results.get(ResultType.OUT_OF_SYNC, [])
if out_of_sync_row_results:
log.warning(
Expand Down Expand Up @@ -259,7 +271,7 @@ def process_sheet(self, limit_row_index=None):
try:
row_result = self.process_row(row_index, row_data)
except Exception as exc:
log.exception("Error processing row from google sheets")
log.exception("Error processing row %s from google sheets", row_index)
row_result = RowResult(
row_index=row_index,
row_db_record=None,
Expand Down Expand Up @@ -314,14 +326,29 @@ def worksheet(self):
def get_enumerated_rows(self):
    """
    Returns the enumerated data rows of the worksheet that should be processed.

    Rows before self.start_row are legacy rows (data that was manually entered before this
    process was automated) and are never yielded. If
    settings.MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM is greater than 0, processing starts
    at whichever is later: self.start_row, or the row that leaves only the last N rows of the
    sheet. If the setting is 0, every row from self.start_row onward is processed.

    Returns:
        enumerate: Pairs of (row index, row data), with the index starting at the first
            row that is actually processed

    Raises:
        ValueError: If the setting cannot be converted to an integer
    """
    first_row_to_process = self.start_row
    # The setting value may be a string, so normalize it to an int once.
    last_rows_num = int(settings.MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM)
    if last_rows_num > 0:
        # Only fetch the sheet contents to count rows when a limit is actually configured.
        row_count = len(
            self.worksheet.get_all_values(include_tailing_empty_rows=False)
        )
        # With rows numbered from 1, the last N rows begin at row_count - N + 1.
        # Never start before self.start_row, so legacy rows stay excluded.
        first_row_to_process = max(self.start_row, row_count - last_rows_num + 1)
    # Use the module logger (as the rest of this file does) rather than the root logger.
    log.warning(
        "Going to process the sheet starting with row %s", first_row_to_process
    )
    return enumerate(
        get_data_rows_after_start(
            self.worksheet,
            start_row=first_row_to_process,
            start_col=1,
            end_col=self.sheet_metadata.num_columns,
        ),
        # Keep the enumeration index aligned with the row that data starts from.
        start=first_row_to_process,
    )

def update_completed_rows(self, success_row_results):
Expand Down
2 changes: 2 additions & 0 deletions src/mitol/google_sheets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ def get_data_rows_after_start(
"""
Yields the data rows of a spreadsheet starting with a given row and spanning a given column range
until empty rows are encountered.
If MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM == 0 it will process all rows. Otherwise, it will process
only the last few rows.
Args:
worksheet (pygsheets.worksheet.Worksheet): Worksheet object
Expand Down

0 comments on commit af10c0d

Please sign in to comment.