diff --git a/server/utils/README.md b/server/utils/README.md
new file mode 100644
index 000000000..1a00556ac
--- /dev/null
+++ b/server/utils/README.md
@@ -0,0 +1,7 @@
+The utils directory will serve as a location for multiple utility tools. get_request_data_csv.py can be used to make an API call to fetch 311 request data from the 311 data server for a given start date and end date.
+
+The 311 request data from [lacity.org](https://data.lacity.org/browse?q=MyLA311%20Service%20Request%20Data%20&sortBy=relevance) has 34 columns. The get_request_data_csv.py script can be run from the command line passing the arguments start_date and end_date that lets you retrieve the 311 request data from the [311 data server](https://dev-api.311-data.org/docs). The 311 server processes the data from lacity.org. The data cleaning procedure is mentioned [here](https://github.com/hackforla/311-data/blob/dev/docs/data_loading.md). The result is written to a csv file and saved in the current working directory of the user. A preview of the data_final dataframe is printed in the command line.
+
+Example: `python get_request_data_csv.py "2021-01-01" "2021-01-03"` will return 261 rows and 15 columns.
+
+![image](https://user-images.githubusercontent.com/10836669/188473763-52bc9474-0878-432c-b4e8-6e4ff21dcda2.png)
diff --git a/server/utils/get_request_data_csv.py b/server/utils/get_request_data_csv.py
new file mode 100644
index 000000000..84e8eefb0
--- /dev/null
+++ b/server/utils/get_request_data_csv.py
@@ -0,0 +1,61 @@
+import argparse
+
+import requests
+import pandas as pd
+
+# Number of records requested per API call (server page size).
+REQUESTS_BATCH_SIZE = 10000
+
+
+def get_311_request_data(start_date, end_date):
+    """Fetch 311 requests from the 311 data server.
+
+    Retrieves 311 requests from the 311 data server for a given
+    start_date and end_date, paging through results in batches of
+    REQUESTS_BATCH_SIZE until a short (final) page is returned.
+
+    Args:
+        start_date: Date (string, e.g. "2021-01-01") from which the 311
+            request data have to be collected.
+        end_date: Date (string, e.g. "2021-01-03") up to which the 311
+            request data have to be fetched.
+
+    Returns:
+        DataFrame of all matching requests, sorted by 'createdDate'.
+    """
+    skip = 0
+    all_requests = []
+    while True:
+        url = (
+            f'https://dev-api.311-data.org/requests'
+            f'?start_date={start_date}&end_date={end_date}'
+            f'&skip={skip}&limit={REQUESTS_BATCH_SIZE}'
+        )
+        response = requests.get(url)
+        data = response.json()
+        all_requests.extend(data)
+        skip += REQUESTS_BATCH_SIZE
+        # A page shorter than the batch size means the server has no more
+        # records. (Comparing against `skip` would always be true after the
+        # second iteration, silently truncating results to two pages.)
+        if len(data) < REQUESTS_BATCH_SIZE:
+            break
+    data_final = pd.DataFrame(all_requests)
+    # An empty result has no 'createdDate' column; sorting would raise.
+    if not data_final.empty:
+        data_final.sort_values(by='createdDate', inplace=True, ignore_index=True)
+    return data_final
+
+
+def main():
+    """Fetch 311 data for the CLI-supplied date range, save it as
+    'data_final.csv' in the current working directory, and print a preview.
+    """
+    parser = argparse.ArgumentParser(description='Gets 311 request data from the server')
+    parser.add_argument('start_date', type=str, help='The start date that has to be entered')
+    parser.add_argument('end_date', type=str, help='The end date that has to be entered')
+    args = parser.parse_args()
+    data_final = get_311_request_data(args.start_date, args.end_date)
+    data_final.to_csv('data_final.csv')
+    print(data_final)
+
+
+if __name__ == "__main__":
+    main()