Skip to content

Commit 83cd170

Browse files
authored
Merge pull request #379 from J-B-Mugundh/main
Implemented Caching in loaded datasets for Improved Performance
2 parents ed0dc7f + dd41b12 commit 83cd170

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

streamlit/home.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,21 @@
99
# DATA LOADING
1010
#######################################
1111

12-
st.set_page_config(layout='wide')
12+
st.set_page_config(layout='wide')
13+
14+
@st.cache_data  # Cache the loaded dataset so repeated calls reuse it
def load_data(url):
    """Read the CSV located at ``url`` into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so calling it again with
    the same ``url`` is served from Streamlit's cache instead of re-reading
    the file.
    """
    frame = pd.read_csv(url)
    return frame
1317

1418
# Loading data files from the 'streamlit' directory
15-
df = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
16-
df2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
17-
full_data2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
18-
full_data2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
19-
full_df2020 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
20-
df2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
21-
df2021 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
22-
df2022 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
19+
df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
20+
df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
21+
full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
22+
full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
23+
full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')
24+
df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
25+
df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
26+
df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')
2327

2428
# Filter the 2020 dataframe
2529
df2020 = df[df['SalaryUSD'] < 200000]
@@ -109,12 +113,14 @@
109113
}
110114
df_ai.replace(short_mapping, inplace=True)
111115

116+
@st.cache_data
def mean_salary(df):
    """Return a copy of ``df`` with extreme salaries replaced by the mean.

    Rows whose ``SalaryUSD`` is above 1,000,000 are overwritten with the mean
    ``SalaryUSD`` of the rows at or below 1,000,000.

    The argument itself is never modified. This function is cached, so its
    body is skipped on a cache hit; an in-place edit of ``df`` would therefore
    happen on some calls and not on others. Callers must use the returned
    frame.
    """
    cleaned = df.copy()
    # Mean is computed only over the rows that are not treated as outliers.
    typical_mean = cleaned.loc[cleaned['SalaryUSD'] <= 1000000, 'SalaryUSD'].mean()
    cleaned.loc[cleaned['SalaryUSD'] > 1000000, 'SalaryUSD'] = typical_mean
    return cleaned
116121

117122
# Function to create value count plots for each column
123+
@st.cache_data
118124
def plot_value_counts(column_name):
119125
colors = ['skyblue', 'yellow']
120126
fig = px.bar(df_ai[column_name].value_counts().reset_index(), x='index', y=column_name, color_discrete_sequence=[random.choice(colors)])

0 commit comments

Comments
 (0)