|
9 | 9 | # DATA LOADING
|
10 | 10 | #######################################
|
11 | 11 |
|
12 |
| -st.set_page_config(layout='wide') |
# Configure the Streamlit page to use the wide layout.
st.set_page_config(layout='wide')
| 13 | + |
@st.cache_data  # let Streamlit cache the parsed CSV between script reruns
def load_data(url):
    """Read the CSV located at ``url`` and return it as a DataFrame.

    The function is wrapped in ``st.cache_data``, so repeated calls with
    the same ``url`` are served from Streamlit's data cache rather than
    being fetched and parsed again.
    """
    frame = pd.read_csv(url)
    return frame
13 | 17 |
|
14 | 18 | # Loading data files from the 'streamlit' directory
|
15 |
| -df = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv') |
16 |
| -df2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv') |
17 |
| -full_data2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv') |
18 |
| -full_data2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv') |
19 |
| -full_df2020 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv') |
20 |
| -df2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv') |
21 |
| -df2021 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv') |
22 |
| -df2022 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv') |
# Per-year frames. Note that the 2020 file is bound to `df`; the filtered
# `df2020` is derived from it further down.
df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')
df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')
df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')
df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')
df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')

# Survey-result sample files for 2018-2020.
full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')
full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')
full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')

# Filter the 2020 dataframe: keep only rows whose SalaryUSD is below 200000.
df2020 = df[df['SalaryUSD'] < 200000]
|
|
109 | 113 | }
|
110 | 114 | df_ai.replace(short_mapping, inplace=True)
|
111 | 115 |
|
# NOTE: intentionally NOT wrapped in @st.cache_data. This function edits its
# argument in place; with a cache in front of it, a cache hit skips the body,
# so the caller's frame would silently keep its outliers on Streamlit reruns.
# Hashing and pickling the whole frame for the cache also costs about as much
# as the work below, so caching buys nothing here.
def mean_salary(df):
    """Cap extreme salaries at the mean of the non-extreme ones.

    Every row whose ``SalaryUSD`` is greater than 1,000,000 has that value
    replaced by the mean ``SalaryUSD`` of the rows at or below 1,000,000.

    Args:
        df: DataFrame with a numeric ``SalaryUSD`` column. It is modified
            in place.

    Returns:
        The same ``df`` object, after the replacement.

    Notes:
        Rows with a missing ``SalaryUSD`` match neither comparison and are
        left untouched. If no row is at or below 1,000,000 the mean is NaN,
        and the outliers are set to NaN.
    """
    is_outlier = df['SalaryUSD'] > 1000000
    # Mean over the non-outlier rows only, so the extreme values being
    # replaced do not skew their own replacement.
    typical_mean = df.loc[df['SalaryUSD'] <= 1000000, 'SalaryUSD'].mean()
    df.loc[is_outlier, 'SalaryUSD'] = typical_mean
    return df
|
116 | 121 |
|
117 | 122 | # Function to create value count plots for each column
|
| 123 | +@st.cache_data |
118 | 124 | def plot_value_counts(column_name):
|
119 | 125 | colors = ['skyblue', 'yellow']
|
120 | 126 | fig = px.bar(df_ai[column_name].value_counts().reset_index(), x='index', y=column_name, color_discrete_sequence=[random.choice(colors)])
|
|
0 commit comments