| 
9 | 9 | # DATA LOADING  | 
10 | 10 | #######################################  | 
11 | 11 | 
 
  | 
12 |  | -st.set_page_config(layout='wide')  | 
 | 12 | +st.set_page_config(layout='wide')   | 
 | 13 | +   | 
 | 14 | +@st.cache_data # Caching data loading functions  | 
 | 15 | +def load_data(url):  | 
 | 16 | +    return pd.read_csv(url)  | 
13 | 17 | 
 
  | 
14 | 18 | # Loading data files from the 'streamlit' directory  | 
15 |  | -df = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')  | 
16 |  | -df2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')  | 
17 |  | -full_data2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')  | 
18 |  | -full_data2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')  | 
19 |  | -full_df2020 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')  | 
20 |  | -df2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')  | 
21 |  | -df2021 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')  | 
22 |  | -df2022 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')  | 
 | 19 | +df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv')  | 
 | 20 | +df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv')  | 
 | 21 | +full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv')  | 
 | 22 | +full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv')  | 
 | 23 | +full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv')  | 
 | 24 | +df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv')  | 
 | 25 | +df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv')  | 
 | 26 | +df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv')  | 
23 | 27 | 
 
  | 
24 | 28 | # Filter the 2020 dataframe  | 
25 | 29 | df2020 = df[df['SalaryUSD'] < 200000]  | 
 | 
109 | 113 | }  | 
110 | 114 | df_ai.replace(short_mapping, inplace=True)  | 
111 | 115 | 
 
  | 
 | 116 | +@st.cache_data  | 
112 | 117 | def mean_salary(df):  | 
113 | 118 |     mean_salary = df[df['SalaryUSD'] <= 1000000]['SalaryUSD'].mean()  | 
114 | 119 |     df.loc[df['SalaryUSD'] > 1000000, 'SalaryUSD'] = mean_salary  | 
115 | 120 |     return df  | 
116 | 121 | 
 
  | 
117 | 122 | # Function to create value count plots for each column  | 
 | 123 | +@st.cache_data  | 
118 | 124 | def plot_value_counts(column_name):  | 
119 | 125 |     colors = ['skyblue', 'yellow']  | 
120 | 126 |     fig = px.bar(df_ai[column_name].value_counts().reset_index(), x='index', y=column_name, color_discrete_sequence=[random.choice(colors)])  | 
 | 
0 commit comments