diff --git a/surveys/2020-12-jupyter-survey/README.md b/surveys/2020-12-jupyter-survey/README.md new file mode 100644 index 0000000..9222650 --- /dev/null +++ b/surveys/2020-12-jupyter-survey/README.md @@ -0,0 +1,15 @@ +# 2020 Jupyter Survey + +This folder contains data from the 2020 Jupyter Survey led by Layne Sadler (@layne-sadler). It includes: +- [all_responses.csv](./data/all_responses.csv) A CSV file containing all responses from multiple choice or matrix questions. +- [text_fields.csv](./data/text_fields.csv) A CSV file containing all responses from text fields. +- [all_responses.ipynb](./all_responses.ipynb) A notebook containing visualizations and analysis from all_responses.csv and text_fields.csv. It requires Python 3.7+ with pandas and plotly.express, which are imported at the top of the notebook. It also requires [jupyterlab-plotly](https://www.npmjs.com/package/jupyterlab-plotly) which can be installed with `jupyter labextension install jupyterlab-plotly`. +- [all_responses.html](./all_responses.html) An HTML version of all_responses.ipynb. + +Responses were collected on [SurveyMonkey](https://www.surveymonkey.com/) from December 2020 to February 2021. This survey was open to anyone familiar with Project Jupyter with more direct outreach to those involved in the Jupyter ecosystem, current and former. It was advertised on the [Jupyter blog](https://blog.jupyter.org/), [Project Jupyter Google Group](https://groups.google.com/g/jupyter/), [LinkedIn](https://www.linkedin.com), [binder](https://mybinder.org/), the [Project Jupyter website](https://jupyter.org/), [Jupyter Discourse](https://discourse.jupyter.org/), and [Project Jupyter Twitter](https://twitter.com/ProjectJupyter). + +## Background +This survey was made in response to discussions at [jupyterlab/team-compass #80](https://github.com/jupyterlab/team-compass/issues/80) and was intended to help provide a better sense of the community outside of development-centered meetings and places. 
It was also meant to help guide the [JupyterLab 4.0](https://github.com/jupyterlab/jupyterlab/issues/9647) roadmap. + +## Credits +Tim George @tgeorgeux, Brian Granger @ellisonbg, Ali Colleen Neff, Isabela Presedo-Floyd @isabela-pf, Luciano Resende @lresende, Layne Sadler @layne-sadler diff --git a/surveys/2020-12-jupyter-survey/all_responses.html b/surveys/2020-12-jupyter-survey/all_responses.html new file mode 100755 index 0000000..ef465af --- /dev/null +++ b/surveys/2020-12-jupyter-survey/all_responses.html @@ -0,0 +1,23215 @@ + + +
+ + +These first few charts are not executed in linear order. I pulled them up from the bottom.
+ +px.bar(weighted_df, x='points', y='question', title='Weighted Pain Points', height=1500, width=900, **points_color_kwargs)
+
px.bar(compt_df, x='points', y='question', title='Jupyter vs Alternatives', height=600, width=800, **points_color_kwargs)
+
px.bar(freq_df, x='weighted_freq', y='question', title='Use Cases by Frequency', height=600, width=800, **freq_color_kwargs)
+
(q3-7) Sound familiar? Predominantly used by data scientists, researchers, and academics using Python for visualization, data wrangling, documenting research [<- needs improvement], and ML.
+(q19-20) Extreme pain points: autocompletion, version control, track changes.
+(q18c) Collaborators are either working on different parts of a project or entirely separate projects.
+(q18-q19) People want to publish to shared location in order to share knowledge [see RStudio Server].
+(q14) Most people either scale vertically or don't know how to scale.
+(q15) The pain points raised in scale, data, and collaboration seem addressable.
+(q6) Usage: Local machine/ venv > Google Colab > JupyterHub > HPC > Docker.
+(q3) Keep an eye on: Julia already half as popular as R and almost matching SQL. Dask as popular as Spark.
+Infrequent: content creation, find ext, dev ext.
+Worse: writing sftw, tests, docs, pipelines, dev ext.
+Usage:
+Big dropoff between monthly and weekly.
+Languages:
+Scala used less than Javascript.
+Env:
+Data sources:
+Pain points - data:
+Low: mvc/ orm
+Pain points - scale:
+Low: batch, budget, spark
+Pain points - UI:
+Low: desktop app, hidden files, finding failed cells, edit other docs.
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+
df = pd.read_csv('data/all_responses.csv')
+
len(df.columns.tolist()) - 1 #'Respondent ID'
+
215+
def series_counts_to_frame(main_df: pd.DataFrame, col_name: str) -> pd.DataFrame:
    """Tally how often each answer occurs in one column of the survey frame.

    Args:
        main_df: Frame holding the responses.
        col_name: Name of the column to tally.

    Returns:
        A new frame with a fresh 0..n-1 index and two columns, in this
        order: ``count`` (number of occurrences) and ``options`` (the
        distinct non-null values). Rows are in ``value_counts`` order,
        i.e. most frequent first.
    """
    tally = main_df[col_name].value_counts()

    # Build the result straight from the tally instead of renaming columns
    # through ``col_name``: the rename-based approach overwrote the counts
    # whenever the tallied column was itself called 'count'.
    return pd.DataFrame({
        'count': tally.to_numpy(),
        'options': tally.index.to_numpy(),
    })
+
# Keyword-argument bundles that get unpacked into px.bar(...) calls.
# The three continuous-colour variants differ only in which column drives
# the colour; each uses the Teal sequential scale in reversed order.
counts_color_kwargs = {
    'template': 'plotly_dark',
    'color': 'count',
    'color_continuous_scale': px.colors.sequential.Teal[::-1],
}

points_color_kwargs = {
    'template': 'plotly_dark',
    'color': 'points',
    'color_continuous_scale': px.colors.sequential.Teal[::-1],
}

freq_color_kwargs = {
    'template': 'plotly_dark',
    'color': 'weighted_freq',
    'color_continuous_scale': px.colors.sequential.Teal[::-1],
}

# Discrete two-colour variant, coloured by the 'tool' column; it also
# fixes the figure height.
compare_color_kwargs = {
    'color': 'tool',
    'template': 'plotly_dark',
    'height': 300,
    'color_discrete_sequence': ['lightblue', 'salmon'],
}
+
q1_name = '1. How frequently do you use Jupyter?'
+
q1 = series_counts_to_frame(main_df=df, col_name=q1_name)
+q1 = q1.iloc[::-1]
+
px.bar(q1, x='count', y='options', title=q1_name, height=350, **counts_color_kwargs)
+
q2_name = '2. How long have you been using Jupyter?'
+
q2 = series_counts_to_frame(main_df=df, col_name=q2_name)
+q2 = q2.reindex([4, 2, 3, 1, 0])
+
px.bar(q2, x='count', y='options', title=q2_name, height=350, **counts_color_kwargs)
+
q3_name = '3. What languages do you use in Jupyter? (pick up to 4)'
+
# Question 3 is spread over one column per option ("3. <option>").
# Series.count() is the number of non-null cells in that column.
# NOTE(review): presumably a cell is non-null only when the respondent
# ticked that option — confirm against the CSV export.
cols_3 = [name for name in df.columns if name.startswith('3.')]

counts_3_records = [
    {"language": name.split("3. ", 1)[-1], "count": df[name].count()}
    for name in cols_3
]

# Ascending sort by count.
counts_3_df = pd.DataFrame.from_records(counts_3_records)
counts_3_df = counts_3_df.sort_values('count')
+
px.bar(counts_3_df, x='count', y='language', title=q3_name, height=700, **counts_color_kwargs)
+
q4_name = '4. What are your primary job roles when you are using Jupyter? (pick up to 2)'
+
# Question 4 is spread over one column per option ("4. <option>");
# tally the non-null cells of each and strip the "4. " prefix for the label.
cols_4 = [name for name in df.columns if name.startswith('4.')]

counts_4_records = [
    {"role": name.split("4. ", 1)[-1], "count": df[name].count()}
    for name in cols_4
]

# Ascending sort by count.
counts_4_df = pd.DataFrame.from_records(counts_4_records)
counts_4_df = counts_4_df.sort_values('count')
+
px.bar(counts_4_df, x='count', y='role', title=q4_name, height=600, **counts_color_kwargs)
+
q5_name = "5. What are your go-to tools for performing data science, scientific computing, and machine learning on your laptop/ desktop (non-cloud) for data science? (pick up to 3)"
+
# Question 5 is spread over one column per option ("5. <option>");
# tally the non-null cells of each and strip the "5. " prefix for the label.
cols_5 = [name for name in df.columns if name.startswith('5.')]

counts_5_records = [
    {"ide": name.split("5. ", 1)[-1], "count": df[name].count()}
    for name in cols_5
]

# Ascending sort by count.
counts_5_df = pd.DataFrame.from_records(counts_5_records)
counts_5_df = counts_5_df.sort_values('count')
+
px.bar(counts_5_df, x='count', y='ide', title=q5_name, height=600, **counts_color_kwargs)
+
q6_name = '6. How do you run and/ or access Jupyter? (pick up to 4)'
+
# Question 6 is spread over one column per option ("6. <option>");
# tally the non-null cells of each and strip the "6. " prefix for the label.
cols_6 = [name for name in df.columns if name.startswith('6.')]

counts_6_records = [
    {"env": name.split("6. ", 1)[-1], "count": df[name].count()}
    for name in cols_6
]

# Ascending sort by count.
counts_6_df = pd.DataFrame.from_records(counts_6_records)
counts_6_df = counts_6_df.sort_values('count')
+
px.bar(counts_6_df, x='count', y='env', title=q6_name, height=700, **counts_color_kwargs)
+
def merge_expectations(df2, df3):
    """Stack the Jupyter and alternative-tool answer counts into one frame.

    Args:
        df2: Counts frame for a "Has Jupyter met your expectations" question.
        df3: Counts frame for the matching "Have alternative tools met your
            expectations" question.

    Returns:
        A new DataFrame with the rows of ``df2`` followed by the rows of
        ``df3`` under a fresh 0..n-1 index, plus a ``tool`` column holding
        ``'jupyter'`` for the former and ``'alternative'`` for the latter.

    Note:
        The ``tool`` column is also written onto ``df2`` and ``df3``
        themselves; that side effect is kept for compatibility.
    """
    # A scalar broadcasts to every row, so frames of any length work
    # (hard-coded 4-item lists raise on any other row count).
    df2['tool'] = 'jupyter'
    df3['tool'] = 'alternative'

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and yields the same stacked frame.
    merged_df = pd.concat([df2, df3], ignore_index=True)
    return merged_df
+
q7a1_name = '7a1. Writing a software package. - How frequently do you\xa0perform this task?'
+
q7a1 = series_counts_to_frame(main_df=df, col_name=q7a1_name)
+q7a1 = q7a1.reindex([0, 1, 3, 2, 4])
+
px.bar(q7a1, x='count', y='options', title=q7a1_name, height=300, **counts_color_kwargs)
+
q7a2_name = '7a2. Writing a software package. - Has Jupyter met your expectations for this use case?'
+
# The column header in df.columns has a non-breaking space after "Have";
# it is written as an explicit \xa0 escape so the key visibly matches.
q7a3_name = '7a3. Writing a software package. - Have\xa0alternative tools met your expectations for this use case?'
+
q7a2 = series_counts_to_frame(main_df=df, col_name=q7a2_name)
+q7a2 = q7a2.reindex([0, 1, 2, 3])
+
q7a3 = series_counts_to_frame(main_df=df, col_name=q7a3_name)
+q7a3 = q7a3.reindex([1,3,2,0])
+
q7a_merged = merge_expectations(q7a2, q7a3)
+
q7a_name = '7a. Writing a software package. Meets expectations.'
+
px.bar(q7a_merged, x='count', y='options', title=q7a_name, **compare_color_kwargs)
+
# The column header in df.columns has a non-breaking space before "perform";
# it is written as an explicit \xa0 escape so the key visibly matches.
q7b1_name = '7b1. Cleaning and preparing data. - How frequently do you\xa0perform this task?'
+
q7b1 = series_counts_to_frame(main_df=df, col_name=q7b1_name)
+q7b1 = q7b1.reindex([4,3,2,0,1])
+
px.bar(q7b1, x='count', y='options', title=q7b1_name, height=300, **counts_color_kwargs)
+
q7b2_name = '7b2. Cleaning and preparing data. - Has Jupyter met your expectations for this use case?'
+
q7b2 = series_counts_to_frame(main_df=df, col_name=q7b2_name)
+q7b2 = q7b2.reindex([2, 3, 1, 0])
+
# The column header in df.columns has a non-breaking space after "Have";
# it is written as an explicit \xa0 escape so the key visibly matches.
q7b3_name = '7b3. Cleaning and preparing data. - Have\xa0alternative tools met your expectations for this use case?'
+
q7b3 = series_counts_to_frame(main_df=df, col_name=q7b3_name)
+q7b3 = q7b3.reindex([3, 2, 1, 0])
+
q7b_merged = merge_expectations(q7b2, q7b3)
+
q7b_name = '7b. Cleaning & preparing data. Meets expectations.'
+
px.bar(q7b_merged, x='count', y='options', title=q7b_name, **compare_color_kwargs)
+
q7c1_name = '7c1. Writing and running tests for software. - How frequently do you\xa0perform this task?'
+
q7c1 = series_counts_to_frame(main_df=df, col_name=q7c1_name)
+q7c1 = q7c1.reindex([0, 1, 3, 2, 4])
+
px.bar(q7c1, x='count', y='options', title=q7c1_name, height=300, **counts_color_kwargs)
+
q7c2_name = '7c2. Writing and running tests for software. - Has Jupyter met your expectations for this use case?'
+
q7c2 = series_counts_to_frame(main_df=df, col_name=q7c2_name)
+q7c2 = q7c2.reindex([0, 1, 3, 2])
+
q7c3_name = '7c3. Writing and running tests for software. - Have\xa0alternative tools met your expectations for this use case?'
+
q7c3 = series_counts_to_frame(main_df=df, col_name=q7c3_name)
+q7c3 = q7c3.reindex([1, 3, 2, 0])
+
q7c_name = '7c. Writing and running tests for software. Meets expectations.'
+
q7c_merged = merge_expectations(q7c2, q7c3)
+
px.bar(q7c_merged, x='count', y='options', title=q7c_name, **compare_color_kwargs)
+
q7d1_name = '7d1. Building a machine learning or statistical model. - How frequently do you\xa0perform this task?'
+
q7d1 = series_counts_to_frame(main_df=df, col_name=q7d1_name)
+q7d1 = q7d1.reindex([3, 1, 2, 0, 4])
+
px.bar(q7d1, x='count', y='options', title=q7d1_name, height=300, **counts_color_kwargs)
+
q7d2_name = '7d2. Building a machine learning or statistical model. - Has Jupyter met your expectations for this use case?'
+
q7d2 = series_counts_to_frame(main_df=df, col_name=q7d2_name)
+q7d2 = q7d2.reindex([2,3,1,0])
+
q7d3_name = '7d3. Building a machine learning or statistical model. - Have\xa0alternative tools met your expectations for this use case?'
+
q7d3 = series_counts_to_frame(main_df=df, col_name=q7d3_name)
+q7d3 = q7d3.reindex([2,3,1,0])
+
q7d_name = '7d. Building a machine learning or statistical model. Meets expectations.'
+
q7d_merged = merge_expectations(q7d2, q7d3)
+
px.bar(q7d_merged, x='count', y='options', title=q7d_name, **compare_color_kwargs)
+
q7e1_name = '7e1. Visualize data in charts, plots, or dashboards. - How frequently do you\xa0perform this task?'
+
q7e1 = series_counts_to_frame(main_df=df, col_name=q7e1_name)
+q7e1 = q7e1.reindex([4, 3, 2, 0, 1])
+
px.bar(q7e1, x='count', y='options', title=q7e1_name, height=300, **counts_color_kwargs)
+
q7e2_name ='7e2. Visualize data in charts, plots, or dashboards. - Has Jupyter met your expectations for this use case?'
+
q7e2 = series_counts_to_frame(main_df=df, col_name=q7e2_name)
+q7e2 = q7e2.reindex([2,3,1,0])
+
q7e3_name = '7e3. Visualize data in charts, plots, or dashboards. - Have\xa0alternative tools met your expectations for this use case?'
+
q7e3 = series_counts_to_frame(main_df=df, col_name=q7e3_name)
+q7e3 = q7e3.reindex([3, 2, 1, 0])
+
q7e_name = '7e. Visualize data in charts, plots, or dashboards. Meets expectations.'
+
q7e_merged = merge_expectations(q7e2, q7e3)
+
px.bar(q7e_merged, x='count', y='options', title=q7e_name, **compare_color_kwargs)
+
q7f1_name = '7f1. Creating content (e.g. blogs, books, education materials). - How frequently do you\xa0perform this task?'
+
q7f1 = series_counts_to_frame(main_df=df, col_name=q7f1_name)
+
px.bar(q7f1, x='count', y='options', title=q7f1_name, height=300, **counts_color_kwargs)
+
q7f2_name = '7f2. Creating content (e.g. blogs, books, education materials). - Has Jupyter met your expectations for this use case?'
+
q7f2 = series_counts_to_frame(main_df=df, col_name=q7f2_name)
+q7f2 = q7f2.reindex([1,3,2,0])
+
q7f3_name = '7f3. Creating content (e.g. blogs, books, education materials). - Have\xa0alternative tools met your expectations for this use case?'
+
q7f3 = series_counts_to_frame(main_df=df, col_name=q7f3_name)
+q7f3 = q7f3.reindex([0,3,2,1])
+
q7f_name = '7f. Creating content (e.g. blogs, books, education materials). Meets expectations.'
+
q7f_merged = merge_expectations(q7f2, q7f3)
+
px.bar(q7f_merged, x='count', y='options', title=q7f_name, **compare_color_kwargs)
+
q7g1_name = '7g1. Documenting research (e.g. reports, scientific papers). - How frequently do you\xa0perform this task?'
+
q7g1 = series_counts_to_frame(main_df=df, col_name=q7g1_name)
+q7g1 = q7g1.reindex([2, 0, 3, 1, 4])
+
px.bar(q7g1, x='count', y='options', title=q7g1_name, height=300, **counts_color_kwargs)
+
q7g2_name = '7g2. Documenting research (e.g. reports, scientific papers). - Has Jupyter met your expectations for this use case?'
+
q7g2 = series_counts_to_frame(main_df=df, col_name=q7g2_name)
+q7g2 = q7g2.reindex([1,3,2,0])
+
q7g3_name = '7g3. Documenting research (e.g. reports, scientific papers). - Have\xa0alternative tools met your expectations for this use case?'
+
q7g3 = series_counts_to_frame(main_df=df, col_name=q7g3_name)
+q7g3 = q7g3.reindex([2, 3, 1, 0])
+
q7g_name = '7g. Documenting research (e.g. reports, scientific papers). Meets expectations.'
+
q7g_merged = merge_expectations(q7g2, q7g3)
+
px.bar(q7g_merged, x='count', y='options', title=q7g_name, **compare_color_kwargs)
+
q7h1_name = '7h1. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - How frequently do you\xa0perform this task?'
+
q7h1 = series_counts_to_frame(main_df=df, col_name=q7h1_name)
+q7h1 = q7h1.reindex([0, 2, 3, 1, 4])
+
px.bar(q7h1, x='count', y='options', title=q7h1_name, height=300, **counts_color_kwargs)
+
q7h2_name = '7h2. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - Has Jupyter met your expectations for this use case?'
+
q7h2 = series_counts_to_frame(main_df=df, col_name=q7h2_name)
+q7h2 = q7h2.reindex([0, 3, 2, 1])
+
q7h3_name = '7h3. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - Have\xa0alternative tools met your expectations for this use case?'
+
q7h3 = series_counts_to_frame(main_df=df, col_name=q7h3_name)
+q7h3 = q7h3.reindex([1,3,2,0])
+
q7h_name = '7h. Run pipelines, workflows, or ETL (extract, transform, load) jobs. Meets expectations.'
+
q7h_merged = merge_expectations(q7h2, q7h3)
+
px.bar(q7h_merged, x='count', y='options', title=q7h_name, **compare_color_kwargs)
+
q7i1_name = '7i1. Writing software documentation. - How frequently do you\xa0perform this task?'
+
q7i1 = series_counts_to_frame(main_df=df, col_name=q7i1_name)
+
px.bar(q7i1, x='count', y='options', title=q7i1_name, height=300, **counts_color_kwargs)
+
q7i2_name = '7i2. Writing software documentation. - Has Jupyter met your expectations for this use case?'
+
q7i2 = series_counts_to_frame(main_df=df, col_name=q7i2_name)
+q7i2 = q7i2.reindex([0, 2,3,1])
+
q7i3_name = '7i3. Writing software documentation. - Have\xa0alternative tools met your expectations for this use case?'
+
q7i3 = series_counts_to_frame(main_df=df, col_name=q7i3_name)
+q7i3 = q7i3.reindex([1, 3, 2, 0])
+
q7i_name = '7i. Writing software documentation. Meets expectations.'
+
q7i_merged = merge_expectations(q7i2, q7i3)
+
px.bar(q7i_merged, x='count', y='options', title=q7i_name, **compare_color_kwargs)
+
q7j1_name = '7j1. Finding extensions/ plugins to solve my problems. - How frequently do you\xa0perform this task?'
+
q7j1 = series_counts_to_frame(main_df=df, col_name=q7j1_name)
+q7j1 = q7j1.reindex([1,0,2,3,4])
+
px.bar(q7j1, x='count', y='options', title=q7j1_name, height=300, **counts_color_kwargs)
+
q7j2_name = '7j2. Finding extensions/ plugins to solve my problems. - Has Jupyter met your expectations for this use case?'
+
q7j2 = series_counts_to_frame(main_df=df, col_name=q7j2_name)
+q7j2 = q7j2.reindex([0, 3, 1, 2])
+
q7j3_name = '7j3. Finding extensions/ plugins to solve my problems. - Have\xa0alternative tools met your expectations for this use case?'
+
q7j3 = series_counts_to_frame(main_df=df, col_name=q7j3_name)
+q7j3 = q7j3.reindex([0, 3,2,1])
+
q7j_name = '7j. Finding extensions/ plugins to solve my problems. Meets expectations.'
+
q7j_merged = merge_expectations(q7j2, q7j3)
+
px.bar(q7j_merged, x='count', y='options', title=q7j_name, **compare_color_kwargs)
+
q7k1_name ='7k1. Developing extensions/ plugins to solve my problems. - How frequently do you\xa0perform this task?'
+
q7k1 = series_counts_to_frame(main_df=df, col_name=q7k1_name)
+
px.bar(q7k1, x='count', y='options', title=q7k1_name, height=300, **counts_color_kwargs)
+
q7k2_name ='7k2. Developing extensions/ plugins to solve my problems. - Has Jupyter met your expectations for this use case?'
+
q7k2 = series_counts_to_frame(main_df=df, col_name=q7k2_name)
+q7k2 = q7k2.reindex([0,3,1,2])
+
q7k3_name = '7k3. Developing extensions/ plugins to solve my problems. - Have\xa0alternative tools met your expectations for this use case?'
+
q7k3 = series_counts_to_frame(main_df=df, col_name=q7k3_name)
+q7k3 = q7k3.reindex([0, 3,2,1])
+
q7k_name = '7k. Developing extensions/ plugins to solve my problems. Meets expectations.'
+
q7k_merged = merge_expectations(q7k2, q7k3)
+
px.bar(q7k_merged, x='count', y='options', title=q7k_name, **compare_color_kwargs)
+
q8_name = "8. What data sources are you primarily working with in your role? (pick up to 3)"
+
# Question 8 is spread over one column per option ("8. <option>");
# tally the non-null cells of each and strip the "8. " prefix for the label.
cols_8 = [name for name in df.columns if name.startswith('8.')]

counts_8_records = [
    {"datastore": name.split("8. ", 1)[-1], "count": df[name].count()}
    for name in cols_8
]

# Ascending sort by count.
counts_8_df = pd.DataFrame.from_records(counts_8_records)
counts_8_df = counts_8_df.sort_values('count')
+
px.bar(counts_8_df, x='count', y='datastore', title=q8_name, height=700, **counts_color_kwargs)
+
q9_name = '9. What data formats are you mostly working with? (pick up to 3)'
+
# Question 9 is spread over one column per option ("9. <option>");
# tally the non-null cells of each and strip the "9. " prefix for the label.
cols_9 = [name for name in df.columns if name.startswith('9.')]

counts_9_records = [
    {"data_format": name.split("9. ", 1)[-1], "count": df[name].count()}
    for name in cols_9
]

# Ascending sort by count.
counts_9_df = pd.DataFrame.from_records(counts_9_records)
counts_9_df = counts_9_df.sort_values('count')
+
px.bar(counts_9_df, x='count', y='data_format', title=q9_name, height=700, **counts_color_kwargs)
+
q10a_name = '10a. Data is too big to fit into memory on my machine/ server.'
+
q10a = series_counts_to_frame(main_df=df, col_name=q10a_name)
+q10a = q10a.reindex([5, 0, 4, 1, 2, 3])
+
px.bar(q10a, x='count', y='options', title=q10a_name, height=350, **counts_color_kwargs)
+
q10b_name = '10b. Lost data\xa0during failure or restart of kernel/ server.'
+
q10b = series_counts_to_frame(main_df=df, col_name=q10b_name)
# Order the rows by the severity number embedded in each answer label
# ("(0) ...", "(1) ...", ...); labels without a number sort first.
# This replaces a reindex of q10a, which made the 10b chart show 10a's counts;
# deriving the order from the labels avoids a permutation tied to another
# question's count ranking.
# NOTE(review): assumes 10b uses "(n) ..." answer labels like the 13b table
# shown in this notebook — confirm. If not, rows keep value_counts order.
_q10b_rank = q10b['options'].str.extract(r'\((\d+)\)', expand=False).astype(float).fillna(-1)
q10b = q10b.loc[_q10b_rank.sort_values(kind='mergesort').index]
+
px.bar(q10b, x='count', y='options', title=q10b_name, height=350, **counts_color_kwargs)
+
q10c_name = '10c. Can’t see a list of my current variables.'
+
q10c = series_counts_to_frame(main_df=df, col_name=q10c_name)
+q10c = q10c.reindex([5, 0, 3, 1, 2, 4])
+
px.bar(q10c, x='count', y='options', title=q10c_name, height=350, **counts_color_kwargs)
+
q10d_name = '10d. No grid\xa0view for\xa0manipulating/ filtering\xa0dataframes and arrays.'
+
q10d = series_counts_to_frame(main_df=df, col_name=q10d_name)
+q10d = q10d.reindex([4, 0, 3, 1, 2, 5])
+
px.bar(q10d, x='count', y='options', title=q10d_name, height=350, **counts_color_kwargs)
+
q10e_name = '10e. Poor\xa0MVC/ ORM integrations (e.g. Django, Flask).'
+
q10e = series_counts_to_frame(main_df=df, col_name=q10e_name)
+q10e = q10e.reindex([1, 0, 3, 2, 4, 5])
+
px.bar(q10e, x='count', y='options', title=q10e_name, height=350, **counts_color_kwargs)
+
q10f_name = '10f. Plaintext\xa0or environment variable management\xa0of database passwords/ keys/ secrets.'
+
q10f = series_counts_to_frame(main_df=df, col_name=q10f_name)
+q10f = q10f.reindex([2, 0, 3, 1, 4, 5])
+
px.bar(q10f, x='count', y='options', title=q10f_name, height=350, **counts_color_kwargs)
+
q11_name = '11. What type of analysis are you running? (pick up to 4)'
+
# Question 11 is spread over one column per option ("11. <option>");
# tally the non-null cells of each and strip the "11. " prefix for the label.
cols_11 = [name for name in df.columns if name.startswith('11.')]

counts_11_records = [
    {"analysis": name.split("11. ", 1)[-1], "count": df[name].count()}
    for name in cols_11
]

# Ascending sort by count.
counts_11_df = pd.DataFrame.from_records(counts_11_records)
counts_11_df = counts_11_df.sort_values('count')
+
px.bar(counts_11_df, x='count', y='analysis', title=q11_name, height=550, **counts_color_kwargs)
+
q12_name = '12. What tools do you use to create dashboards? (pick up to 3)'
+
# Question 12 is spread over one column per option ("12. <option>");
# tally the non-null cells of each and strip the "12. " prefix for the label.
cols_12 = [name for name in df.columns if name.startswith('12.')]

counts_12_records = [
    {"dashboard": name.split("12. ", 1)[-1], "count": df[name].count()}
    for name in cols_12
]

# Ascending sort by count.
counts_12_df = pd.DataFrame.from_records(counts_12_records)
counts_12_df = counts_12_df.sort_values('count')
+
px.bar(counts_12_df, x='count', y='dashboard', title=q12_name, height=550, **counts_color_kwargs)
+
q13a_name = '13a. No built-in UI for creating charts.'
+
q13a = series_counts_to_frame(main_df=df, col_name=q13a_name)
# Order the rows by the severity number embedded in each answer label
# ("(0) ...", "(1) ...", ...); labels without a number sort first.
# This replaces a reindex of q10e, which made the 13a chart show 10e's counts;
# deriving the order from the labels avoids a permutation tied to another
# question's count ranking.
# NOTE(review): assumes 13a uses "(n) ..." answer labels like the 13b table
# shown in this notebook — confirm. If not, rows keep value_counts order.
_q13a_rank = q13a['options'].str.extract(r'\((\d+)\)', expand=False).astype(float).fillna(-1)
q13a = q13a.loc[_q13a_rank.sort_values(kind='mergesort').index]
+
px.bar(q13a, x='count', y='options', title=q13a_name, height=350, **counts_color_kwargs)
+
q13b_name = "13b. Can't publish my charts as web-based dashboards."
+
q13b = series_counts_to_frame(main_df=df, col_name=q13b_name)
+q13b = q13b.reindex([3, 0, 4, 1, 2, 5])
+q13b
+
+ | count | +options | +
---|---|---|
3 | +169 | +N/A - skip, don't know. | +
0 | +402 | +(0) Not a problem for me. | +
4 | +87 | +(1) Trivial. | +
1 | +222 | +(2) Minor. | +
2 | +174 | +(3) Major. | +
5 | +69 | +(4) Critical. | +
px.bar(q13b, x='count', y='options', title=q13b_name, height=350, **counts_color_kwargs)
+
q13c_name = '13c. Poor/ buggy support for my plotting tool.'
+
q13c = series_counts_to_frame(main_df=df, col_name=q13c_name)
+q13c = q13c.reindex([1, 0, 3, 2, 4, 5])
+
px.bar(q13c, x='count', y='options', title=q13c_name, height=350, **counts_color_kwargs)
+
q13d_name = '13d. Difficulty displaying\xa0highly dimensional data (e.g. array of array of arrays, too many rows/ columns to fit on screen).'
+
q13d = series_counts_to_frame(main_df=df, col_name=q13d_name)
+q13d = q13d.reindex([3, 1, 4, 0, 2, 5])
+
px.bar(q13d, x='count', y='options', title=q13d_name, height=350, **counts_color_kwargs)
+
q13e_name = '13e. Lacking\xa0templating support (e.g. Jinja2).'
+
q13e = series_counts_to_frame(main_df=df, col_name=q13e_name)
+q13e = q13e.reindex([1,0,3,2,4,5])
+
px.bar(q13e, x='count', y='options', title=q13e_name, height=350, **counts_color_kwargs)
+
q14_name = '14. How do you scale and schedule your workloads? (pick up to 4)'
+
# Question 14 is spread over one column per option ("14. <option>");
# tally the non-null cells of each and strip the "14. " prefix for the label.
cols_14 = [name for name in df.columns if name.startswith('14.')]

counts_14_records = [
    {"env": name.split("14. ", 1)[-1], "count": df[name].count()}
    for name in cols_14
]

# Ascending sort by count.
counts_14_df = pd.DataFrame.from_records(counts_14_records)
counts_14_df = counts_14_df.sort_values('count')
+
px.bar(counts_14_df, x='count', y='env', title=q14_name, height=800, **counts_color_kwargs)
+
q15a_name = '15a. Figuring out how to schedule batch execution of notebook-based jobs.'
+
q15a = series_counts_to_frame(main_df=df, col_name=q15a_name)
+q15a = q15a.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15a, x='count', y='options', title=q15a_name, height=350, **counts_color_kwargs)
+
q15b_name = '15b. Don’t have the budget for more scalable environment/ cloud services.'
+
q15b = series_counts_to_frame(main_df=df, col_name=q15b_name)
+q15b = q15b.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15b, x='count', y='options', title=q15b_name, height=350, **counts_color_kwargs)
+
q15c_name = '15c. Haven’t divided longer notebooks into multiple, modular notebooks.'
+
q15c = series_counts_to_frame(main_df=df, col_name=q15c_name)
+q15c = q15c.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15c, x='count', y='options', title=q15c_name, height=350, **counts_color_kwargs)
+
q15d_name = '15d. Not persisting the outputs of a notebook.'
+
q15d = series_counts_to_frame(main_df=df, col_name=q15d_name)
+q15d = q15d.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15d, x='count', y='options', title=q15d_name, height=350, **counts_color_kwargs)
+
q15e_name = '15e. Machine learning training jobs take too long.'
+
q15e = series_counts_to_frame(main_df=df, col_name=q15e_name)
+q15e = q15e.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15e, x='count', y='options', title=q15e_name, height=350, **counts_color_kwargs)
+
q15f_name = "15f. Can't call code/ modules from other notebooks."
+
q15f = series_counts_to_frame(main_df=df, col_name=q15f_name)
+q15f = q15f.reindex([1, 0, 4, 2, 3, 5])
+
px.bar(q15f, x='count', y='options', title=q15f_name, height=350, **counts_color_kwargs)
+
q15g_name = '15g. Difficulty managing\xa0Spark dependencies (Java).'
+
q15g = series_counts_to_frame(main_df=df, col_name=q15g_name)
+q15g = q15g.reindex([0, 1, 3, 2, 4, 5])
+
px.bar(q15g, x='count', y='options', title=q15g_name, height=350, **counts_color_kwargs)
+
q17_name = '17. What is your reason for sharing a notebook with someone else? (pick up to 3)'
+
# Question 17 is spread over one column per option ("17. <option>");
# tally the non-null cells of each and strip the "17. " prefix for the label.
cols_17 = [name for name in df.columns if name.startswith('17.')]

counts_17_records = [
    {"reason": name.split("17. ", 1)[-1], "count": df[name].count()}
    for name in cols_17
]

# Ascending sort by count.
counts_17_df = pd.DataFrame.from_records(counts_17_records)
counts_17_df = counts_17_df.sort_values('count')
+
px.bar(counts_17_df, x='count', y='reason', title=q17_name, height=500, **counts_color_kwargs)
+
q18a_name = '18a. How long have you been working together?'
+
q18a = series_counts_to_frame(main_df=df, col_name=q18a_name)
+q18a = q18a.reindex([1, 3, 4, 2, 0])
+
px.bar(q18a, x='count', y='options', title=q18a_name, height=350, **counts_color_kwargs)
+
q18b_name = '18b. How frequently do you work together?'
+
q18b = series_counts_to_frame(main_df=df, col_name=q18b_name)
+q18b = q18b.reindex([1, 4, 5, 3, 0, 2])
+
px.bar(q18b, x='count', y='options', title=q18b_name, height=375, **counts_color_kwargs)
+
q18c_name = '18c. How do you divide the work?'
+
q18c = series_counts_to_frame(main_df=df, col_name=q18c_name)
+q18c = q18c.reindex([1, 2, 0, 3])
+
px.bar(q18c, x='count', y='options', title=q18c_name, height=250, **counts_color_kwargs)
+
q19a_name = "19a. Don't know what\xa0dependencies (versions of language, packages, extensions)\xa0a notebook uses."
+
q19a = series_counts_to_frame(main_df=df, col_name=q19a_name)
+q19a = q19a.reindex([3,0,4,1,2,5])
+
px.bar(q19a, x='count', y='options', title=q19a_name, height=350, **counts_color_kwargs)
+
q19b_name = "19b. Don't know/ have the data a notebook is supposed to use."
+
q19b = series_counts_to_frame(main_df=df, col_name=q19b_name)
+q19b = q19b.reindex([2,0,3,1,4,5])
+
px.bar(q19b, x='count', y='options', title=q19b_name, height=350, **counts_color_kwargs)
+
q19c_name = '19c. Poor\xa0support for\xa0our version control (git) system.'
+
q19c = series_counts_to_frame(main_df=df, col_name=q19c_name)
+q19c = q19c.reindex([3, 2, 5, 4, 1, 0])
+
px.bar(q19c, x='count', y='options', title=q19c_name, height=350, **counts_color_kwargs)
+
q19d_name = '19d. No built-in\xa0way\xa0to publish my notebook to a shared location.'
+
q19d = series_counts_to_frame(main_df=df, col_name=q19d_name)
+q19d = q19d.reindex([2, 0, 4, 1, 3, 5])
+
px.bar(q19d, x='count', y='options', title=q19d_name, height=350, **counts_color_kwargs)
+
q19e_name = '19e. Not being able to comment on notebooks.'
+
q19e = series_counts_to_frame(main_df=df, col_name=q19e_name)
+q19e = q19e.reindex([3, 0, 4, 1, 2, 5])
+
px.bar(q19e, x='count', y='options', title=q19e_name, height=350, **counts_color_kwargs)
+
q19f_name = '19f. No "track changes;" can\'t figure out what changed between notebook checkpoints/ versions.'
+
q19f = series_counts_to_frame(main_df=df, col_name=q19f_name)
+q19f = q19f.reindex([4, 3, 5, 2, 0, 1])
+
px.bar(q19f, x='count', y='options', title=q19f_name, height=350, **counts_color_kwargs)
+
q20a_name = '20a. Poor autocompletion (e.g. LSP, show methods/ attributes).'
+
q20a = series_counts_to_frame(main_df=df, col_name=q20a_name)
+q20a = q20a.reindex([5, 2, 4, 0, 1, 3])
+
px.bar(q20a, x='count', y='options', title=q20a_name, height=350, **counts_color_kwargs)
+
q20b_name = '20b. No native desktop app.'
+
q20b = series_counts_to_frame(main_df=df, col_name=q20b_name)
+q20b = q20b.reindex([4, 0, 1, 3, 2, 5])
+
px.bar(q20b, x='count', y='options', title=q20b_name, height=350, **counts_color_kwargs)
+
q20c_name = "20c. Can't collapse sections of a notebook hierarchically."
+
q20c = series_counts_to_frame(main_df=df, col_name=q20c_name)
+q20c = q20c.reindex([4, 1, 2, 0, 3, 5])
+
px.bar(q20c, x='count', y='options', title=q20c_name, height=350, **counts_color_kwargs)
+
q20d_name = "20d. Can't see hidden `.` files in file browser."
+
q20d = series_counts_to_frame(main_df=df, col_name=q20d_name)
+q20d = q20d.reindex([3,0,2,1,4,5])
+
px.bar(q20d, x='count', y='options', title=q20d_name, height=350, **counts_color_kwargs)
+
q20e_name = "20e. Don't know which cell failed in long notebook."
+
q20e = series_counts_to_frame(main_df=df, col_name=q20e_name)
+q20e = q20e.reindex([4,0,2,1,3,5])
+
px.bar(q20e, x='count', y='options', title=q20e_name, height=350, **counts_color_kwargs)
+
q20f_name = "20f. No progress bar for running long notebooks."
+
q20f = series_counts_to_frame(main_df=df, col_name=q20f_name)
+q20f = q20f.reindex([5,1,3,0,2,4])
+
px.bar(q20f, x='count', y='options', title=q20f_name, height=350, **counts_color_kwargs)
+
q20g_name = '20g. No global search.'
+
q20g = series_counts_to_frame(main_df=df, col_name=q20g_name)
+q20g = q20g.reindex([4, 0, 3, 1, 2, 5])
+
px.bar(q20g, x='count', y='options', title=q20g_name, height=350, **counts_color_kwargs)
+
q20h_name = '20h. No modes for editing other Jupyter\xa0documents (e.g. MyST, Jupyter Book).'
+
q20h = series_counts_to_frame(main_df=df, col_name=q20h_name)
+q20h = q20h.reindex([1, 0, 3, 2, 4, 5])
+
px.bar(q20h, x='count', y='options', title=q20h_name, height=350, **counts_color_kwargs)
+
q20i_name = '20i. No marketplace for Extensions (e.g. 5 star ratings, browsable categories).'
+
q20i = series_counts_to_frame(main_df=df, col_name=q20i_name)
+q20i = q20i.reindex([4,0,3,1,2,5])
+
px.bar(q20i, x='count', y='options', title=q20i_name, height=350, **counts_color_kwargs)
+
df.columns.to_list()
+
['Respondent ID', + '1. How frequently do you use Jupyter?', + '2. How long have you been using Jupyter?', + '3. Python.', + '3. R.', + '3. Spark SQL.', + '3. SQL.', + '3. Java.', + '3. Scala.', + '3. C (and derivatives).', + '3. JavaScript.', + '3. NodeJS.', + '3. TypeScript.', + '3. PHP.', + '3. Ruby.', + '3. Go.', + '3. Rust.', + '3. Groovy.', + '3. Perl.', + '3. Julia.', + '3. I wrap/ use bindings for other languages.', + '3. My preferred language is not supported in Jupyter.', + '4. Data engineer.', + '4. Data scientist.', + '4. Scientist/ researcher.', + '4. Teacher/ lecturer.', + '4. Tutor/\xa0teaching assistant.', + '4. Financial modeler/ analyst.', + '4. Business analyst.', + '4. Backend engineer.', + '4. Front end/ web development.', + '4. DevOps.', + '4. Database Admin (DBA).', + '4. Infrastructure engineer/ cloud architect.', + '4. Sysadmin.', + '4. Student.', + '5. JupyterLab.', + '5. Jupyter Notebook - Classic.', + '5. PyCharm.', + '5. Spyder.', + '5. RStudio.', + '5. nteract.', + '5. VS Code.', + '5. Zeppelin.', + '5. Sublime Text.', + '5. Atom.', + '5. Emacs.', + '5. Vim.', + '5. Ipython.', + '6. Run directly on local machine (e.g. laptop, desktop).', + '6. Through a Python virtual environment (e.g. conda, virtualenv).', + '6. Through Docker.', + '6. HPC or on-premise server.', + '6. Cloud server (e.g. AWS EC2).', + '6. JupyterHub.', + '6. BinderHub / MyBinder.', + '6. Cloud service - AWS (e.g. EMR, SageMaker).', + '6. Cloud service - Azure (e.g. Notebooks, ML Studio).', + '6. Cloud service - Databricks.', + '6. Cloud service - Google (e.g. AI Platform, Dataproc).', + '6. Cloud service - IBM (e.g. Watson Studio).', + '6. Google Colab.', + '6. CoCalc.', + '6. Mobile device (e.g. phone, tablet). Comments welcome.', + '6. Don’t know how, I just go to a URL.', + '7a1. Writing a software package. - How frequently do you\xa0perform this task?', + '7a2. Writing a software package. - Has Jupyter met your expectations for this use case?', + '7a3. 
Writing a software package. - Have\xa0alternative tools met your expectations for this use case?', + '7b1. Cleaning and preparing data. - How frequently do you\xa0perform this task?', + '7b2. Cleaning and preparing data. - Has Jupyter met your expectations for this use case?', + '7b3. Cleaning and preparing data. - Have\xa0alternative tools met your expectations for this use case?', + '7c1. Writing and running tests for software. - How frequently do you\xa0perform this task?', + '7c2. Writing and running tests for software. - Has Jupyter met your expectations for this use case?', + '7c3. Writing and running tests for software. - Have\xa0alternative tools met your expectations for this use case?', + '7d1. Building a machine learning or statistical model. - How frequently do you\xa0perform this task?', + '7d2. Building a machine learning or statistical model. - Has Jupyter met your expectations for this use case?', + '7d3. Building a machine learning or statistical model. - Have\xa0alternative tools met your expectations for this use case?', + '7e1. Visualize data in charts, plots, or dashboards. - How frequently do you\xa0perform this task?', + '7e2. Visualize data in charts, plots, or dashboards. - Has Jupyter met your expectations for this use case?', + '7e3. Visualize data in charts, plots, or dashboards. - Have\xa0alternative tools met your expectations for this use case?', + '7f1. Creating content (e.g. blogs, books, education materials). - How frequently do you\xa0perform this task?', + '7f2. Creating content (e.g. blogs, books, education materials). - Has Jupyter met your expectations for this use case?', + '7f3. Creating content (e.g. blogs, books, education materials). - Have\xa0alternative tools met your expectations for this use case?', + '7g1. Documenting research (e.g. reports, scientific papers). - How frequently do you\xa0perform this task?', + '7g2. Documenting research (e.g. reports, scientific papers). 
- Has Jupyter met your expectations for this use case?', + '7g3. Documenting research (e.g. reports, scientific papers). - Have\xa0alternative tools met your expectations for this use case?', + '7h1. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - How frequently do you\xa0perform this task?', + '7h2. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - Has Jupyter met your expectations for this use case?', + '7h3. Run pipelines, workflows, or ETL (extract, transform, load) jobs. - Have\xa0alternative tools met your expectations for this use case?', + '7i1. Writing software documentation. - How frequently do you\xa0perform this task?', + '7i2. Writing software documentation. - Has Jupyter met your expectations for this use case?', + '7i3. Writing software documentation. - Have\xa0alternative tools met your expectations for this use case?', + '7j1. Finding extensions/ plugins to solve my problems. - How frequently do you\xa0perform this task?', + '7j2. Finding extensions/ plugins to solve my problems. - Has Jupyter met your expectations for this use case?', + '7j3. Finding extensions/ plugins to solve my problems. - Have\xa0alternative tools met your expectations for this use case?', + '7k1. Developing extensions/ plugins to solve my problems. - How frequently do you\xa0perform this task?', + '7k2. Developing extensions/ plugins to solve my problems. - Has Jupyter met your expectations for this use case?', + '7k3. Developing extensions/ plugins to solve my problems. - Have\xa0alternative tools met your expectations for this use case?', + '8. My local file system (e.g. files and folder on local machine).', + '8. File system (e.g. HPC, EBS/EFS, JupyterHub volumes).', + '8. Cloud object storage (e.g. buckets, S3, Blob, GS).', + '8. SQL (e.g. PostgreSQL, MySQL).', + '8. SQL - embedded (e.g. SQLite).', + '8. NoSQL - columnar store (e.g. Parquet, Arrow, HDFS, BigQuery).', + '8. NoSQL - document store (e.g. 
MongoDB, Elasticsearch, DynamoDB).', + '8. Graph database (e.g. Neo4j, TigerGraph).', + '8. Time Series (e.g. InfluxDB).', + '8. Pub/ sub (e.g. Apache Kafka, Druid).', + '8. Key value (e.g. Redis, MemcacheDB).', + '8. Google Sheets.', + '8. Industry or field specific APIs.', + '8. Streaming.', + '9. Tabular (e.g. csv, spreadsheet,\xa0SQL tables, Parquet).', + '9. Images.', + '9. Tensors (e.g. manually handling PyTorch, Tensorflow inputs).', + '9. Nested (e.g. JSON, NoSQL document).', + '9. Hierarchical Data Format (e.g. HDF5 or similar).', + '9. Time series.', + '9. Text.', + '9. Audio.', + '9. Video.', + '9. 3D/ CAD.', + '9. Graph (e.g. nodes, edges).', + '9. Spatial/ geographic (e.g. coordinates, GIS).', + '9. Game/ reinforcement simulation.', + '9. Industry-specific file formats.', + '10a. Data is too big to fit into memory on my machine/ server.', + '10b. Lost data\xa0during failure or restart of kernel/ server.', + '10c. Can’t see a list of my current variables.', + '10d. No grid\xa0view for\xa0manipulating/ filtering\xa0dataframes and arrays.', + '10e. Poor\xa0MVC/ ORM integrations (e.g. Django, Flask).', + '10f. Plaintext\xa0or environment variable management\xa0of database passwords/ keys/ secrets.', + '11. I am not performing ML/statistical tasks.', + '11. Regression; predict a numeric output.', + '11. Classification; predict a categorical output.', + '11. Generative/ auto-encode; create new data based on existing data.', + '11. Reinforcement learning; actions that maximize a reward.', + '11. Dimensionality reduction (e.g. PCA, K-Nearest Neighbors).', + '11. Feature engineering (e.g. importance, extraction, selection, permutation).', + '11. Natural language processing (NLP).', + '11. Graph data science.', + '11. Outlier detection.', + "12. I don't create dashboards.", + '12. I write my own in HTML & JS.', + '12. R Shiny.', + '12. Kibana.', + '12. Dash-Plotly.', + '12. Voila.', + '12. Tableau.', + '12. Looker.', + '12. Klipfolio.', + '12. 
Google Data Studio.', + '12. Spotfire.', + '12. Grafana', + '13a. No built-in UI for creating charts.', + "13b. Can't publish my charts as web-based dashboards.", + '13c. Poor/ buggy support for my plotting tool.', + '13d. Difficulty displaying\xa0highly dimensional data (e.g. array of array of arrays, too many rows/ columns to fit on screen).', + '13e. Lacking\xa0templating support (e.g. Jinja2).', + '14. They run just fine on my local machine.', + "14. I need to scale, but don't know how.", + '14. Server - on premise HPC/ data center.', + '14. Server - cloud (e.g. AWS EC2).', + '14. Cloud\xa0ML/ AI (e.g. AWS SageMaker, IBM Wastson Studio).', + '14. Cluster - Spark and/ Hadoop.', + '14. Cluster - Dask.', + '14. Cluster - Kubernetes (or similar e.g. Mesos, Swarm, Slurm).', + '14. Cluster - Jupyter Enterprise Gateway.', + '14. Jupyter BinderHub.', + '14. Quantum (e.g. D-Wave).', + '14. Horovod.', + '14. Kubeflow.', + '14. Snakemake.', + '14. Papermill.', + '14. CWL, Nextflow, and/ or WDL.', + '14. Apache Airflow.', + '14. Prefect.', + '14. Cloud\xa0pipelines (e.g. AWS Batch).', + '14. Cloud queries (e.g. AWS Presto, AWS Athena).', + '15a. Figuring out how to schedule batch execution of notebook-based jobs.', + '15b. Don’t have the budget for more scalable environment/ cloud services.', + '15c. Haven’t divided longer notebooks into multiple, modular notebooks.', + '15d. Not persisting the outputs of a notebook.', + '15e. Machine learning training jobs take too long.', + "15f. Can't call code/ modules from other notebooks.", + '15g. Difficulty managing\xa0Spark dependencies (Java).', + '16. When it comes to working on notebooks in a team setting, with how many other people are you collaborating?', + '17. I am not\xa0working with other people.', + '17. Share knowledge.', + '17. Feedback about my writing.', + '17. Feedback about\xa0my code.', + '17. Formal code review.', + '17. Integrate my code/ data with their downstream or upstream processes.', + '17. 
Edit/ contribute some of their own code.', + '17. Edit/ contribute some of their own writing.', + '17. Teach/ tutor them.', + '17. Peer programming', + '17. Deploy my code/ model/ pipeline/ dashboard.', + '18a. How long have you been working together?', + '18b. How frequently do you work together?', + '18c. How do you divide the work?', + "19a. Don't know what\xa0dependencies (versions of language, packages, extensions)\xa0a notebook uses.", + "19b. Don't know/ have the data a notebook is supposed to use.", + '19c. Poor\xa0support for\xa0our version control (git) system.', + '19d. No built-in\xa0way\xa0to publish my notebook to a shared location.', + '19e. Not being able to comment on notebooks.', + '19f. No "track changes;" can\'t figure out what changed between notebook checkpoints/ versions.', + '20a. Poor autocompletion (e.g. LSP, show methods/ attributes).', + '20b. No native desktop app.', + "20c. Can't collapse sections of a notebook hierarchically.", + "20d. Can't see hidden `.` files in file browser.", + "20e. Don't know which cell failed in long notebook.", + '20f. No progress bar for running long notebooks.', + '20g. No global search.', + '20h. No modes for editing other Jupyter\xa0documents (e.g. MyST, Jupyter Book).', + '20i. No marketplace for Extensions (e.g. 5 star ratings, browsable categories).']+
# Pain-point statements from survey items 10, 13, 15, 19 and 20.
#
# Each entry matches the corresponding column label of `df` exactly, including
# the "\xa0" non-breaking spaces and the curly apostrophes, so do not
# "tidy" the text. The first three characters ("10a", "13b", ...) are the
# question ID; the scoring loop uses them to look up the per-question count
# frame named `q<ID>` (e.g. `q10a`).
weighted_pain_qs = [
    # 10. Data handling
    '10a. Data is too big to fit into memory on my machine/ server.',
    '10b. Lost data\xa0during failure or restart of kernel/ server.',
    '10c. Can’t see a list of my current variables.',
    '10d. No grid\xa0view for\xa0manipulating/ filtering\xa0dataframes and arrays.',
    '10e. Poor\xa0MVC/ ORM integrations (e.g. Django, Flask).',
    '10f. Plaintext\xa0or environment variable management\xa0of database passwords/ keys/ secrets.',
    # 13. Charts and dashboards
    '13a. No built-in UI for creating charts.',
    "13b. Can't publish my charts as web-based dashboards.",
    '13c. Poor/ buggy support for my plotting tool.',
    '13d. Difficulty displaying\xa0highly dimensional data (e.g. array of array of arrays, too many rows/ columns to fit on screen).',
    '13e. Lacking\xa0templating support (e.g. Jinja2).',
    # 15. Scaling and pipelines
    '15a. Figuring out how to schedule batch execution of notebook-based jobs.',
    '15b. Don’t have the budget for more scalable environment/ cloud services.',
    '15c. Haven’t divided longer notebooks into multiple, modular notebooks.',
    '15d. Not persisting the outputs of a notebook.',
    '15e. Machine learning training jobs take too long.',
    "15f. Can't call code/ modules from other notebooks.",
    '15g. Difficulty managing\xa0Spark dependencies (Java).',
    # 19. Collaboration
    "19a. Don't know what\xa0dependencies (versions of language, packages, extensions)\xa0a notebook uses.",
    "19b. Don't know/ have the data a notebook is supposed to use.",
    '19c. Poor\xa0support for\xa0our version control (git) system.',
    '19d. No built-in\xa0way\xa0to publish my notebook to a shared location.',
    '19e. Not being able to comment on notebooks.',
    '19f. No "track changes;" can\'t figure out what changed between notebook checkpoints/ versions.',
    # 20. General UX
    '20a. Poor autocompletion (e.g. LSP, show methods/ attributes).',
    '20b. No native desktop app.',
    "20c. Can't collapse sections of a notebook hierarchically.",
    "20d. Can't see hidden `.` files in file browser.",
    "20e. Don't know which cell failed in long notebook.",
    '20f. No progress bar for running long notebooks.',
    '20g. No global search.',
    '20h. No modes for editing other Jupyter\xa0documents (e.g. MyST, Jupyter Book).',
    '20i. No marketplace for Extensions (e.g. 5 star ratings, browsable categories).',
]
+
# Score every pain point by severity and plot the ranking.
#
# Each answer contributes its ordinal severity to the question's score:
# Critical = 4, Major = 3, Minor = 2, Trivial = 1. Rows for any other option
# (e.g. "(0) Not a problem for me." or the N/A answer) are not counted.
severity_weights = {
    '(4) Critical.': 4,
    '(3) Major.': 3,
    '(2) Minor.': 2,
    '(1) Trivial.': 1,
}

weighted_points = []

for q in weighted_pain_qs:
    # Per-question count frames live in the notebook namespace as q10a, q10b, ...
    # (one frame per question ID, with 'options' and 'count' columns).
    q_df = globals()['q' + q[:3]]

    # Some option labels were exported with a non-breaking space instead of a
    # regular one. Normalise once so that *every* severity level matches,
    # not just Minor and Trivial.
    options = q_df['options'].str.replace('\xa0', ' ', regex=False)

    # .sum() over the matching rows yields 0 when nobody picked a level,
    # instead of raising IndexError on an empty selection. If a frame holds
    # both spellings of a label, their counts are added together.
    point_total = sum(
        weight * q_df.loc[options == label, 'count'].sum()
        for label, weight in severity_weights.items()
    )

    # Labels are cut to 65 characters to keep the chart's y-axis readable.
    record = {"question": q[:65], "points": point_total}
    weighted_points.append(record)

weighted_df = pd.DataFrame.from_records(weighted_points).sort_values('points')

px.bar(
    weighted_df,
    x='points',
    y='question',
    title='Weighted Pain Points',
    height=1500,
    width=900,
    **points_color_kwargs,
)
+
# Use-case statements from survey item 7 (one entry per sub-question a-k).
#
# Only the leading ID is used for data lookups: the first three characters
# ("7a1") select the frequency count frame `q7a1`, and the first two ("7a")
# select the Jupyter-vs-alternatives frame `q7a_merged`. The full string is
# used as the chart label.
freq_qs = [
    '7a1. Writing a software package.',
    '7b1. Cleaning and preparing data.',
    '7c1. Writing and running tests for software.',
    '7d1. Building a machine learning or statistical model.',
    '7e1. Visualize data in charts, plots, or dashboards.',
    '7f1. Creating content (e.g. blogs, books, education materials).',
    '7g1. Documenting research (e.g. reports, scientific papers).',
    '7h1. Run pipelines, workflows, or ETL (extract, transform, load) jobs.',
    '7i1. Writing software documentation.',
    '7j1. Finding extensions/ plugins to solve my problems.',
    '7k1. Developing extensions/ plugins to solve my problems.',
]
+
# Rank the use cases by how often respondents perform them.
#
# Each answer is weighted by how frequent it is; the weight drops by a factor
# of three per step (81, 27, 9, 3). Rows for any other option are not counted.
frequency_weights = {
    'Daily.': 81,
    'Weekly.': 27,
    'Monthly.': 9,
    'Every few months.': 3,
}

rated_frequencies = []

for q in freq_qs:
    # Per-question count frames live in the notebook namespace as q7a1, q7b1, ...
    # (one frame per question ID, with 'options' and 'count' columns).
    qdf = globals()['q' + q[:3]]

    # Defensive: treat a non-breaking space in an exported label like a
    # regular space so 'Every few months.' still matches.
    options = qdf['options'].str.replace('\xa0', ' ', regex=False)

    # .sum() over the matching rows yields 0 when nobody picked an option,
    # instead of raising IndexError on an empty selection.
    freq_total = sum(
        weight * qdf.loc[options == label, 'count'].sum()
        for label, weight in frequency_weights.items()
    )

    record = {"question": q, "weighted_freq": freq_total}
    rated_frequencies.append(record)

freq_df = pd.DataFrame.from_records(rated_frequencies).sort_values('weighted_freq')

px.bar(
    freq_df,
    x='weighted_freq',
    y='question',
    title='Use Cases by Frequency',
    height=600,
    width=800,
    **freq_color_kwargs,
)
+
# Compare Jupyter against alternative tools for every use case.
#
# For each tool the net score is (#"Yes." answers - #"No." answers) to
# "met your expectations"; the plotted value is Jupyter's net score minus the
# alternatives' net score, so positive bars favour Jupyter.


def _answer_count(merged_df, tool, answer):
    """Return the summed 'count' of rows for `tool` / `answer`.

    Yields 0 when the frame has no such row, rather than raising IndexError
    the way indexing the first element of an empty selection would.
    """
    mask = (merged_df['tool'] == tool) & (merged_df['options'] == answer)
    return merged_df.loc[mask, 'count'].sum()


rated_compt = []

for q in freq_qs:
    # Merged frames live in the notebook namespace as q7a_merged, q7b_merged, ...
    # (with 'tool', 'options' and 'count' columns).
    qdf = globals()['q' + q[:2] + "_merged"]

    jup_total = _answer_count(qdf, 'jupyter', 'Yes.') - _answer_count(qdf, 'jupyter', 'No.')
    alt_total = _answer_count(qdf, 'alternative', 'Yes.') - _answer_count(qdf, 'alternative', 'No.')

    points_total = jup_total - alt_total

    record = {"question": q, "points": points_total}
    rated_compt.append(record)

compt_df = pd.DataFrame.from_records(rated_compt).sort_values('points')

px.bar(
    compt_df,
    x='points',
    y='question',
    title='Jupyter vs Alternatives',
    height=600,
    width=800,
    **points_color_kwargs,
)
+
\n", + " | count | \n", + "options | \n", + "
---|---|---|
3 | \n", + "169 | \n", + "N/A - skip, don't know. | \n", + "
0 | \n", + "402 | \n", + "(0) Not a problem for me. | \n", + "
4 | \n", + "87 | \n", + "(1) Trivial. | \n", + "
1 | \n", + "222 | \n", + "(2) Minor. | \n", + "
2 | \n", + "174 | \n", + "(3) Major. | \n", + "
5 | \n", + "69 | \n", + "(4) Critical. | \n", + "