This repository has been archived by the owner on Oct 8, 2024. It is now read-only.
forked from dataprofessor/vennlit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
streamlit_app.py
432 lines (373 loc) · 18.2 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import io
from io import BytesIO
from itertools import combinations
from zipfile import ZipFile
import streamlit_analytics
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from venn import venn, pseudovenn
import chardet
import csv
@st.cache_data(ttl=3600)
def load_sheet_names(file):
    """Return the names of the sheets contained in an Excel workbook.

    Args:
        file: Anything ``pd.ExcelFile`` accepts; the caller in this script
            passes an uploaded ``.xlsx`` file object.

    Returns:
        The workbook's sheet names, as reported by ``ExcelFile.sheet_names``.
    """
    # The context manager closes the workbook reader once the names are read,
    # instead of leaving it open until garbage collection.
    with pd.ExcelFile(file) as workbook:
        return workbook.sheet_names
@st.cache_data(ttl=3600)
def load_data(file, selected_sheet=None):
    """Read one uploaded file into a DataFrame.

    Files whose ``type`` is the .xlsx MIME type are read with
    ``pd.read_excel`` using ``selected_sheet``. Every other file is read as
    delimited text: its bytes are handed to ``detect_delimiter`` to obtain the
    delimiter and encoding, then decoded and parsed with ``pd.read_csv``.

    Args:
        file: Uploaded file object exposing ``type`` and ``read()``.
        selected_sheet: Sheet passed to ``pd.read_excel`` as ``sheet_name``.

    Returns:
        Whatever the pandas reader returns for the file.
    """
    xlsx_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    # Delimited-text path first; Excel is the fall-through case.
    if file.type != xlsx_mime:
        raw_bytes = file.read()
        separator, charset = detect_delimiter(raw_bytes)
        text_stream = io.StringIO(raw_bytes.decode(charset))
        return pd.read_csv(text_stream, delimiter=separator)

    return pd.read_excel(file, sheet_name=selected_sheet)
@st.cache_data(ttl=3600)
def detect_delimiter(file_path):
    """Guess the delimiter of CSV data given as a path or as raw bytes.

    The return shape depends on the kind of input, and both callers in this
    script rely on that:

    * a filesystem path -> the delimiter only (``str``);
    * ``bytes`` / ``bytearray`` content -> ``(delimiter, encoding)``.

    Args:
        file_path: Path of a CSV file, or the raw bytes of one.

    Returns:
        ``delimiter`` for a path, ``(delimiter, encoding)`` for raw bytes.

    Raises:
        OSError: If a path is given and the file cannot be opened.
        csv.Error: If ``csv.Sniffer`` cannot determine a delimiter.
    """
    sample_size = 4096

    # Dispatch on the input type explicitly rather than waiting for open() to
    # fail on raw bytes; a bad path now reports its own error.
    from_memory = isinstance(file_path, (bytes, bytearray))
    if from_memory:
        raw = bytes(file_path)
    else:
        with open(file_path, 'rb') as f:
            raw = f.read()

    # chardet reports None when it cannot decide (e.g. empty input).
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'

    # errors='replace' keeps a multibyte character cut at the sample boundary
    # from aborting the sniff.
    sample = raw[:sample_size].decode(encoding, errors='replace')
    delimiter = csv.Sniffer().sniff(sample).delimiter

    if from_memory:
        return delimiter, encoding
    return delimiter
# Analysis of the selected lists to generate the multidimensional Venn files
@st.cache_data(ttl=3600)
def download_venn_data(lists):
    """Build a zip archive with one text file per Venn region.

    For every non-empty combination of the given lists, the archive gets a
    file holding the items present in all lists of the combination and in none
    of the other given lists, one item per line. Files are named
    ``<size>_<names joined by "_">.txt`` with the names sorted.

    Args:
        lists: Column names of the module-level ``df`` to compare.

    Returns:
        The bytes of the zip archive.
    """
    # NOTE(review): this reads the module-level ``df`` rather than taking it as
    # an argument; the cached result is presumably keyed on ``lists`` only, so
    # verify it cannot go stale when the data changes under the same names.
    items_occurrence = {per_list: set(df[per_list].dropna()) for per_list in lists}

    def exclusive_to(members):
        """Items shared by every list in ``members`` and found in no other list."""
        shared = set.intersection(*(items_occurrence[name] for name in members))
        others = [items for name, items in items_occurrence.items() if name not in members]
        return shared.difference(*others)

    zip_buffer = BytesIO()
    with ZipFile(zip_buffer, 'a') as zip_file:
        # Size 1 gives the items exclusive to a single list; larger sizes give
        # the items exclusive to each intersection.
        for combination_size in range(1, len(lists) + 1):
            for lists_combination in combinations(lists, combination_size):
                # str() on items and names: values read from a spreadsheet may
                # be numbers, which str.join rejects.
                file_content = "\n".join(map(str, exclusive_to(lists_combination)))
                joined_names = '_'.join(sorted(map(str, lists_combination)))
                zip_file.writestr(f"{combination_size}_{joined_names}.txt", file_content)

    return zip_buffer.getvalue()
# PNG export of the Venn diagram
def download_png():
    """Save the current pyplot figure as PNG into an in-memory buffer.

    Returns:
        A ``BytesIO`` positioned at its start, ready to be read.
    """
    png_stream = BytesIO()
    plt.savefig(png_stream, bbox_inches='tight', format="png")
    # Rewind so the consumer reads from the first byte.
    png_stream.seek(0)
    return png_stream
# SVG export of the Venn diagram
def download_svg():
    """Save the current pyplot figure as SVG into an in-memory buffer.

    Returns:
        A ``BytesIO`` positioned at its start, ready to be read.
    """
    svg_stream = BytesIO()
    plt.savefig(svg_stream, bbox_inches='tight', format="svg")
    # Rewind so the consumer reads from the first byte.
    svg_stream.seek(0)
    return svg_stream
# Venn configuration tables. Each maps the label shown in the UI to the value
# handed to the plotting call.

# Label -> format string for the region captions.
fmt_options = {
    "Number": "{size}",
    "Percentage": "{percentage:.1f}%",
    "Logic": "{logic}",
}

# Label -> colormap name. Keep the order: the colour select boxes pick their
# default by position (index 7, "Default").
cmap_options = {
    'Accent': 'Accent',
    'BRG': 'brg',
    'Cividis': 'cividis',  # label was misspelled "Civids"
    'CMRmap': 'CMRmap',
    'CoolWarm': 'coolwarm',
    'CubeHelix': 'cubehelix',
    'Dark2': 'Dark2',
    'Default': 'hsv',
    'Flag': 'flag',
    'Gist Earth': 'gist_earth',
    'Gist Ncar': 'gist_ncar',
    'Gist Rainbow': 'gist_rainbow',
    'Gist Stern': 'gist_stern',
    'GnuPlot': 'gnuplot',
    'GnuPlot2': 'gnuplot2',
    'Inferno': 'inferno',
    'Jet': 'jet',
    'Magma': 'magma',
    'Nipy Spectral': 'nipy_spectral',
    'Ocean': 'ocean',
    'Pastel2': 'Pastel2',
    'Paired': 'Paired',
    'Plasma': 'plasma',
    'Prism': 'prism',
    'Rainbow': 'rainbow',
    'Set1': 'Set1',
    'Set2': 'Set2',
    'Set3': 'Set3',
    'Spectral': 'Spectral',
    'Tab10': 'tab10',
    'Tab20': 'tab20',
    'Tab20b': 'tab20b',
    'Tab20c': 'tab20c',
    'Terrain': 'terrain',
    'Turbo': 'turbo',
    'Twilight': 'twilight',
    'Twilight Shifted': 'twilight_shifted',
    'Viridis': 'viridis',
}

# Label -> legend location string.
legend_loc_options = {
    'Best': 'best',
    'Upper Right': 'upper right',
    'Upper Left': 'upper left',
    'Upper Center': 'upper center',
    'Lower Right': 'lower right',
    'Lower Left': 'lower left',
    'Lower Center': 'lower center',
    'Right': 'right',
    'Center Right': 'center right',
    'Center Left': 'center left',
    'Center': 'center',
}
# Settings for Streamlit page
# NOTE(review): this copy of the script has lost its leading whitespace, so the
# nesting of the statements below (in particular which `with colN:` block each
# one belongs to) cannot be read from the text. Confirm it against a properly
# indented copy before editing.
st.set_page_config(
page_title="VennLit V2",
page_icon="⭕",
layout="wide")
# Main page
st.title('⭕ VennLit V2')
# Placeholders: `df` is replaced by a DataFrame once data is loaded, and
# `selection_lists` by the multiselect result. Both are tested with len() later.
df = []
selection_lists = []
# Three columns with relative widths 0.8 / 1.4 / 0.8.
col1, col2, col3 = st.columns([0.8, 1.4, 0.8])
with col1:
# Example section
st.subheader("📎 Example and Hints")
st.link_button("Help", 'https://jumitti.notion.site/jumitti/VennLit-V2-e20a373a9c6f4c1390e72a7953ffcb0c')
demo = st.checkbox("**Try example**", value=1)
if demo: # Demo mode
with col2:
st.subheader('Welcome to VennLit V2 😊')
st.write('You are by default in **demo** mode.\n'
'You can play with VennLit V2 or disable **Try example** on the left **📎 Example** section.\n'
'You can also click on **[Help](https://jumitti.notion.site/jumitti/VennLit-V2-e20a373a9c6f4c1390e72a7953ffcb0c)**.')
# Load the bundled example. Given a path that opens successfully,
# detect_delimiter returns only the delimiter.
csv_file = 'example/example.csv'
snif_delimiter = detect_delimiter(csv_file)
df = pd.read_csv(csv_file, delimiter=snif_delimiter)
with st.expander("**.csv and .xlsx templates**", expanded=False):
with open("example/example.csv", "rb") as file: # Download .csv template
st.download_button(
label="Download example.csv",
data=file,
file_name="example.csv",
mime="text/csv")
with open("example/example.xlsx", "rb") as file: # Download .xlsx template
st.download_button(
label="Download example.xlsx",
data=file,
file_name="example.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
# Upload data section
st.subheader("💽 Upload data")
uploaded_files = st.file_uploader("**Upload one or more .xlsx .csv files**", type=["csv", "xlsx"],
accept_multiple_files=True)
if len(uploaded_files) > 0:
# One DataFrame per uploaded file; assigning `df` here replaces the demo data.
dfs = []
for file in uploaded_files: # Is .csv or .xlsx
# Excel uploads are recognised by MIME type and need a sheet choice;
# every other upload goes through load_data's delimited-text path.
if file.type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
sheet_names = load_sheet_names(file)
selected_sheet = st.selectbox(f"Select sheet for **{file.name}**:", sheet_names)
df = load_data(file, selected_sheet=selected_sheet)
else:
df = load_data(file)
dfs.append(df)
# Column names appearing more than once across the uploaded files.
all_columns = [col for df in dfs for col in df.columns]
duplicate_columns = [col for col in set(all_columns) if all_columns.count(col) > 1]
if duplicate_columns: # Some lists with same name ?
st.warning(f"Some lists have the same name: {', '.join(duplicate_columns)}")
# Duplicated names are dropped from every file before the frames are
# concatenated side by side.
filtered_dfs = []
# `included_columns` is filled below but not read afterwards in this script.
included_columns = set()
for df in dfs:
filtered_columns = [col for col in df.columns if col not in duplicate_columns]
included_columns.update(filtered_columns)
filtered_df = df[filtered_columns]
filtered_dfs.append(filtered_df)
df = pd.concat(filtered_dfs, axis=1)
else:
df = pd.concat(dfs, axis=1)
elif len(uploaded_files) == 0 and not demo:
# Nothing uploaded and demo off: clear the cached loader results and show
# the welcome text.
st.cache_data.clear()
with col2:
st.subheader('Welcome to VennLit V2 😊')
st.write('You can play with VennLit V2 or enable **Try example** on the left **📎 Example** section.\n'
'You can also click on **[Help](https://jumitti.notion.site/jumitti/VennLit-V2-e20a373a9c6f4c1390e72a7953ffcb0c)**.')
else:
# Remaining case (no upload, demo on): the cache is cleared here as well.
st.cache_data.clear()
try:
if len(df) > 0:
# Lists section
st.subheader("🧮 Lists")
st.dataframe(df, hide_index=True)
lists = df.columns.tolist()
with col3:
# Lists selection
st.subheader('📌 Lists selection')
# Column name -> set of that column's non-null values.
items_occurrence = {per_list: set(df[per_list].dropna()) for per_list in lists}
# The first two columns are preselected.
selection_lists = st.multiselect('Lists selection', lists, default=lists[:2],
placeholder="Choose lists", disabled=False,
label_visibility='collapsed')
num_sets = len(selection_lists)
# The slice spans the whole list, so this is a copy of selection_lists.
selected_lists = selection_lists[:num_sets]
except Exception as e:
# Any failure above is reported in the middle column instead of raising.
with col2:
st.warning(f"It appears that there is an error with one or more values in your lists..."
"Please check your data. Otherwise, convert your file to .csv with the ';' deliminator.\n\n"
"If this does not resolve the problems, contact me by email (minnitijulien06@gmail.com ; minniti@ipmc.cnrs.fr) or submit a [GitHub Issue](https://github.com/Jumitti/vennlit_v2/issues).\n\n"
"Error information:"
f"{e}", icon='🚨')
with col1:
# Credits section
st.subheader("✒️Credits")
st.write("Original app by [@professordata](https://github.com/dataprofessor/vennlit)")
st.write(
"Venn diagram with [@tctianchi](https://github.com/tctianchi/pyvenn) and [@LankyCyril](https://github.com/LankyCyril/pyvenn)")
# NOTE(review): the parenthesis opened before "DOI:" in the text below is never closed.
st.write(
"Inspired by [InteractiVenn](http://www.interactivenn.net/) (DOI:[10.1186/s12859-015-0611-3](http://doi.org/10.1186/s12859-015-0611-3)")
st.write("VennLit V2 rebuild and up-to-date by [@Jumitti](https://github.com/Jumitti/vennlit_v2)")
st.divider()
# Page-view tracking, currently disabled:
# streamlit_analytics.start_tracking()
# streamlit_analytics.stop_tracking()
# views = streamlit_analytics.main.counts["total_pageviews"]
# st.write(f"Total connections (from last reboot) 👨🏼💻: {int(views)}")
st.write(
"My other app: [TFinder](https://tfinder-ipmc.streamlit.app/) and [ChickenAI](https://chickenai.streamlit.app/)")
def _diagram_settings(key_prefix, size_label):
    """Render the settings widgets of one diagram and return its plot kwargs.

    Args:
        key_prefix: Prefix of the widget keys ('venn' or 'pseudovenn'), so the
            two sets of widgets keep separate state.
        size_label: Label of the figure-size slider.

    Returns:
        Keyword arguments shared by ``venn`` and ``pseudovenn``.
    """
    fmt = st.radio(
        "**Number format:**",
        list(fmt_options.keys()),
        index=0,
        horizontal=True, key=f'{key_prefix}_fmt')
    cmap = st.selectbox(
        "**Colors:**",
        list(cmap_options.keys()),
        index=7, key=f'{key_prefix}_cmap')
    font_size = st.slider("**Font size:**", min_value=5, max_value=20, value=10, step=1,
                          key=f'{key_prefix}_font_size', help=None)
    fig_size = st.slider(size_label, min_value=5, max_value=20, value=10, step=1,
                         key=f'{key_prefix}_fig_size', help=None)
    legend_loc = st.selectbox(
        "**Legend position:**",
        list(legend_loc_options.keys()),
        index=0, key=f'{key_prefix}_legend_loc')
    return {
        'fmt': fmt_options[fmt],
        'cmap': cmap_options[cmap],
        'fontsize': font_size,
        'legend_loc': legend_loc_options[legend_loc],
        'figsize': (fig_size, fig_size),
    }


def _diagram_downloads(diagram_name, file_stem):
    """Render the PNG and SVG download buttons for the current figure.

    Args:
        diagram_name: Name used in the button labels ('Venn' or 'Pseudo-Venn').
        file_stem: Download file name without its extension.
    """
    buffer_png = download_png()
    st.download_button(
        label=f"💾 Download {diagram_name} diagram (.png)",
        data=buffer_png,
        file_name=f'{file_stem}.png',
        mime='image/png',
    )
    buffer_svg = download_svg()
    st.download_button(
        label=f"💾 Download {diagram_name} diagram (.svg)",
        data=buffer_svg,
        file_name=f'{file_stem}.svg',
        mime='image/svg+xml',
    )
    st.write(
        'Try opening the .svg diagram using [Inkscape](https://inkscape.org/) to move shapes, resize, change font, colors and more.')


# Figures opened during this run. They are closed in the `finally` below so
# they do not pile up in pyplot's registry across reruns.
opened_figures = []
try:
    opened_figures.append(plt.figure(figsize=(8, 8)))

    # Suffix shared by every download name; the f-string also accepts
    # non-string column names.
    file_suffix = "".join(f"_{selected_list}" for selected_list in selection_lists)

    if len(selection_lists) > 1:
        with col3:
            venn_data = download_venn_data(selected_lists)
            st.download_button(label="💾 Download Venn data",
                               data=venn_data,
                               file_name=f'venn_data{file_suffix}.zip',
                               mime="application/zip", )

    if 1 < len(selection_lists) <= 6:  # Venn diagram 2 to 6 comparisons
        with col3:
            st.divider()
            # Settings
            st.subheader('⚙️Venn diagram settings')
            venn_kwargs = _diagram_settings('venn', "**Venn size**:")

        with col2:
            # Venn diagram
            st.subheader('Venn diagram')
            dataset_dict = {name: set(items_occurrence[name]) for name in selected_lists}
            venn(dataset_dict, **venn_kwargs)
            # Track whatever figure is current after drawing so it gets closed.
            opened_figures.append(plt.gcf())
            st.pyplot(plt)

        with col3:
            # Download PNG and SVG
            _diagram_downloads('Venn', f'venn{file_suffix}')

    if len(selection_lists) == 6:  # Pseudo-Venn for 6 comparison
        with col3:
            st.divider()
            # Pseudo-Venn settings
            st.subheader('⚙️Pseudo-Venn diagram settings',
                         help='Six-set true Venn diagrams are somewhat unwieldy, and not all intersections are usually of interest.\n\n'
                              'If you wish to display information about elements in hidden intersections,'
                              'uncheck the option **hidden intersections** below.\n\n'
                              'Some intersections are not present, but the most commonly wanted are.')
            pseudovenn_kwargs = _diagram_settings('pseudovenn', "**Pseudo-Venn size:**")
            hint_hidden_format = st.checkbox('**Hidden intersections**', value=True,
                                             help='Six-set true Venn diagrams are somewhat unwieldy, and not all intersections are usually of interest.\n\n'
                                                  'If you wish to display information about elements in hidden intersections,'
                                                  'uncheck the option **hidden intersections**.\n\n'
                                                  'Some intersections are not present, but the most commonly wanted are.')

        with col2:
            # Pseudo-Venn diagram
            st.subheader('Pseudo-Venn diagram')
            dataset_dict = {name: set(items_occurrence[name]) for name in selected_lists}
            # The checkbox reads "hidden", so the hint is requested only when
            # it is unchecked.
            pseudovenn(dataset_dict, hint_hidden=not hint_hidden_format, **pseudovenn_kwargs)
            opened_figures.append(plt.gcf())
            st.pyplot(plt)

        with col3:
            # Download PNG and SVG
            _diagram_downloads('Pseudo-Venn', f'pseudovenn{file_suffix}')

except Exception as e:
    # Any failure while building the diagrams is reported in the middle column.
    with col2:
        st.warning(f"It appears that there is an error with one or more values in your lists..."
                   "Please check your data. Otherwise, convert your file to .csv with the ';' deliminator.\n\n"
                   "If this does not resolve the problems, contact me by email (minnitijulien06@gmail.com ; minniti@ipmc.cnrs.fr) or submit a [GitHub Issue](https://github.com/Jumitti/vennlit_v2/issues).\n\n"
                   "Error information:"
                   f"{e}", icon='🚨')
finally:
    # The image and download buffers are already built, so the figures are no
    # longer needed.
    for figure in opened_figures:
        plt.close(figure)