diff --git a/streamlit_app/FAIR_MS_Library_Editor.py b/streamlit_app/FAIR_MS_Library_Editor.py index 37e93eb..25a0520 100644 --- a/streamlit_app/FAIR_MS_Library_Editor.py +++ b/streamlit_app/FAIR_MS_Library_Editor.py @@ -50,20 +50,15 @@ st.markdown("## Datasets") if 'datasets' not in st.session_state or st.session_state['datasets'] == {}: st.warning("Please upload a file to begin!") - if 'selected_sheets' not in st.session_state or st.session_state['selected_sheets'] == {}: - st.warning("Please select a dataset to begin!") + #if 'selected_sheets' not in st.session_state or st.session_state['selected_sheets'] == {}: + #st.warning("Please select a dataset to begin!") # with st.spinner("Loading..."): # time.sleep(5) # st.success("Done!") - if 'datasets' in st.session_state and st.session_state['datasets'] != {}: - for key in st.session_state['selected_sheets']: - with st.expander(key): - datasets = st.session_state['datasets'] - rowsMetricColumn, columnsMetricColumn = st.columns(2) - with rowsMetricColumn: - st.metric('Rows', datasets[key].shape[0]) - with columnsMetricColumn: - st.metric('Columns', datasets[key].shape[1]) + if 'df_spectra' in st.session_state and st.session_state['df_spectra'] != {}: + + df_spectra = st.session_state['df_spectra'] + st.metric('Detected how many spectra', len(df_spectra)) # if st.button("Edit", key=key): # selected_sheet = key # if key in datasets_metadata: diff --git a/streamlit_app/README.md b/streamlit_app/README.md index 3cb118f..1a79f98 100644 --- a/streamlit_app/README.md +++ b/streamlit_app/README.md @@ -11,6 +11,15 @@ Use python venv to use defined dependencies. python -m venv venv source venv/bin/activate + +### windows + 1. cd C:/Users/rfm848 + 2. python -m venv venv + 3. .\venv\Scripts\activate.bat + 4. Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + 5. .\venv\Scripts\activate.ps1 + + to activate the virtual environment. You can then use the provided requirements.txt to populate the required dependencies in your virtual environment. diff --git a/streamlit_app/pages/1_File_Import.py b/streamlit_app/pages/1_File_Import.py index 4f6229e..49b4c36 100644 --- a/streamlit_app/pages/1_File_Import.py +++ b/streamlit_app/pages/1_File_Import.py @@ -1,9 +1,12 @@ import streamlit as st import pandas as pd +from matchms.importing import load_from_mgf +from tempfile import NamedTemporaryFile + st.set_page_config( layout="wide", - page_title="File Import - FAIR MS Library Curation Editor", + page_title="File Import (.mgf) - FAIR MS Library Curation Editor", #page_icon="assets/favicon.ico", menu_items={ 'Get Help': 'https://github.com/mzmine/biohack23_p15', @@ -12,52 +15,54 @@ } ) -st.markdown("## File Import") -st.markdown("Please select an Excel file to upload. The file should contain one or more sheets. Each sheet should contain sample columns, detailing factors of each individual sample (rows). Lipid identities are the column headers of the non-sample columns, quantities should be reported in the cells.") +st.markdown("## File Import (.mgf)") +st.markdown("Please select an mgf to upload.") + +uploaded_file = st.file_uploader("Choose a file", type = ".mgf") +st.set_option('deprecation.showfileUploaderEncoding', False) + + -uploaded_file = st.file_uploader("Choose a file", ) if uploaded_file is not None: print(uploaded_file) st.session_state['uploaded_file'] = uploaded_file if 'uploaded_file' in st.session_state and st.session_state['uploaded_file'] is not None: print("Uploaded file:", st.session_state['uploaded_file']) - uploaded_file = st.session_state['uploaded_file'] + #uploaded_file = st.session_state['uploaded_file'] with st.spinner('Loading data...'): datasets = {} if 'datasets' in st.session_state: datasets = st.session_state['datasets'] else: st.session_state['datasets'] = datasets + + with NamedTemporaryFile(dir='.', suffix='.mgf', mode = "wb") as f: + f.write(uploaded_file.getbuffer()) + f.close() + spectra_temp = load_from_mgf(f.name) + #spectra_temp = load_from_mgf(uploaded_file, "wb") + spectra = list(spectra_temp) + df_spectra = pd.DataFrame({"spectrum": spectra}) + - xl = pd.ExcelFile(uploaded_file) - sheets = xl.sheet_names - for sheet in sheets: - if sheet not in datasets: - df = pd.read_excel(uploaded_file, sheet_name=sheet) - datasets[sheet] = df - - st.markdown("## Preview Sheets") - sheet_selector = st.selectbox( - "Select a sheet", - sheets - ) - if sheet_selector is not None and sheet_selector in datasets: - rowsMetricColumn, columnsMetricColumn = st.columns(2) - with rowsMetricColumn: - st.metric('Rows', datasets[sheet_selector].shape[0]) - with columnsMetricColumn: - st.metric('Columns', datasets[sheet_selector].shape[1]) - st.write(datasets[sheet_selector]) - - st.markdown("## Select Sheets as Datasets") - selected_sheets = st.multiselect( - 'Each selected sheet will be converted to a dataset', - sheets, - sheets - ) - st.session_state['datasets'] = datasets - st.session_state['selected_sheets'] = selected_sheets - - if 'datasets' not in st.session_state: - st.session_state['datasets'] = [] + # make dataframe for metadata + def extract_metadata(df, keys): + for key in keys: + df[key] = df["spectrum"].apply(lambda x: x.get(key)) + + + extract_metadata(df_spectra, df_spectra["spectrum"][0].metadata.keys()) + + st.markdown("## Preview Information") + + st.metric('Detected how many spectra', len(df_spectra)) + + st.write(df_spectra) + + + st.session_state['df_spectra'] = df_spectra + st.session_state['len_spectra'] = len(df_spectra) + + if 'df_spectra' not in st.session_state: + st.session_state['df_spectra'] = [] diff --git a/streamlit_app/requirements.txt b/streamlit_app/requirements.txt index a3bf7c7..141050a 100644 --- a/streamlit_app/requirements.txt +++ b/streamlit_app/requirements.txt @@ -5,41 +5,63 @@ cachetools==5.3.2 certifi==2023.7.22 charset-normalizer==3.3.1 click==8.1.7 +contourpy==1.1.1 +cycler==0.12.1 +Deprecated==1.2.14 +fonttools==4.43.1 gitdb==4.0.11 GitPython==3.1.40 idna==3.4 importlib-metadata==6.8.0 +importlib-resources==6.1.0 Jinja2==3.1.2 jsonschema==4.19.2 jsonschema-specifications==2023.7.1 +kiwisolver==1.4.5 +llvmlite==0.40.1 +lxml==4.9.3 markdown-it-py==3.0.0 MarkupSafe==2.1.3 +matchms==0.23.1 +matplotlib==3.8.1 mdurl==0.1.2 -numpy==1.26.1 +networkx==3.2.1 +numba==0.57.1 +numpy==1.24.4 packaging==23.2 pandas==2.1.2 +pickydict==0.4.0 Pillow==10.1.0 protobuf==4.24.4 +PubChemPy==1.0.4 pyarrow==13.0.0 pydeck==0.8.1b0 Pygments==2.16.1 +pyparsing==3.1.1 +pyteomics==4.6.2 python-dateutil==2.8.2 pytz==2023.3.post1 +PyYAML==6.0.1 +rdkit==2023.9.1 referencing==0.30.2 requests==2.31.0 rich==13.6.0 rpds-py==0.10.6 +scipy==1.10.1 six==1.16.0 smmap==5.0.1 +sparsestack==0.4.1 streamlit==1.28.0 tenacity==8.2.3 toml==0.10.2 toolz==0.12.0 tornado==6.3.3 +tqdm==4.66.1 typing_extensions==4.8.0 tzdata==2023.3 tzlocal==5.2 urllib3==2.0.7 validators==0.22.0 watchdog==3.0.0 -zipp==3.17.0 +wrapt==1.15.0 +zipp==3.17.0 \ No newline at end of file