From 2eaadd716a8ce08fe833a008cc96fe6ae7c8f317 Mon Sep 17 00:00:00 2001 From: Cybonto <83996716+Cybonto@users.noreply.github.com> Date: Fri, 13 Dec 2024 08:58:21 -0500 Subject: [PATCH] minor improvements --- .../app/Summation_promptvariables.json | 38 +++++++++++++++++++ .../app/entity_bridge/data_loader.py | 4 +- .../app/entity_bridge/entity_matcher.py | 2 +- streamlit_app/app/entity_bridge/ui_helper.py | 1 + streamlit_app/app/pages/Entity_Bridge.py | 16 ++++---- 5 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 streamlit_app/app/Summation_promptvariables.json diff --git a/streamlit_app/app/Summation_promptvariables.json b/streamlit_app/app/Summation_promptvariables.json new file mode 100644 index 0000000..4af3b06 --- /dev/null +++ b/streamlit_app/app/Summation_promptvariables.json @@ -0,0 +1,38 @@ +{ + "guide_ArticleSum": { + "value": "- Read the entire article to understand overall content and structure.\n- Determine the main objective or thesis of the article\n- Locate and mark key points, arguments, and conclusions in each section.\n- Jot down important evidence, examples, or data that reinforce the main ideas.\n- Rewrite the main ideas and supporting details\n- Combine the paraphrased points into a coherent and concise narrative.\n- Compare the summary with the original article to ensure accuracy and completeness, making adjustments as needed.", + "num_tokens": 82, + "timestamp": "2024-12-10T08:16:26.699882", + "origin": "prompt" + }, + "get_rawSum": { + "value": "I have the following content:\n{{sample_content}}\n\nFor context, I work in FedRAMP.\n{{context_FedRAMP}}\n\nPlease summarize the content considering the context and the following summarization guide:\n{{guide_ArticleSum}}", + "num_tokens": 26, + "timestamp": "2024-12-10T08:20:40.927209", + "origin": "prompt" + }, + "guide_sum4personas": { + "value": "- Analyze the given persona.\n- Ascertain what each persona values and requires from the summary (e.g. 
, high-level insights for executives, technical details for engineers).\n- Adjust content focus, highlight relevant sections.\n- Customize language and tone.\n- Present the summary in a format suitable for each group.\n- If possible, use examples or scenarios that align with each persona's responsibilities and interests", + "num_tokens": 64, + "timestamp": "2024-12-10T08:34:28.099973", + "origin": "response" + }, + "get_targetedSum": { + "value": "I have the following content:\n{{sample_content}}\n\nI have the following context:\n{{context_FedRAMP}}\n\nI target the following persona:\n________\n\nI have the following guide for summarizing an article:\n{{guide_ArticleSum}}\n\nI have the following guide for tailoring summation to a targeted persona:\n{{guide_sum4personas}}\n\nPlease summarize the content considering the context, the targeted persona, and related guides. \n", + "num_tokens": 54, + "timestamp": "2024-12-10T08:42:32.686433", + "origin": "prompt" + }, + "persona_FRdirector": { + "value": "**Responsibilities:**\n- Program Oversight: Lead the FedRAMP Program Management Office (PMO) in operating a security authorization process that meets federal agencies' needs and provides a navigable framework for cloud service providers.\n- Policy Guidance: Collaborate with the FedRAMP Board to approve and guide policies, ensuring the program's health and performance.\n- Stakeholder Engagement: Build and foster strong partnerships with FedRAMP stakeholders, including federal agencies, cloud service providers, and third-party assessment organizations.\n- Program Expansion: Grow the use of secure cloud technologies within government agencies and enhance the framework for securing and authorizing these technologies.\nFedRAMP\n\n**Goals:**\n- Accelerate Cloud Adoption: Increase the adoption of secure cloud services across federal agencies by streamlining the authorization process and reducing barriers for cloud service providers.\n- Enhance Security Frameworks: 
Continuously improve FedRAMP's security assessment and authorization processes to address evolving cybersecurity threats and technological advancements.\n- Promote Collaboration: Engage with industry and government stakeholders to ensure FedRAMP policies and processes are transparent, efficient, and aligned with best practices.\n\n**Challenges:**\n- Evolving Cyber Threats: Staying ahead of emerging cybersecurity threats and ensuring that authorized cloud services maintain robust security postures.\n- Process Efficiency: Balancing the need for thorough security assessments with the demand for timely authorizations to facilitate rapid cloud adoption.\n- Resource Management: Allocating sufficient resources to support the growing number of cloud service providers seeking FedRAMP authorization while maintaining high assessment standards.\n\n**Communication Preferences:**\n- Data-Driven Insights: Values concise, data-driven reports that highlight key metrics and trends relevant to cloud security and program performance.\n- Strategic Alignment: Prefers communications that align with FedRAMP's strategic goals and offer actionable recommendations for program improvement.\n- Collaborative Dialogue: Encourages open and transparent discussions with stakeholders to foster collaboration and mutual understanding.", + "num_tokens": 286, + "timestamp": "2024-12-10T09:15:07.242544", + "origin": "response" + }, + "get_targetedSum2": { + "value": "I have the following context:\n{{context_FedRAMP}}\n\nI target the FedRAMP Director persona with the following details:\n{{persona_FRdirector}}\n\nI have the following guide for summarizing an article:\n{{guide_ArticleSum}}\n\nI have the following guide for tailoring summation to a targeted persona:\n{{guide_sum4personas}}\n\nThe content to summarize is as follows:\n{{sample_content}}\n\nPlease summarize the content considering the context, the FedRAMP Director persona, and the above guides. 
\n", + "num_tokens": 63, + "timestamp": "2024-12-10T09:28:15.900391", + "origin": "prompt" + } +} \ No newline at end of file diff --git a/streamlit_app/app/entity_bridge/data_loader.py b/streamlit_app/app/entity_bridge/data_loader.py index 2df791e..4041aed 100644 --- a/streamlit_app/app/entity_bridge/data_loader.py +++ b/streamlit_app/app/entity_bridge/data_loader.py @@ -78,7 +78,7 @@ def handle_missing_data(df, strategy, default_value=None, missing_threshold=0.5) raise ValueError(f"Unsupported missing data handling strategy: {strategy}") return df -def select_fields(df, file_name, idx): +def select_fields(df, file_name, idx): #potentially deprecated func """ Allow the user to select fields from the DataFrame. @@ -146,7 +146,7 @@ def select_fields(df, file_name, idx): } return selected_fields -def load_and_preprocess_files(uploaded_files): +def load_and_preprocess_files(uploaded_files): # potentially deprecated func """ Load and preprocess multiple uploaded files. diff --git a/streamlit_app/app/entity_bridge/entity_matcher.py b/streamlit_app/app/entity_bridge/entity_matcher.py index e40c7d9..d59f305 100644 --- a/streamlit_app/app/entity_bridge/entity_matcher.py +++ b/streamlit_app/app/entity_bridge/entity_matcher.py @@ -364,7 +364,7 @@ def construct_unique_child_list(data_frames): return unique_children_df -def enrich_data_frames_with_unique_ids(data_frames, unique_parents_df, unique_children_df): +def enrich_data_frames_with_unique_entity_ids(data_frames, unique_parents_df, unique_children_df): """ Enrich the original data frames with unique parent and child IDs. 
diff --git a/streamlit_app/app/entity_bridge/ui_helper.py b/streamlit_app/app/entity_bridge/ui_helper.py index cf3b37e..ec26275 100644 --- a/streamlit_app/app/entity_bridge/ui_helper.py +++ b/streamlit_app/app/entity_bridge/ui_helper.py @@ -10,6 +10,7 @@ import pandas as pd import os import requests +from entity_bridge.llm_integration import OllamaClient def display_file_upload(): """ diff --git a/streamlit_app/app/pages/Entity_Bridge.py b/streamlit_app/app/pages/Entity_Bridge.py index b931e46..b00013b 100644 --- a/streamlit_app/app/pages/Entity_Bridge.py +++ b/streamlit_app/app/pages/Entity_Bridge.py @@ -13,6 +13,7 @@ from entity_bridge import entity_matcher from entity_bridge import ui_helper from entity_bridge import llm_integration +from entity_bridge import utils if 'proceed1' not in st.session_state: st.session_state['proceed1'] = False @@ -216,7 +217,7 @@ def process_file(file, idx): # For each confirmed group, generate a unique ID for the group's parent, map group member IDs for group in confirmed_groups: - group_parent_id = generate_unique_identifier() + group_parent_id = utils.generate_unique_identifier() group_members = group['members'] group_parent_name = group['parent'] @@ -224,10 +225,10 @@ def process_file(file, idx): for member_name in group_members: unique_parents_df.loc[unique_parents_df['ParentName'] == member_name, 'UniqueParentID'] = group_parent_id # Optionally, you may want to add a new row for the group parent - unique_parents_df = unique_parents_df.append({ - 'ParentName': group_parent_name, - 'UniqueParentID': group_parent_id - }, ignore_index=True) + #unique_parents_df = unique_parents_df.append({ + #'ParentName': group_parent_name, + #'UniqueParentID': group_parent_id + #}, ignore_index=True) st.write(f"**Group Parent ID:** {group_parent_id}") st.write(f"**Group Parent Name:** {group_parent_name}") @@ -238,15 +239,16 @@ def process_file(file, idx): # Proceed to Enrich DataFrames with Unique IDs using the updated unique_parents_df 
enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_ids( - deduplicated_data_frames, unique_parents_df, unique_children_df + deduplicated_data_frames, unique_parents_df, entity_type='parent' ) # Step 6: Construct Unique Child List unique_children_df = entity_matcher.construct_unique_child_list(deduplicated_data_frames) # Step 7: Enrich DataFrames with Unique IDs + # Need rework: might work best if enrich with parent id only. enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_ids( - deduplicated_data_frames, unique_parents_df, unique_children_df + deduplicated_data_frames, unique_children_df,entity_type='child' ) # Step 8: Display Enriched DataFrames