minor improvements
Cybonto committed Dec 13, 2024
1 parent 000af00 commit 2eaadd7
Showing 5 changed files with 51 additions and 10 deletions.
38 changes: 38 additions & 0 deletions streamlit_app/app/Summation_promptvariables.json
@@ -0,0 +1,38 @@
{
  "guide_ArticleSum": {
    "value": "- Read the entire article to understand overall content and structure.\n- Determine the main objective or thesis of the article.\n- Locate and mark key points, arguments, and conclusions in each section.\n- Jot down important evidence, examples, or data that reinforce the main ideas.\n- Rewrite the main ideas and supporting details.\n- Combine the paraphrased points into a coherent and concise narrative.\n- Compare the summary with the original article to ensure accuracy and completeness, making adjustments as needed.",
    "num_tokens": 82,
    "timestamp": "2024-12-10T08:16:26.699882",
    "origin": "prompt"
  },
  "get_rawSum": {
    "value": "I have the following content:\n{{sample_content}}\n\nFor context, I work in FedRAMP.\n{{context_FedRAMP}}\n\nPlease summarize the content considering the context and the following summarization guide:\n{{guide_ArticleSum}}",
    "num_tokens": 26,
    "timestamp": "2024-12-10T08:20:40.927209",
    "origin": "prompt"
  },
  "guide_sum4personas": {
    "value": "- Analyze the given persona.\n- Ascertain what each persona values and requires from the summary (e.g., high-level insights for executives, technical details for engineers).\n- Adjust content focus, highlight relevant sections.\n- Customize language and tone.\n- Present the summary in a format suitable for each group.\n- If possible, use examples or scenarios that align with each persona's responsibilities and interests.",
    "num_tokens": 64,
    "timestamp": "2024-12-10T08:34:28.099973",
    "origin": "response"
  },
  "get_targetedSum": {
    "value": "I have the following content:\n{{sample_content}}\n\nI have the following context:\n{{context_FedRAMP}}\n\nI target the following persona:\n________\n\nI have the following guide for summarizing an article:\n{{guide_ArticleSum}}\n\nI have the following guide for tailoring summation to a targeted persona:\n{{guide_sum4personas}}\n\nPlease summarize the content considering the context, the targeted persona, and related guides.\n",
    "num_tokens": 54,
    "timestamp": "2024-12-10T08:42:32.686433",
    "origin": "prompt"
  },
  "persona_FRdirector": {
    "value": "**Responsibilities:**\n- Program Oversight: Lead the FedRAMP Program Management Office (PMO) in operating a security authorization process that meets federal agencies' needs and provides a navigable framework for cloud service providers.\n- Policy Guidance: Collaborate with the FedRAMP Board to approve and guide policies, ensuring the program's health and performance.\n- Stakeholder Engagement: Build and foster strong partnerships with FedRAMP stakeholders, including federal agencies, cloud service providers, and third-party assessment organizations.\n- Program Expansion: Grow the use of secure cloud technologies within government agencies and enhance the framework for securing and authorizing these technologies.\n\n**Goals:**\n- Accelerate Cloud Adoption: Increase the adoption of secure cloud services across federal agencies by streamlining the authorization process and reducing barriers for cloud service providers.\n- Enhance Security Frameworks: Continuously improve FedRAMP's security assessment and authorization processes to address evolving cybersecurity threats and technological advancements.\n- Promote Collaboration: Engage with industry and government stakeholders to ensure FedRAMP policies and processes are transparent, efficient, and aligned with best practices.\n\n**Challenges:**\n- Evolving Cyber Threats: Staying ahead of emerging cybersecurity threats and ensuring that authorized cloud services maintain robust security postures.\n- Process Efficiency: Balancing the need for thorough security assessments with the demand for timely authorizations to facilitate rapid cloud adoption.\n- Resource Management: Allocating sufficient resources to support the growing number of cloud service providers seeking FedRAMP authorization while maintaining high assessment standards.\n\n**Communication Preferences:**\n- Data-Driven Insights: Values concise, data-driven reports that highlight key metrics and trends relevant to cloud security and program performance.\n- Strategic Alignment: Prefers communications that align with FedRAMP's strategic goals and offer actionable recommendations for program improvement.\n- Collaborative Dialogue: Encourages open and transparent discussions with stakeholders to foster collaboration and mutual understanding.",
    "num_tokens": 286,
    "timestamp": "2024-12-10T09:15:07.242544",
    "origin": "response"
  },
  "get_targetedSum2": {
    "value": "I have the following context:\n{{context_FedRAMP}}\n\nI target the FedRAMP Director persona with the following details:\n{{persona_FRdirector}}\n\nI have the following guide for summarizing an article:\n{{guide_ArticleSum}}\n\nI have the following guide for tailoring summation to a targeted persona:\n{{guide_sum4personas}}\n\nThe content to summarize is as follows:\n{{sample_content}}\n\nPlease summarize the content considering the context, the FedRAMP Director persona, and the above guides.\n",
    "num_tokens": 63,
    "timestamp": "2024-12-10T09:28:15.900391",
    "origin": "prompt"
  }
}
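The templates above compose through `{{variable}}` placeholders: `get_targetedSum2`, for instance, pulls in `context_FedRAMP`, `persona_FRdirector`, and both guides before the content itself. A minimal rendering sketch — the `render_prompt` helper, its recursive resolution, and the `extra` dict are assumptions, not the app's actual loader:

```python
import json
import re

def render_prompt(name, variables, extra=None):
    """Resolve a stored prompt, recursively filling {{placeholder}} tokens.

    Hypothetical helper: `variables` is the parsed JSON above, and `extra`
    supplies values (e.g. sample_content) that are not stored in the file.
    """
    extra = extra or {}

    def resolve(match):
        key = match.group(1)
        if key in extra:
            return extra[key]
        if key in variables:
            # Nested templates are rendered before being inserted.
            return render_prompt(key, variables, extra)
        return match.group(0)  # leave unknown placeholders untouched

    return re.sub(r"\{\{(\w+)\}\}", resolve, variables[name]["value"])

with open("streamlit_app/app/Summation_promptvariables.json") as f:
    prompt_vars = json.load(f)

prompt = render_prompt(
    "get_targetedSum2",
    prompt_vars,
    extra={"sample_content": "…article text…", "context_FedRAMP": "…program context…"},
)
print(prompt)
```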
4 changes: 2 additions & 2 deletions streamlit_app/app/entity_bridge/data_loader.py
@@ -78,7 +78,7 @@ def handle_missing_data(df, strategy, default_value=None, missing_threshold=0.5)
         raise ValueError(f"Unsupported missing data handling strategy: {strategy}")
     return df
 
-def select_fields(df, file_name, idx):
+def select_fields(df, file_name, idx):  # potentially deprecated func
     """
     Allow the user to select fields from the DataFrame.
@@ -146,7 +146,7 @@ def select_fields(df, file_name, idx):
     }
     return selected_fields
 
-def load_and_preprocess_files(uploaded_files):
+def load_and_preprocess_files(uploaded_files):  # potentially deprecated func
     """
     Load and preprocess multiple uploaded files.
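For orientation, `handle_missing_data` above dispatches on its `strategy` argument and raises the `ValueError` shown for anything unrecognized. A minimal sketch of what such strategies could look like — the names `remove`, `fill`, and `drop_column` are illustrative assumptions, not necessarily the ones data_loader.py implements:

```python
import pandas as pd

def handle_missing_data(df, strategy, default_value=None, missing_threshold=0.5):
    """Illustrative sketch; the strategy names are assumptions."""
    if strategy == "remove":
        # Drop any row containing a missing value.
        df = df.dropna()
    elif strategy == "fill":
        # Replace missing cells with a caller-supplied default.
        df = df.fillna(default_value)
    elif strategy == "drop_column":
        # Drop columns whose share of missing values exceeds the threshold.
        df = df.loc[:, df.isna().mean() <= missing_threshold]
    else:
        raise ValueError(f"Unsupported missing data handling strategy: {strategy}")
    return df

df = pd.DataFrame({"a": [1, None, 3], "b": [None, None, 6]})
print(handle_missing_data(df, "drop_column"))  # keeps "a", drops "b"
```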
2 changes: 1 addition & 1 deletion streamlit_app/app/entity_bridge/entity_matcher.py
@@ -364,7 +364,7 @@ def construct_unique_child_list(data_frames):
     return unique_children_df
 
 
-def enrich_data_frames_with_unique_ids(data_frames, unique_parents_df, unique_children_df):
+def enrich_data_frames_with_unique_entity_ids(data_frames, unique_parents_df, unique_children_df):
     """
     Enrich the original data frames with unique parent and child IDs.
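The rename lands alongside call-site changes in Entity_Bridge.py (below) that pass an `entity_type` keyword, so the function is presumably invoked once for parents and once for children. A hedged sketch following the call-site signature — the name/ID column conventions are assumptions:

```python
import pandas as pd

def enrich_data_frames_with_unique_entity_ids(data_frames, unique_df, entity_type='parent'):
    """Sketch: attach the unique ID for one entity type by merging on its name.

    Assumes each frame and unique_df share a ParentName/ChildName column and
    that unique_df carries the matching UniqueParentID/UniqueChildID column.
    """
    name_col = 'ParentName' if entity_type == 'parent' else 'ChildName'
    id_col = 'UniqueParentID' if entity_type == 'parent' else 'UniqueChildID'
    enriched = []
    for df in data_frames:
        # Left merge keeps every original row, even without a matching ID.
        enriched.append(df.merge(unique_df[[name_col, id_col]], on=name_col, how='left'))
    return enriched
```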
1 change: 1 addition & 0 deletions streamlit_app/app/entity_bridge/ui_helper.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import os
 import requests
+from entity_bridge.llm_integration import OllamaClient
 
 def display_file_upload():
     """
16 changes: 9 additions & 7 deletions streamlit_app/app/pages/Entity_Bridge.py
@@ -13,6 +13,7 @@
 from entity_bridge import entity_matcher
 from entity_bridge import ui_helper
 from entity_bridge import llm_integration
+from entity_bridge import utils
 
 if 'proceed1' not in st.session_state:
     st.session_state['proceed1'] = False
@@ -216,18 +217,18 @@ def process_file(file, idx):
 
     # For each confirmed group, generate a unique ID for the group's parent, map group member IDs
     for group in confirmed_groups:
-        group_parent_id = generate_unique_identifier()
+        group_parent_id = utils.generate_unique_identifier()
         group_members = group['members']
         group_parent_name = group['parent']
 
         # Update unique_parents_df
         for member_name in group_members:
             unique_parents_df.loc[unique_parents_df['ParentName'] == member_name, 'UniqueParentID'] = group_parent_id
         # Optionally, you may want to add a new row for the group parent
-        unique_parents_df = unique_parents_df.append({
-            'ParentName': group_parent_name,
-            'UniqueParentID': group_parent_id
-        }, ignore_index=True)
+        #unique_parents_df = unique_parents_df.append({
+        #    'ParentName': group_parent_name,
+        #    'UniqueParentID': group_parent_id
+        #}, ignore_index=True)
 
         st.write(f"**Group Parent ID:** {group_parent_id}")
         st.write(f"**Group Parent Name:** {group_parent_name}")
@@ -238,15 +239,16 @@

     # Proceed to Enrich DataFrames with Unique IDs using the updated unique_parents_df
-    enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_ids(
-        deduplicated_data_frames, unique_parents_df, unique_children_df
+    enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_entity_ids(
+        deduplicated_data_frames, unique_parents_df, entity_type='parent'
     )
 
     # Step 6: Construct Unique Child List
     unique_children_df = entity_matcher.construct_unique_child_list(deduplicated_data_frames)
 
     # Step 7: Enrich DataFrames with Unique IDs
-    enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_ids(
-        deduplicated_data_frames, unique_parents_df, unique_children_df
+    # Needs rework: might work best if enriching with parent IDs only.
+    enriched_data_frames = entity_matcher.enrich_data_frames_with_unique_entity_ids(
+        deduplicated_data_frames, unique_children_df, entity_type='child'
     )
 
     # Step 8: Display Enriched DataFrames
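Step 8 hands the enriched frames to the UI. A minimal Streamlit loop consistent with that step — the tab layout and placeholder data are assumptions, not necessarily what ui_helper renders:

```python
import pandas as pd
import streamlit as st

# Placeholder for the frames produced in Step 7.
enriched_data_frames = [pd.DataFrame({'ParentName': ['A'], 'UniqueParentID': ['p-1']})]

# One tab per enriched file, each showing its DataFrame.
tabs = st.tabs([f"File {i + 1}" for i in range(len(enriched_data_frames))])
for tab, df in zip(tabs, enriched_data_frames):
    with tab:
        st.dataframe(df)
```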
