diff --git a/src/interface/web/app/automations/page.tsx b/src/interface/web/app/automations/page.tsx
index 0de154715..65f2121ec 100644
--- a/src/interface/web/app/automations/page.tsx
+++ b/src/interface/web/app/automations/page.tsx
@@ -165,7 +165,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Weekly Newsletter",
         query_to_run:
-            "Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead",
+            "/research Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead",
         schedule: "9AM every Monday",
         next: "Next run at 9AM on Monday",
         crontime: "0 9 * * 1",
@@ -185,7 +185,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Front Page of Hacker News",
         query_to_run:
-            "Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links",
+            "/research Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links",
         schedule: "9PM on every Wednesday",
         next: "Next run at 9PM on Wednesday",
         crontime: "0 21 * * 3",
@@ -195,7 +195,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Market Summary",
         query_to_run:
-            "Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.",
+            "/research Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.",
         schedule: "9AM on every weekday",
         next: "Next run at 9AM on Monday",
         crontime: "0 9 * * *",
@@ -214,7 +214,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Round-up of research papers about AI in healthcare",
         query_to_run:
-            "Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.",
+            "/research Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.",
         schedule: "9AM every Friday",
         next: "Next run at 9AM on Friday",
         crontime: "0 9 * * 5",
diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py
index e170818ae..1f32ffda8 100644
--- a/src/khoj/routers/api_chat.py
+++ b/src/khoj/routers/api_chat.py
@@ -724,7 +724,16 @@ def collect_telemetry():
                 yield result
             return
 
-        conversation_commands = [get_conversation_command(query=q, any_references=True)]
+        # Automated tasks are handled before to allow mixing them with other conversation commands
+        cmds_to_rate_limit = []
+        is_automated_task = False
+        if q.startswith("/automated_task"):
+            is_automated_task = True
+            q = q.replace("/automated_task", "").lstrip()
+            cmds_to_rate_limit += [ConversationCommand.AutomatedTask]
+
+        # Extract conversation command from query
+        conversation_commands = [get_conversation_command(query=q)]
 
         conversation = await ConversationAdapters.aget_conversation_by_user(
             user,
@@ -757,11 +766,8 @@ def collect_telemetry():
         location = None
         if city or region or country or country_code:
             location = LocationData(city=city, region=region, country=country, country_code=country_code)
-
         user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
         meta_log = conversation.conversation_log
-        is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask]
 
         researched_results = ""
         online_results: Dict = dict()
@@ -778,7 +784,7 @@ def collect_telemetry():
         generated_excalidraw_diagram: str = None
         program_execution_context: List[str] = []
 
-        if conversation_commands == [ConversationCommand.Default] or is_automated_task:
+        if conversation_commands == [ConversationCommand.Default]:
            chosen_io = await aget_data_sources_and_output_format(
                 q,
                 meta_log,
@@ -799,7 +805,8 @@ def collect_telemetry():
            async for result in send_event(ChatEvent.STATUS, f"**Selected Tools:** {conversation_commands_str}"):
                 yield result
 
-        for cmd in conversation_commands:
+        cmds_to_rate_limit += conversation_commands
+        for cmd in cmds_to_rate_limit:
             try:
                 await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
                 q = q.replace(f"/{cmd.value}", "").strip()
diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py
index af0e980dd..c77b9443b 100644
--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@@ -231,7 +231,7 @@ def get_next_url(request: Request) -> str:
     return urljoin(str(request.base_url).rstrip("/"), next_path)
 
 
-def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
+def get_conversation_command(query: str) -> ConversationCommand:
     if query.startswith("/notes"):
         return ConversationCommand.Notes
     elif query.startswith("/help"):
@@ -254,9 +254,6 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
         return ConversationCommand.Code
     elif query.startswith("/research"):
         return ConversationCommand.Research
-    # If no relevant notes found for the given query
-    elif not any_references:
-        return ConversationCommand.General
     else:
         return ConversationCommand.Default
 
@@ -408,42 +405,39 @@ async def aget_data_sources_and_output_format(
         response = clean_json(response)
         response = json.loads(response)
 
-        selected_sources = [q.strip() for q in response.get("source", []) if q.strip()]
-        selected_output = response.get("output", "text").strip()  # Default to text output
+        chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
+        chosen_output = response.get("output", "text").strip()  # Default to text output
 
-        if not isinstance(selected_sources, list) or not selected_sources or len(selected_sources) == 0:
+        if is_none_or_empty(chosen_sources) or not isinstance(chosen_sources, list):
             raise ValueError(
-                f"Invalid response for determining relevant tools: {selected_sources}. Raw Response: {response}"
+                f"Invalid response for determining relevant tools: {chosen_sources}. Raw Response: {response}"
             )
 
-        result: Dict = {"sources": [], "output": None if not is_task else ConversationCommand.AutomatedTask}
-        for selected_source in selected_sources:
-            # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-            if (
-                selected_source in source_options.keys()
-                and isinstance(result["sources"], list)
-                and (len(agent_sources) == 0 or selected_source in agent_sources)
-            ):
-                # Check whether the tool exists as a valid ConversationCommand
-                result["sources"].append(ConversationCommand(selected_source))
-
-        # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-        if selected_output in output_options.keys() and (len(agent_outputs) == 0 or selected_output in agent_outputs):
-            # Check whether the tool exists as a valid ConversationCommand
-            result["output"] = ConversationCommand(selected_output)
-
-        if is_none_or_empty(result):
+        output_mode = ConversationCommand.Text
+        # Verify selected output mode is enabled for the agent, as the LLM can sometimes get confused by the tool options.
+        if chosen_output in output_options.keys() and (len(agent_outputs) == 0 or chosen_output in agent_outputs):
+            # Ensure that the chosen output mode exists as a valid ConversationCommand
+            output_mode = ConversationCommand(chosen_output)
+
+        data_sources = []
+        # Verify selected data sources are enabled for the agent, as the LLM can sometimes get confused by the tool options.
+        for chosen_source in chosen_sources:
+            # Ensure that the chosen data source exists as a valid ConversationCommand
+            if chosen_source in source_options.keys() and (len(agent_sources) == 0 or chosen_source in agent_sources):
+                data_sources.append(ConversationCommand(chosen_source))
+
+        # Fallback to default sources if the inferred data sources are unset or invalid
+        if is_none_or_empty(data_sources):
             if len(agent_sources) == 0:
-                result = {"sources": [ConversationCommand.Default], "output": ConversationCommand.Text}
+                data_sources = [ConversationCommand.Default]
             else:
-                result = {"sources": [ConversationCommand.General], "output": ConversationCommand.Text}
+                data_sources = [ConversationCommand.General]
     except Exception as e:
         logger.error(f"Invalid response for determining relevant tools: {response}. Error: {e}", exc_info=True)
-        sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
-        output = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text
-        result = {"sources": sources, "output": output}
+        data_sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
+        output_mode = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text
 
-    return result
+    return {"sources": data_sources, "output": output_mode}
 
 
 async def infer_webpage_urls(
@@ -1686,7 +1680,7 @@ def scheduled_chat(
         last_run_time = datetime.strptime(last_run_time, "%Y-%m-%d %I:%M %p %Z").replace(tzinfo=timezone.utc)
 
         # If the last run time was within the last 6 hours, don't run it again. This helps avoid multithreading issues and rate limits.
-        if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 21600:
+        if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 6 * 60 * 60:
             logger.info(f"Skipping scheduled chat {job_id} as the next run time is in the future.")
             return
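
Illustrative sketch, not part of the patch: assuming only the ConversationCommand values referenced above (the stub enum below is a hypothetical, trimmed stand-in for khoj's real one), the reworked flow lets an automation query such as "/automated_task /research Summarize ..." carry both the automated-task marker and an explicit slash command. Both commands are rate limited, while only the explicit command drives tool selection, which is why the suggested automations can now prefix their queries with /research.

from enum import Enum


class ConversationCommand(str, Enum):
    # Hypothetical stub of khoj's ConversationCommand enum; only the values used here
    Default = "default"
    Research = "research"
    AutomatedTask = "automated_task"


def get_conversation_command(query: str) -> ConversationCommand:
    # Mirrors the patched helper above: no `any_references` parameter and no
    # fallback to General; unprefixed queries map to Default
    if query.startswith("/research"):
        return ConversationCommand.Research
    return ConversationCommand.Default


q = "/automated_task /research Summarize the top 5 posts from https://news.ycombinator.com/best"
cmds_to_rate_limit = []
is_automated_task = False
if q.startswith("/automated_task"):
    # Strip the automation marker before extracting the explicit command
    is_automated_task = True
    q = q.replace("/automated_task", "").lstrip()
    cmds_to_rate_limit += [ConversationCommand.AutomatedTask]

conversation_commands = [get_conversation_command(query=q)]
cmds_to_rate_limit += conversation_commands

assert is_automated_task
assert conversation_commands == [ConversationCommand.Research]
assert cmds_to_rate_limit == [ConversationCommand.AutomatedTask, ConversationCommand.Research]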