Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Feature: Adding File Filtering to Conversations #788

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7c6be74
UI update for file filtered conversations
MythicalCow May 28, 2024
599917e
linter fixes
MythicalCow May 29, 2024
cf9f4dd
linter fixes
MythicalCow May 29, 2024
8569092
UI and API endpoints integrations complete. file querying changes to …
MythicalCow May 30, 2024
8640f24
UI streamlining by adding a plus button instead of a static search bar
MythicalCow May 30, 2024
0d87a8f
backend and UI conversation ID based file filtering implemented. file…
MythicalCow May 30, 2024
d2a369f
file filtering based on selected files implemented. further testing a…
MythicalCow May 30, 2024
75d251d
final touches and unnecessary comments removed.
MythicalCow May 31, 2024
fcc99db
adding authentication requirement to file-filters endpoint
MythicalCow May 31, 2024
b727a52
Apply file filters with correct syntax
sabaimran Jun 3, 2024
13d71a1
made PR changes for a better user experience
MythicalCow Jun 3, 2024
0c129ca
Merge branch 'khoj-ai:master' into features/conversation-file-filter
MythicalCow Jun 3, 2024
9714b63
padding fix
MythicalCow Jun 3, 2024
649edd0
Merge branch 'features/conversation-file-filter' of https://github.co…
MythicalCow Jun 3, 2024
cd204c7
small UI improvements
MythicalCow Jun 3, 2024
60de2b4
addressed security risk highlighted in PR.
MythicalCow Jun 5, 2024
f80654b
added handling for automatically adding an uploaded file to the curre…
MythicalCow Jun 5, 2024
1f52e9a
fixed API issue related to duplicate file management
MythicalCow Jun 5, 2024
e8a3455
Add cursor:pointer styling on :hover
sabaimran Jun 6, 2024
29b9826
Remove print statement
sabaimran Jun 6, 2024
00a4bf3
Remove comment for file_filters for consistency
sabaimran Jun 6, 2024
c65cc1b
Move FilterRequest from api_chat to raw_config
sabaimran Jun 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/interface/desktop/config.html
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ <h3 class="card-title">
button.sync-data:hover {
background-color: var(--summer-sun);
box-shadow: 0px 3px 0px var(--background-color);
cursor: pointer;
}
.sync-force-toggle {
align-content: center;
Expand Down
17 changes: 17 additions & 0 deletions src/khoj/database/migrations/0044_conversation_file_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.10 on 2024-05-29 19:56

from django.db import migrations, models


# Schema migration that adds the `file_filters` field to the `conversation` model.
class Migration(migrations.Migration):
# Must run after migration 0043 of the "database" app.
dependencies = [
("database", "0043_alter_chatmodeloptions_model_type"),
]

operations = [
migrations.AddField(
model_name="conversation",
name="file_filters",
# JSON field whose default is the `list` callable rather than a literal `[]`.
field=models.JSONField(default=list),
),
]
1 change: 1 addition & 0 deletions src/khoj/database/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ class Conversation(BaseModel):
slug = models.CharField(max_length=200, default=None, null=True, blank=True)
title = models.CharField(max_length=200, default=None, null=True, blank=True)
agent = models.ForeignKey(Agent, on_delete=models.SET_NULL, default=None, null=True, blank=True)
file_filters = models.JSONField(default=list)


class PublicConversation(BaseModel):
Expand Down
320 changes: 317 additions & 3 deletions src/khoj/interface/web/chat.html

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion src/khoj/routers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ async def extract_references_and_questions(
q: str,
n: int,
d: float,
conversation_id: int,
conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
location_data: LocationData = None,
send_status_func: Optional[Callable] = None,
Expand All @@ -308,8 +309,10 @@ async def extract_references_and_questions(
for filter in [DateFilter(), WordFilter(), FileFilter()]:
defiltered_query = filter.defilter(defiltered_query)
filters_in_query = q.replace(defiltered_query, "").strip()

conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)
filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
using_offline_chat = False
print(f"Filters in query: {filters_in_query}")
sabaimran marked this conversation as resolved.
Show resolved Hide resolved

# Infer search queries from user message
with timer("Extracting search queries took", logger):
Expand Down
57 changes: 54 additions & 3 deletions src/khoj/routers/api_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
get_device,
is_none_or_empty,
)
from khoj.utils.rawconfig import LocationData
from khoj.utils.rawconfig import FilterRequest, LocationData

# Initialize Router
logger = logging.getLogger(__name__)
Expand All @@ -73,6 +73,57 @@
from khoj.routers.email import send_query_feedback


@api_chat.get("/conversation/file-filters/{conversation_id}", response_class=Response)
@requires(["authenticated"])
def get_file_filter(request: Request, conversation_id: str) -> Response:
    """Return the conversation's file filters that still match a known file.

    Args:
        request: Incoming request; `request.user.object` identifies the user.
        conversation_id: Conversation id from the URL path, converted with `int()`.

    Returns:
        A 200 JSON response whose body is the list of filter filenames that are
        also present among the user's "computer" source filenames. The stored
        filters on the conversation are not modified by this endpoint.
    """
    conversation = ConversationAdapters.get_conversation_by_user(
        request.user.object, conversation_id=int(conversation_id)
    )
    # Build the lookup once so each filter is checked in O(1) instead of
    # re-scanning the whole filename collection per filter.
    # NOTE(review): assumes filenames are hashable (strings) - confirm against EntryAdapters.
    indexed_files = set(EntryAdapters.get_all_filenames_by_source(request.user.object, "computer"))
    file_filters = [file for file in conversation.file_filters if file in indexed_files]
    return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)


@api_chat.post("/conversation/file-filters", response_class=Response)
@requires(["authenticated"])
def add_file_filter(request: Request, filter: FilterRequest) -> Response:
    """Add a filename to a conversation's file filters and return the updated list.

    The filename is only added when it is among the user's "computer" source
    filenames and is not already a filter. Filters that no longer match a known
    file are dropped before saving.

    Args:
        request: Incoming request; `request.user.object` identifies the user.
        filter: Request body carrying `filename` and `conversation_id`.

    Returns:
        A 200 JSON response whose body is the conversation's updated filter list.

    Raises:
        HTTPException: 422 with the error text if any step fails.
    """
    try:
        conversation = ConversationAdapters.get_conversation_by_user(
            request.user.object, conversation_id=int(filter.conversation_id)
        )
        # Build the lookup once; it is used for both the add check and the pruning below.
        # NOTE(review): assumes filenames are hashable (strings) - confirm against EntryAdapters.
        indexed_files = set(EntryAdapters.get_all_filenames_by_source(request.user.object, "computer"))
        if filter.filename in indexed_files and filter.filename not in conversation.file_filters:
            conversation.file_filters.append(filter.filename)
        # Remove files from conversation.file_filters that are not in the known file
        # list, then persist the final state with a single write.
        conversation.file_filters = [file for file in conversation.file_filters if file in indexed_files]
        conversation.save()
        return Response(content=json.dumps(conversation.file_filters), media_type="application/json", status_code=200)
    except Exception as e:
        logger.error(f"Error adding file filter {filter.filename}: {e}", exc_info=True)
        raise HTTPException(status_code=422, detail=str(e)) from e


@api_chat.delete("/conversation/file-filters", response_class=Response)
@requires(["authenticated"])
def remove_file_filter(request: Request, filter: FilterRequest) -> Response:
    """Remove a filename from a conversation's file filters and return the updated list.

    Filters that no longer match one of the user's "computer" source filenames
    are dropped as well before saving.

    Args:
        request: Incoming request; `request.user.object` identifies the user.
        filter: Request body carrying `filename` and `conversation_id`.

    Returns:
        A 200 JSON response whose body is the conversation's updated filter list.
    """
    conversation = ConversationAdapters.get_conversation_by_user(
        request.user.object, conversation_id=int(filter.conversation_id)
    )
    if filter.filename in conversation.file_filters:
        conversation.file_filters.remove(filter.filename)
    # Remove files from conversation.file_filters that are not in the known file
    # list, then persist the final state with a single write.
    # NOTE(review): assumes filenames are hashable (strings) - confirm against EntryAdapters.
    indexed_files = set(EntryAdapters.get_all_filenames_by_source(request.user.object, "computer"))
    conversation.file_filters = [file for file in conversation.file_filters if file in indexed_files]
    conversation.save()
    return Response(content=json.dumps(conversation.file_filters), media_type="application/json", status_code=200)


class FeedbackData(BaseModel):
uquery: str
kquery: str
Expand Down Expand Up @@ -586,7 +637,7 @@ async def send_rate_limit_message(message: str):
continue

compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions(
websocket, meta_log, q, 7, 0.18, conversation_commands, location, send_status_update
websocket, meta_log, q, 7, 0.18, conversation_id, conversation_commands, location, send_status_update
)

if compiled_references:
Expand Down Expand Up @@ -838,7 +889,7 @@ async def chat(
return Response(content=llm_response, media_type="text/plain", status_code=200)

compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions(
request, meta_log, q, (n or 5), (d or math.inf), conversation_commands, location
request, meta_log, q, (n or 5), (d or math.inf), conversation_id, conversation_commands, location
)
online_results: Dict[str, Dict] = {}

Expand Down
5 changes: 5 additions & 0 deletions src/khoj/utils/rawconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ class LocationData(BaseModel):
country: Optional[str]


# Request body for the conversation file-filter endpoints.
class FilterRequest(BaseModel):
# Name of the file to add to or remove from the conversation's filters.
filename: str
# Conversation id sent as a string; the endpoints convert it with int().
conversation_id: str


class TextConfigBase(ConfigBase):
compressed_jsonl: Path
embeddings_file: Path
Expand Down
Loading