Skip to content

Commit

Permalink
Disambiguate input_filter variable names in fs_syncer functions
Browse files: browse the repository at this point in the history
  • Loading branch information
debanjum committed Oct 18, 2023
1 parent e3cd8b4 commit cf1cdc3
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions src/khoj/utils/fs_syncer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,25 @@ def extract_html_content(html_content: str):
return soup.get_text(strip=True, separator="\n")

# Extract required fields from config
input_files, input_filter = (
input_files, input_filters = (
config.input_files,
config.input_filter,
)

# Input Validation
if is_none_or_empty(input_files) and is_none_or_empty(input_filter):
if is_none_or_empty(input_files) and is_none_or_empty(input_filters):
logger.debug("At least one of input-files or input-file-filter is required to be specified")
return {}

"Get all files to process"
absolute_plaintext_files, filtered_plaintext_files = set(), set()
if input_files:
absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files}
if input_filter:
if input_filters:
filtered_plaintext_files = {
filtered_file
for jsonl_file_filter in input_filter
for filtered_file in glob.glob(get_absolute_path(jsonl_file_filter), recursive=True)
for plaintext_file_filter in input_filters
for filtered_file in glob.glob(get_absolute_path(plaintext_file_filter), recursive=True)
if os.path.isfile(filtered_file)
}

Expand Down Expand Up @@ -85,24 +85,24 @@ def extract_html_content(html_content: str):

def get_org_files(config: TextContentConfig):
# Extract required fields from config
org_files, org_file_filter = (
org_files, org_file_filters = (
config.input_files,
config.input_filter,
)

# Input Validation
if is_none_or_empty(org_files) and is_none_or_empty(org_file_filter):
if is_none_or_empty(org_files) and is_none_or_empty(org_file_filters):
logger.debug("At least one of org-files or org-file-filter is required to be specified")
return {}

"Get Org files to process"
absolute_org_files, filtered_org_files = set(), set()
if org_files:
absolute_org_files = {get_absolute_path(org_file) for org_file in org_files}
if org_file_filter:
if org_file_filters:
filtered_org_files = {
filtered_file
for org_file_filter in org_file_filter
for org_file_filter in org_file_filters
for filtered_file in glob.glob(get_absolute_path(org_file_filter), recursive=True)
if os.path.isfile(filtered_file)
}
Expand All @@ -129,13 +129,13 @@ def get_org_files(config: TextContentConfig):

def get_markdown_files(config: TextContentConfig):
# Extract required fields from config
markdown_files, markdown_file_filter = (
markdown_files, markdown_file_filters = (
config.input_files,
config.input_filter,
)

# Input Validation
if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filters):
logger.debug("At least one of markdown-files or markdown-file-filter is required to be specified")
return {}

Expand All @@ -144,10 +144,10 @@ def get_markdown_files(config: TextContentConfig):
if markdown_files:
absolute_markdown_files = {get_absolute_path(markdown_file) for markdown_file in markdown_files}

if markdown_file_filter:
if markdown_file_filters:
filtered_markdown_files = {
filtered_file
for markdown_file_filter in markdown_file_filter
for markdown_file_filter in markdown_file_filters
for filtered_file in glob.glob(get_absolute_path(markdown_file_filter), recursive=True)
if os.path.isfile(filtered_file)
}
Expand Down Expand Up @@ -179,24 +179,24 @@ def get_markdown_files(config: TextContentConfig):

def get_pdf_files(config: TextContentConfig):
# Extract required fields from config
pdf_files, pdf_file_filter = (
pdf_files, pdf_file_filters = (
config.input_files,
config.input_filter,
)

# Input Validation
if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filter):
if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filters):
logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified")
return {}

"Get PDF files to process"
absolute_pdf_files, filtered_pdf_files = set(), set()
if pdf_files:
absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files}
if pdf_file_filter:
if pdf_file_filters:
filtered_pdf_files = {
filtered_file
for pdf_file_filter in pdf_file_filter
for pdf_file_filter in pdf_file_filters
for filtered_file in glob.glob(get_absolute_path(pdf_file_filter), recursive=True)
if os.path.isfile(filtered_file)
}
Expand Down

0 comments on commit cf1cdc3

Please sign in to comment.