Skip to content

Commit

Permalink
feat(crawler): add multiple urls support (#1112)
Browse files Browse the repository at this point in the history
* feat(Field): add icon support

* feat(Crawler): replace submit button with send icon

* feat(crawler): add multiple urls support

* feat: add <FeedItems/> component to display adding items

* feat(FeedItems): add remove icon

* feat: add url displayer

* feat: add invalid url message

* fix: add crawler to upload page

* feat: clean useCrawler

* feat: rename Feed to KnowledgeToFeed

* feat: add tracking
  • Loading branch information
mamadoudicko authored Sep 5, 2023
1 parent 204d4fd commit efac5f4
Show file tree
Hide file tree
Showing 31 changed files with 289 additions and 46 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { ChatInput, Feed } from "./components";
import { ChatInput, KnowledgeToFeed } from "./components";
import { useActionBar } from "./hooks/useActionBar";

export const ActionsBar = (): JSX.Element => {
const { isUploading, setIsUploading } = useActionBar();

return (
<div className={isUploading ? "h-full" : ""}>
<div className={isUploading ? "h-full flex flex-col flex-auto" : ""}>
{isUploading && (
<div className="shadow-md dark:shadow-primary/25 hover:shadow-xl transition-shadow rounded-xl bg-white dark:bg-black border border-black/10 dark:border-white/25 p-6">
<Feed onClose={() => setIsUploading(false)} />
<div className="flex flex-1 overflow-y-scroll shadow-md dark:shadow-primary/25 hover:shadow-xl transition-shadow rounded-xl bg-white dark:bg-black border border-black/10 dark:border-white/25 p-6">
<KnowledgeToFeed onClose={() => setIsUploading(false)} />
</div>
)}
<div className="flex mt-1 flex-col w-full shadow-md dark:shadow-primary/25 hover:shadow-xl transition-shadow rounded-xl bg-white dark:bg-black border border-black/10 dark:border-white/25 p-6">
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@ import { MdClose } from "react-icons/md";
import Button from "@/lib/components/ui/Button";
import { Divider } from "@/lib/components/ui/Divider";

import { FeedItems } from "./components";
import { Crawler } from "./components/Crawler";
import { FileUploader } from "./components/FileUploader";
import { useKnowledgeToFeed } from "./hooks/useKnowledgeToFeed";

type FeedProps = {
onClose: () => void;
};
export const Feed = ({ onClose }: FeedProps): JSX.Element => {
export const KnowledgeToFeed = ({ onClose }: FeedProps): JSX.Element => {
const { t } = useTranslation(["translation"]);
const { addContent, contents, removeContent } = useKnowledgeToFeed();

return (
<div className="flex flex-col w-full relative">
<div className="flex flex-col w-full table relative pb-5">
<div className="absolute right-2 top-1">
<Button variant={"tertiary"} onClick={onClose}>
<span>
Expand All @@ -24,7 +27,8 @@ export const Feed = ({ onClose }: FeedProps): JSX.Element => {
</div>
<FileUploader />
<Divider text={t("or")} className="m-5" />
<Crawler />
<Crawler addContent={addContent} />
<FeedItems contents={contents} removeContent={removeContent} />
</div>
);
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Pattern used by `isValidUrl`, compiled once at module load instead of on
 * every call.
 *
 * Each host label is written as `[a-z\d](?:[a-z\d-]*[a-z\d])?` rather than
 * `[a-z\d]([a-z\d-]*[a-z\d])*`. Both accept exactly the same labels (start and
 * end with a letter/digit, hyphens allowed in between), but the starred form
 * nests two quantifiers over the same characters and backtracks exponentially
 * on inputs such as a long run of letters followed by an invalid character.
 */
const URL_PATTERN = new RegExp(
  "^(https?:\\/\\/)?" + // optional protocol
    "((([a-z\\d](?:[a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}|" + // domain name
    "((\\d{1,3}\\.){3}\\d{1,3}))" + // OR IPv4 address
    "(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*" + // optional port and path
    "(\\?[;&a-z\\d%_.~+=-]*)?" + // optional query string
    "(\\#[-a-z\\d_]*)?$", // optional fragment locator
  "i"
);

/**
 * Tells whether a string looks like an http(s) URL.
 *
 * The protocol is optional, so bare hosts such as `example.com` are accepted.
 * The host must be a dotted domain name or an IPv4-shaped address; port, path,
 * query string and fragment are optional.
 *
 * @param urlString - Text to check. It is matched as-is; surrounding
 * whitespace makes it invalid.
 * @returns `true` when the whole string matches the URL pattern.
 */
export const isValidUrl = (urlString: string): boolean =>
  URL_PATTERN.test(urlString);
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"use client";
import { useRef, useState } from "react";
import { useTranslation } from "react-i18next";

import { useSupabase } from "@/lib/context/SupabaseProvider";
import { useToast } from "@/lib/hooks";
import { redirectToLogin } from "@/lib/router/redirectToLogin";
import { useEventTracking } from "@/services/analytics/useEventTracking";

import { FeedItemType } from "../../../types";
import { isValidUrl } from "../helpers/isValidUrl";

type UseCrawlerProps = {
  addContent: (content: FeedItemType) => void;
};

/**
 * State and submit logic for the "add a URL to crawl" input.
 *
 * Calls `redirectToLogin()` whenever the Supabase session is `null`.
 *
 * @param addContent - Callback that receives the accepted URL as a feed item
 * with `source: "crawl"`.
 * @returns The input ref, the current input value with its setter, and the
 * submit handler.
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export const useCrawler = ({ addContent }: UseCrawlerProps) => {
  const urlInputRef = useRef<HTMLInputElement | null>(null);
  const { session } = useSupabase();
  const { publish } = useToast();
  const { t } = useTranslation(["translation", "upload"]);
  const [urlToCrawl, setUrlToCrawl] = useState<string>("");
  const { track } = useEventTracking();

  if (session === null) {
    redirectToLogin();
  }

  const handleSubmit = () => {
    // Pasted URLs often carry leading/trailing whitespace; validate and store
    // the trimmed value so they are not rejected as invalid.
    const trimmedUrl = urlToCrawl.trim();

    // Nothing (or only whitespace) typed: ignore the submit silently.
    if (trimmedUrl === "") {
      return;
    }

    if (!isValidUrl(trimmedUrl)) {
      void track("URL_INVALID");
      publish({
        variant: "danger",
        text: t("invalidUrl"),
      });

      return;
    }

    void track("URL_CRAWLED");
    addContent({
      source: "crawl",
      url: trimmedUrl,
    });
    // Clear the field so the next URL can be typed right away.
    setUrlToCrawl("");
  };

  return {
    urlInputRef,
    urlToCrawl,
    setUrlToCrawl,
    handleSubmit,
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"use client";
import { useTranslation } from "react-i18next";
import { MdSend } from "react-icons/md";

import Button from "@/lib/components/ui/Button";
import Field from "@/lib/components/ui/Field";

import { useCrawler } from "./hooks/useCrawler";
import { FeedItemType } from "../../types";

type CrawlerProps = {
  addContent: (content: FeedItemType) => void;
};

/**
 * Single-field form for submitting a website URL.
 *
 * The field's value and the submit behaviour come from `useCrawler`; this
 * component only wires them to the `Field` input and its send icon button.
 */
export const Crawler = ({ addContent }: CrawlerProps): JSX.Element => {
  const crawler = useCrawler({ addContent });
  const { t } = useTranslation(["translation", "upload"]);

  const placeholder = t("webSite", { ns: "upload" });

  // Rendered inside the field as its trailing icon.
  const sendButton = (
    <Button variant={"tertiary"}>
      <MdSend />
    </Button>
  );

  return (
    <div className="w-full flex justify-center items-center">
      <div className="max-w-xl w-full">
        <form
          className="w-full"
          onSubmit={(event) => {
            // Keep the browser from reloading the page on submit.
            event.preventDefault();
            crawler.handleSubmit();
          }}
        >
          <Field
            name="crawlurl"
            ref={crawler.urlInputRef}
            type="text"
            placeholder={placeholder}
            className="w-full"
            value={crawler.urlToCrawl}
            onChange={(event) => crawler.setUrlToCrawl(event.target.value)}
            icon={sendButton}
          />
        </form>
      </div>
    </div>
  );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { Fragment } from "react";
import { IoMdCloseCircle } from "react-icons/io";
import { MdLink } from "react-icons/md";

import { UrlDisplay } from "./components";
import { FeedItemType } from "../../types";

type FeedItemsProps = {
  contents: FeedItemType[];
  removeContent: (index: number) => void;
};

/**
 * Grid of the items currently queued, one card per item.
 *
 * Renders an empty fragment when there is nothing to show. Each card has a
 * close icon that calls `removeContent` with the card's position in
 * `contents`.
 */
export const FeedItems = ({
  contents,
  removeContent,
}: FeedItemsProps): JSX.Element => {
  const isEmpty = contents.length === 0;
  if (isEmpty) {
    return <Fragment />;
  }

  // NOTE(review): cards are keyed by URL, so two entries with the same URL
  // would share a React key.
  const cards = contents.map((item, position) => (
    <div
      key={item.url}
      className="relative bg-gray-100 p-4 rounded-lg shadow-sm"
    >
      <IoMdCloseCircle
        className="absolute top-2 right-2 cursor-pointer text-gray-400 text-2xl"
        onClick={() => removeContent(position)}
      />
      <div className="flex items-center">
        <MdLink className="mr-2 text-2xl" />
        <UrlDisplay url={item.url} />
      </div>
    </div>
  ));

  return (
    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-2 gap-4 mt-5 shadow-md shadow-md dark:shadow-primary/25 hover:shadow-xl transition-shadow rounded-xl bg-white dark:bg-black border border-black/10 dark:border-white/25 p-6">
      {cards}
    </div>
  );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { useState } from "react";

import { enhanceUrlDisplay } from "./utils/enhanceUrlDisplay";

type UrlDisplayProps = {
  url: string;
};

/**
 * Shows a URL in shortened form; clicking the text switches between the
 * shortened form and the full URL.
 */
export const UrlDisplay = ({ url }: UrlDisplayProps): JSX.Element => {
  const [showFullUrl, setShowFullUrl] = useState(false);

  // Pick what to render up front so the JSX below stays flat.
  const displayedUrl = showFullUrl ? url : enhanceUrlDisplay(url);

  return (
    <div>
      <span
        className="cursor-pointer"
        onClick={() => setShowFullUrl(!showFullUrl)}
      >
        {displayedUrl}
      </span>
    </div>
  );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./UrlDisplay";
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/** Number of characters kept at each end of a shortened path. */
const PATH_EDGE_LENGTH = 5;

/** Marker inserted where the middle of a path was cut out. */
const PATH_ELLIPSIS = "...";

/**
 * Builds a compact, human-friendly version of a URL for display.
 *
 * - A leading `scheme://` is dropped when present; URLs typed without a
 *   protocol (e.g. `example.com/docs`) are handled the same way.
 * - A leading `www.` is removed from the host.
 * - The path is shortened to its first and last few characters, but only when
 *   that actually makes it shorter; short paths are shown in full and an empty
 *   path adds nothing after the host.
 *
 * @param url - The URL to shorten.
 * @returns The display string, e.g. `example.com/a/ver.../page`.
 */
export const enhanceUrlDisplay = (url: string): string => {
  // Strip the protocol, if any, so the host is always the first segment.
  const withoutProtocol = url.replace(/^[a-z][a-z\d+.-]*:\/\//i, "");

  const firstSlash = withoutProtocol.indexOf("/");
  const host =
    firstSlash === -1 ? withoutProtocol : withoutProtocol.slice(0, firstSlash);
  const path = firstSlash === -1 ? "" : withoutProtocol.slice(firstSlash + 1);

  const displayHost = host.startsWith("www.") ? host.slice(4) : host;

  if (path === "") {
    return displayHost;
  }

  // Truncating a path this short would repeat characters or make it longer.
  const shortestTruncatedLength = PATH_EDGE_LENGTH * 2 + PATH_ELLIPSIS.length;
  if (path.length <= shortestTruncatedLength) {
    return `${displayHost}/${path}`;
  }

  const head = path.slice(0, PATH_EDGE_LENGTH);
  const tail = path.slice(-PATH_EDGE_LENGTH);

  return `${displayHost}/${head}${PATH_ELLIPSIS}${tail}`;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./UrlDisplay";
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./FeedItems";
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from "./Crawler";
export * from "./FeedItems";
export * from "./FileUploader";
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { useState } from "react";

import { FeedItemType } from "../types";

/**
 * Holds the list of items queued in the knowledge feed.
 *
 * @returns The current `contents`, the raw `setContents` setter, and two
 * helpers: `addContent` appends an item, `removeContent` drops the item at a
 * given index.
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export const useKnowledgeToFeed = () => {
  const [contents, setContents] = useState<FeedItemType[]>([]);

  const addContent = (content: FeedItemType) => {
    setContents((prevContents) => {
      // Skip items that are already queued: the same source/URL twice would be
      // redundant, and the list UI keys its rows by URL.
      const alreadyQueued = prevContents.some(
        (item) => item.source === content.source && item.url === content.url
      );

      // Returning the previous array unchanged lets React skip the re-render.
      return alreadyQueued ? prevContents : [...prevContents, content];
    });
  };

  const removeContent = (index: number) => {
    setContents((prevContents) => prevContents.filter((_, i) => i !== index));
  };

  return {
    addContent,
    contents,
    setContents,
    removeContent,
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./KnowledgeToFeed";
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/** Where a feed item came from. */
export type FeedItemSource = "crawl" | "upload";

/** One entry in the list of items queued in the knowledge feed. */
export type FeedItemType = {
// Origin of the item.
source: FeedItemSource;
// Address of the item. For "crawl" items this is the submitted web URL;
// NOTE(review): meaning for "upload" items is not visible here — confirm.
url: string;
};
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
export * from "./ChatInput";
export * from "./Feed";
export * from "./KnowledgeToFeed";
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@ import { UUID } from "crypto";
import { useCallback, useRef, useState } from "react";
import { useTranslation } from "react-i18next";

import { isValidUrl } from "@/app/chat/[chatId]/components/ActionsBar/components/KnowledgeToFeed/components/Crawler/helpers/isValidUrl";
import { useCrawlApi } from "@/lib/api/crawl/useCrawlApi";
import { useSupabase } from "@/lib/context/SupabaseProvider";
import { useToast } from "@/lib/hooks";
import { redirectToLogin } from "@/lib/router/redirectToLogin";
import { useEventTracking } from "@/services/analytics/useEventTracking";

import { isValidUrl } from "../helpers/isValidUrl";

// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export const useCrawler = () => {
const [isCrawling, setCrawling] = useState(false);
Expand Down
File renamed without changes.
6 changes: 2 additions & 4 deletions frontend/app/upload/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ import { useBrainContext } from "@/lib/context/BrainProvider/hooks/useBrainConte
import { useSupabase } from "@/lib/context/SupabaseProvider";
import { redirectToLogin } from "@/lib/router/redirectToLogin";

import { Crawler } from "./Crawler";
import { requiredRolesForUpload } from "./config";
import {
Crawler,
FileUploader,
} from "../chat/[chatId]/components/ActionsBar/components/Feed/components";
import { FileUploader } from "../chat/[chatId]/components/ActionsBar/components/KnowledgeToFeed/components";

const UploadPage = (): JSX.Element => {
const { currentBrain } = useBrainContext();
Expand Down
Loading

0 comments on commit efac5f4

Please sign in to comment.