Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Starting the V1 search engine #65

Merged
merged 1 commit into from
Mar 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,22 @@ on:
jobs:
tests:
runs-on: ubuntu-latest
# Service containers to run with `container-job`
services:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
# Provide the password for postgres
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:

- uses: actions/checkout@v2.3.1

- name: Set up Haskell
Expand Down Expand Up @@ -39,3 +53,13 @@ jobs:
make build
- name: Test
run: make test
env:
FLORA_DB_DATABASE: flora_test
FLORA_DB_PASSWORD: postgres
FLORA_DB_POOL_CONNECTIONS: 10
FLORA_DB_POST: 5432
FLORA_DB_SUB_POOLS: 10
FLORA_DB_TIMEOUT: 10
FLORA_DB_USER: postgres
FLORA_LOGGING_DESTINATION: stdout
FLORA_HTTP_PORT: 8083
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ clean: ## Remove the cabal build artifacts
assets-deps: ## Install the dependencies of the frontend
@cd assets/ && yarn

assets-build: assets-deps ## Build the web assets
build-assets: assets-deps ## Build the web assets
@cd assets/ && yarn build

assets-watch: ## Continuously rebuild the web assets
watch-assets: ## Continuously rebuild the web assets
@cd assets/ && yarn watch

assets-clean: ## Remove JS artifacts
clean-assets: ## Remove JS artifacts
@cd assets/ && rm -R node_modules

db-create: ## Create the database
Expand All @@ -42,7 +42,7 @@ ghci: repl ## Start a cabal REPL (alias for `make repl`)
watch: soufflé ## Load the main library and reload on file change
@ghcid --target flora-server -l

test : soufflé ## Run the test suite
test: soufflé ## Run the test suite
./scripts/run-tests.sh

watch-test: soufflé ## Load the tests in ghcid and reload them on file change
Expand Down
1 change: 1 addition & 0 deletions assets/tailwind.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.exports = {
colors: {
"background": {
"dark": "#25282a",
"dark-focused": "#3a3d3f",
"DEFAULT": "#f3f4f6",
},
"brand-purple": {
Expand Down
2 changes: 2 additions & 0 deletions flora.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ library
Flora.Model.User.Query
Flora.Model.User.Update
Flora.Publish
Flora.Search
Flora.ThirdParties.Hackage.API
Flora.ThirdParties.Hackage.Client
FloraWeb.Client
Expand Down Expand Up @@ -141,6 +142,7 @@ library
FloraWeb.Templates.Pages.Categories.Show
FloraWeb.Templates.Pages.Home
FloraWeb.Templates.Pages.Packages
FloraWeb.Templates.Pages.Search
FloraWeb.Templates.Pages.Sessions
FloraWeb.Templates.Types
FloraWeb.Types
Expand Down
3 changes: 3 additions & 0 deletions migrations/20211106003401_create_packages.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
create extension if not exists pg_trgm;
-- A package is comprised of metadata and has many releases.
create table if not exists packages (
namespace text not null,
Expand All @@ -9,3 +10,5 @@ create table if not exists packages (
updated_at timestamptz not null,
primary key(namespace, name)
);

create index package_name_namespace_trgm on packages USING GIN (name gin_trgm_ops);
78 changes: 40 additions & 38 deletions src/Flora/Import/Package.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# OPTIONS_GHC -Wno-unused-imports #-}
module Flora.Import.Package where

import Control.Concurrent
import Control.Monad.Except
import qualified Data.ByteString as B
import Data.Map (Map)
Expand Down Expand Up @@ -72,6 +73,7 @@ coreLibraries = Set.fromList
, PackageName "binary"
, PackageName "bytestring"
, PackageName "containers"
, PackageName "directory"
, PackageName "deepseq"
, PackageName "ghc-bignum"
, PackageName "ghc-boot-th"
Expand Down Expand Up @@ -116,44 +118,44 @@ importCabal :: (MonadIO m)
-> FilePath -- ^ Directory where to find the .cabal files
-> DBT m Package
importCabal userId packageName cabalFile directory = do
genDesc <- liftIO $ loadFile cabalFile
let namespace = if Set.member packageName coreLibraries then Namespace "haskell" else Namespace "hackage"
result <- runExceptT $ do
package <- lift (Query.getHaskellOrHackagePackage packageName)
>>= \case
Nothing -> do
logImportMessage (namespace, packageName) $
"\"" <> display packageName <> "\" could not be found in the database."
cabalToPackage userId (genDesc ^. #packageDescription) namespace packageName
Just package -> pure package
release <- lift $
Query.getReleaseByVersion (package ^. #namespace, package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
>>= \case
Nothing -> do
r <- createRelease (package ^. #namespace) (package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
logImportMessage (namespace, packageName) $ "Creating Release "
<> display (r ^. #releaseId) <> " for package " <> display (package ^. #name)
pure r
Just release -> do
logImportMessage (namespace, packageName) $
"Release found: releaseId: " <> display (release ^. #releaseId) <> " / packageId: "
pure release
componentsAndRequirements <- extractComponents userId directory
(namespace, packageName)
(flattenPackageDescription genDesc)
(release ^. #releaseId)
(package ^. #name)
let components = fmap fst componentsAndRequirements
let requirements = foldMap snd componentsAndRequirements
pure (package, release, components, requirements)
case result of
Left err -> error $ "Encountered error during import: " <> show err
Right (package, release, components, requirements) -> do
let rawCategoryField = T.pack $ Cabal.fromShortText $ genDesc ^. (#packageDescription % #category)
let categoryList = fmap (UserPackageCategory . T.stripStart) (T.splitOn "," rawCategoryField)
Update.publishPackage requirements components release categoryList package
let namespace = if Set.member packageName coreLibraries then Namespace "haskell" else Namespace "hackage"
genDesc <- liftIO $ loadFile cabalFile
result <- runExceptT $ do
package <- lift (Query.getHaskellOrHackagePackage packageName)
>>= \case
Nothing -> do
logImportMessage (namespace, packageName) $
"\"" <> display packageName <> "\" could not be found in the database."
cabalToPackage userId (genDesc ^. #packageDescription) namespace packageName
Just package -> pure package
release <- lift $
Query.getReleaseByVersion (package ^. #namespace, package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
>>= \case
Nothing -> do
r <- createRelease (package ^. #namespace) (package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
logImportMessage (namespace, packageName) $ "Creating Release "
<> display (r ^. #releaseId) <> " for package " <> display (package ^. #name)
pure r
Just release -> do
logImportMessage (namespace, packageName) $
"Release found: releaseId: " <> display (release ^. #releaseId) <> " / packageId: "
pure release
componentsAndRequirements <- extractComponents userId directory
(namespace, packageName)
(flattenPackageDescription genDesc)
(release ^. #releaseId)
(package ^. #name)
let components = fmap fst componentsAndRequirements
let requirements = foldMap snd componentsAndRequirements
pure (package, release, components, requirements)
case result of
Left err -> error $ "Encountered error during import: " <> show err
Right (package, release, components, requirements) -> do
let rawCategoryField = T.pack $ Cabal.fromShortText $ genDesc ^. (#packageDescription % #category)
let categoryList = fmap (UserPackageCategory . T.stripStart) (T.splitOn "," rawCategoryField)
Update.publishPackage requirements components release categoryList package

importPackageDeps :: (MonadIO m) => PackageName -> FilePath -> DBT m (Map PackageName (Set PackageName))
importPackageDeps pName directory = do
Expand Down
15 changes: 7 additions & 8 deletions src/Flora/Model/Category/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,17 @@ getPackagesFromCategorySlug slug = do
Just Category{categoryId} -> do
liftIO $ T.putStrLn "Category found!"
query Select [sql|
select p.namespace,
p.name,
p.synopsis,
p.metadata,
p.owner_id,
p.created_at,
p.updated_at,
select p.namespace
, p.name
, p.synopsis
, p.metadata
, p.owner_id
, p.created_at
, p.updated_at
from packages as p
inner join package_categories as pc on (p.namespace = pc.package_namespace and p.name = pc.package_name)
where pc.category_id = ?
|] (Only categoryId)
-- joinSelectOneByField @Package @PackageCategory [field| package_id |] [field| category_id |] categoryId

getAllCategories :: (MonadIO m) => DBT m (Vector Category)
getAllCategories = query_ Select (_select @Category)
58 changes: 54 additions & 4 deletions src/Flora/Model/Package/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ module Flora.Model.Package.Query where
import Control.Monad.IO.Class (MonadIO)
import Data.Text (Text)
import Data.Vector (Vector)
import Database.PostgreSQL.Entity (_select, _selectWhere, selectById)
import Database.PostgreSQL.Entity (_select, _selectWhere, selectById,
selectManyByField)
import Database.PostgreSQL.Entity.DBT (QueryNature (Select), query, queryOne,
query_)
import Database.PostgreSQL.Entity.Types (Field, field)
Expand All @@ -15,14 +16,17 @@ import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Transact (DBT)
import Distribution.Types.Version (Version)
import Flora.Model.Category (Category, CategoryId)
import Flora.Model.Package (Namespace, Package, PackageName)
import Flora.Model.Package (Namespace (..), Package, PackageName)
import Flora.Model.Package.Component (ComponentId, ComponentType,
PackageComponent)
import Flora.Model.Release (ReleaseId)

getAllPackages :: (MonadIO m) => DBT m (Vector Package)
getAllPackages = query_ Select (_select @Package)

getPackagesByNamespace :: Namespace -> DBT IO (Vector Package)
getPackagesByNamespace namespace = selectManyByField @Package [field| namespace |] (Only namespace)

getPackageByNamespaceAndName :: (MonadIO m) => Namespace -> PackageName -> DBT m (Maybe Package)
getPackageByNamespaceAndName namespace name = queryOne Select
(_selectWhere @Package [[field| namespace |], [field| name |]])
Expand Down Expand Up @@ -111,8 +115,6 @@ getPackageCategories namespace packageName = query Select
and pc.package_name = ?
|] (namespace, packageName)

-- joinSelectOneByField @Category @PackageCategory [field| category_id |] [field| package_namespace |] packageId

getPackagesFromCategoryWithLatestVersion :: MonadIO m
=> CategoryId
-> DBT m (Vector (Namespace, PackageName, Text, Version))
Expand All @@ -126,3 +128,51 @@ getPackagesFromCategoryWithLatestVersion categoryId = query Select q (Only categ
inner join categories as c on c.category_id = pc.category_id
where c.category_id = ?
|]

searchPackage :: Text -> DBT IO (Vector (Namespace, PackageName, Text, Version, Float))
searchPackage searchString = query Select [sql|
SELECT lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
, word_similarity(lv.name, ?) as rating
FROM latest_versions as lv
WHERE ? <% lv.name
GROUP BY
lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
ORDER BY rating desc, count(lv."namespace") desc, lv.name asc;
|] (searchString, searchString)

listAllPackages :: DBT IO (Vector (Namespace, PackageName, Text, Version, Float))
listAllPackages = query_ Select [sql|
SELECT lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
, (1.0::real) as rating
FROM latest_versions as lv
GROUP BY
lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
ORDER BY rating desc, count(lv."namespace") desc, lv.name asc;
|]

searchPackageByNamespace :: Namespace -> Text -> DBT IO (Vector (Namespace, PackageName, Text, Float))
searchPackageByNamespace (Namespace namespace) searchString = query Select [sql|
SELECT p."namespace"
, p."name"
, p."synopsis"
, word_similarity(p.name, ?) as rating
FROM packages as p
WHERE ? <% p.name
AND p."namespace" = ?
GROUP BY
p."namespace"
, p."name"
ORDER BY rating desc, count(p."namespace") desc, p.name asc;
|] (searchString, searchString, namespace)
34 changes: 34 additions & 0 deletions src/Flora/Search.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Flora.Search where

import Data.Text
import Data.Vector (Vector)

import Control.Monad.IO.Class
import Database.PostgreSQL.Entity.DBT (withPool)
import Distribution.Types.Version (Version)
import Flora.Environment (FloraEnv (..))
import Flora.Model.Package (Namespace (..), PackageName)
import qualified Flora.Model.Package.Query as Query
import FloraWeb.Server.Auth (FloraPageM)
import FloraWeb.Session (Session (..), getSession)
import FloraWeb.Types (fetchFloraEnv)
import Optics.Core

-- searchPackageByNamespace :: Namespace -> Text -> FloraPageM (Vector Package)
-- searchPackageByNamespace (Namespace namespace) query = undefined

searchPackageByName :: Text -> FloraPageM (Vector (Namespace, PackageName, Text, Version))
searchPackageByName queryString = do
session <- getSession
FloraEnv{pool} <- liftIO $ fetchFloraEnv (session ^. #webEnvStore)
results <- liftIO $ withPool pool $ Query.searchPackage queryString
let getInfo = (,,,) <$> view _1 <*> view _2 <*> view _3 <*> view _4
pure $ fmap getInfo results

listAllPackages :: FloraPageM (Vector (Namespace, PackageName, Text, Version))
listAllPackages = do
session <- getSession
FloraEnv{pool} <- liftIO $ fetchFloraEnv (session ^. #webEnvStore)
results <- liftIO $ withPool pool Query.listAllPackages
let getInfo = (,,,) <$> view _1 <*> view _2 <*> view _3 <*> view _4
pure $ fmap getInfo results
2 changes: 2 additions & 0 deletions src/FloraWeb/Routes/Pages.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module FloraWeb.Routes.Pages where

import Data.Text
import qualified FloraWeb.Routes.Pages.Admin as Admin
import qualified FloraWeb.Routes.Pages.Categories as Categories
import qualified FloraWeb.Routes.Pages.Packages as Packages
Expand All @@ -18,5 +19,6 @@ data Routes' mode = Routes'
, sessions :: mode :- "sessions" :> Sessions.Routes
, packages :: mode :- "packages" :> Packages.Routes
, categories :: mode :- "categories" :> Categories.Routes
, search :: mode :- "search" :> QueryParam "q" Text :> Get '[HTML] (Html ())
}
deriving stock (Generic)
13 changes: 10 additions & 3 deletions src/FloraWeb/Routes/Pages/Packages.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module FloraWeb.Routes.Pages.Packages
( Routes
, Routes'(..)
, GetRedirect
) where

import Data.Text
Expand All @@ -11,10 +12,16 @@ import Servant.HTML.Lucid

type Routes = NamedRoutes Routes'

type GetRedirect
= Verb 'GET 301 '[HTML] (Headers '[Header "Location" Text] NoContent)

data Routes' mode = Routes'
{ show :: mode :- Capture "namespace" Text :> Capture "package" Text
:> Get '[HTML] (Html ())
, showVersion :: mode :- Capture "namespace" Text :> Capture "package" Text
{ index :: mode :- GetRedirect
, show :: mode :- Capture "organisation" Text
:> Capture "package" Text
:> Get '[HTML] (Html ())
, showVersion :: mode :- Capture "organisation" Text
:> Capture "package" Text
:> Capture "version" Text :> Get '[HTML] (Html ())
}
deriving stock (Generic)
Expand Down
Loading