Skip to content

Commit

Permalink
Starting the V1 search engine (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
tchoutri authored Mar 30, 2022
1 parent 928a7bf commit d28d327
Show file tree
Hide file tree
Showing 21 changed files with 292 additions and 81 deletions.
26 changes: 25 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,22 @@ on:
jobs:
tests:
runs-on: ubuntu-latest
# Service containers to run with `container-job`
services:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
# Provide the password for postgres
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:

- uses: actions/checkout@v2.3.1

- name: Set up Haskell
Expand Down Expand Up @@ -39,3 +53,13 @@ jobs:
make build
- name: Test
run: make test
env:
FLORA_DB_DATABASE: flora_test
FLORA_DB_PASSWORD: postgres
FLORA_DB_POOL_CONNECTIONS: 10
FLORA_DB_POST: 5432
FLORA_DB_SUB_POOLS: 10
FLORA_DB_TIMEOUT: 10
FLORA_DB_USER: postgres
FLORA_LOGGING_DESTINATION: stdout
FLORA_HTTP_PORT: 8083
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ clean: ## Remove the cabal build artifacts
assets-deps: ## Install the dependencies of the frontend
@cd assets/ && yarn

assets-build: assets-deps ## Build the web assets
build-assets: assets-deps ## Build the web assets
@cd assets/ && yarn build

assets-watch: ## Continuously rebuild the web assets
watch-assets: ## Continuously rebuild the web assets
@cd assets/ && yarn watch

assets-clean: ## Remove JS artifacts
clean-assets: ## Remove JS artifacts
@cd assets/ && rm -R node_modules

db-create: ## Create the database
Expand All @@ -42,7 +42,7 @@ ghci: repl ## Start a cabal REPL (alias for `make repl`)
watch: soufflé ## Load the main library and reload on file change
@ghcid --target flora-server -l

test : soufflé ## Run the test suite
test: soufflé ## Run the test suite
./scripts/run-tests.sh

watch-test: soufflé ## Load the tests in ghcid and reload them on file change
Expand Down
1 change: 1 addition & 0 deletions assets/tailwind.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.exports = {
colors: {
"background": {
"dark": "#25282a",
"dark-focused": "#3a3d3f",
"DEFAULT": "#f3f4f6",
},
"brand-purple": {
Expand Down
2 changes: 2 additions & 0 deletions flora.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ library
Flora.Model.User.Query
Flora.Model.User.Update
Flora.Publish
Flora.Search
Flora.ThirdParties.Hackage.API
Flora.ThirdParties.Hackage.Client
FloraWeb.Client
Expand Down Expand Up @@ -141,6 +142,7 @@ library
FloraWeb.Templates.Pages.Categories.Show
FloraWeb.Templates.Pages.Home
FloraWeb.Templates.Pages.Packages
FloraWeb.Templates.Pages.Search
FloraWeb.Templates.Pages.Sessions
FloraWeb.Templates.Types
FloraWeb.Types
Expand Down
3 changes: 3 additions & 0 deletions migrations/20211106003401_create_packages.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
create extension if not exists pg_trgm;
-- A package is comprised of metadata and has many releases.
create table if not exists packages (
namespace text not null,
Expand All @@ -9,3 +10,5 @@ create table if not exists packages (
updated_at timestamptz not null,
primary key(namespace, name)
);

create index package_name_namespace_trgm on packages USING GIN (name gin_trgm_ops);
78 changes: 40 additions & 38 deletions src/Flora/Import/Package.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# OPTIONS_GHC -Wno-unused-imports #-}
module Flora.Import.Package where

import Control.Concurrent
import Control.Monad.Except
import qualified Data.ByteString as B
import Data.Map (Map)
Expand Down Expand Up @@ -72,6 +73,7 @@ coreLibraries = Set.fromList
, PackageName "binary"
, PackageName "bytestring"
, PackageName "containers"
, PackageName "directory"
, PackageName "deepseq"
, PackageName "ghc-bignum"
, PackageName "ghc-boot-th"
Expand Down Expand Up @@ -116,44 +118,44 @@ importCabal :: (MonadIO m)
-> FilePath -- ^ Directory where to find the .cabal files
-> DBT m Package
importCabal userId packageName cabalFile directory = do
genDesc <- liftIO $ loadFile cabalFile
let namespace = if Set.member packageName coreLibraries then Namespace "haskell" else Namespace "hackage"
result <- runExceptT $ do
package <- lift (Query.getHaskellOrHackagePackage packageName)
>>= \case
Nothing -> do
logImportMessage (namespace, packageName) $
"\"" <> display packageName <> "\" could not be found in the database."
cabalToPackage userId (genDesc ^. #packageDescription) namespace packageName
Just package -> pure package
release <- lift $
Query.getReleaseByVersion (package ^. #namespace, package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
>>= \case
Nothing -> do
r <- createRelease (package ^. #namespace) (package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
logImportMessage (namespace, packageName) $ "Creating Release "
<> display (r ^. #releaseId) <> " for package " <> display (package ^. #name)
pure r
Just release -> do
logImportMessage (namespace, packageName) $
"Release found: releaseId: " <> display (release ^. #releaseId) <> " / packageId: "
pure release
componentsAndRequirements <- extractComponents userId directory
(namespace, packageName)
(flattenPackageDescription genDesc)
(release ^. #releaseId)
(package ^. #name)
let components = fmap fst componentsAndRequirements
let requirements = foldMap snd componentsAndRequirements
pure (package, release, components, requirements)
case result of
Left err -> error $ "Encountered error during import: " <> show err
Right (package, release, components, requirements) -> do
let rawCategoryField = T.pack $ Cabal.fromShortText $ genDesc ^. (#packageDescription % #category)
let categoryList = fmap (UserPackageCategory . T.stripStart) (T.splitOn "," rawCategoryField)
Update.publishPackage requirements components release categoryList package
let namespace = if Set.member packageName coreLibraries then Namespace "haskell" else Namespace "hackage"
genDesc <- liftIO $ loadFile cabalFile
result <- runExceptT $ do
package <- lift (Query.getHaskellOrHackagePackage packageName)
>>= \case
Nothing -> do
logImportMessage (namespace, packageName) $
"\"" <> display packageName <> "\" could not be found in the database."
cabalToPackage userId (genDesc ^. #packageDescription) namespace packageName
Just package -> pure package
release <- lift $
Query.getReleaseByVersion (package ^. #namespace, package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
>>= \case
Nothing -> do
r <- createRelease (package ^. #namespace) (package ^. #name)
((genDesc ^. #packageDescription) ^. (#package % #pkgVersion))
logImportMessage (namespace, packageName) $ "Creating Release "
<> display (r ^. #releaseId) <> " for package " <> display (package ^. #name)
pure r
Just release -> do
logImportMessage (namespace, packageName) $
"Release found: releaseId: " <> display (release ^. #releaseId) <> " / packageId: "
pure release
componentsAndRequirements <- extractComponents userId directory
(namespace, packageName)
(flattenPackageDescription genDesc)
(release ^. #releaseId)
(package ^. #name)
let components = fmap fst componentsAndRequirements
let requirements = foldMap snd componentsAndRequirements
pure (package, release, components, requirements)
case result of
Left err -> error $ "Encountered error during import: " <> show err
Right (package, release, components, requirements) -> do
let rawCategoryField = T.pack $ Cabal.fromShortText $ genDesc ^. (#packageDescription % #category)
let categoryList = fmap (UserPackageCategory . T.stripStart) (T.splitOn "," rawCategoryField)
Update.publishPackage requirements components release categoryList package

importPackageDeps :: (MonadIO m) => PackageName -> FilePath -> DBT m (Map PackageName (Set PackageName))
importPackageDeps pName directory = do
Expand Down
15 changes: 7 additions & 8 deletions src/Flora/Model/Category/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,17 @@ getPackagesFromCategorySlug slug = do
Just Category{categoryId} -> do
liftIO $ T.putStrLn "Category found!"
query Select [sql|
select p.namespace,
p.name,
p.synopsis,
p.metadata,
p.owner_id,
p.created_at,
p.updated_at,
select p.namespace
, p.name
, p.synopsis
, p.metadata
, p.owner_id
, p.created_at
, p.updated_at
from packages as p
inner join package_categories as pc on (p.namespace = pc.package_namespace and p.name = pc.package_name)
where pc.category_id = ?
|] (Only categoryId)
-- joinSelectOneByField @Package @PackageCategory [field| package_id |] [field| category_id |] categoryId

getAllCategories :: (MonadIO m) => DBT m (Vector Category)
getAllCategories = query_ Select (_select @Category)
58 changes: 54 additions & 4 deletions src/Flora/Model/Package/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ module Flora.Model.Package.Query where
import Control.Monad.IO.Class (MonadIO)
import Data.Text (Text)
import Data.Vector (Vector)
import Database.PostgreSQL.Entity (_select, _selectWhere, selectById)
import Database.PostgreSQL.Entity (_select, _selectWhere, selectById,
selectManyByField)
import Database.PostgreSQL.Entity.DBT (QueryNature (Select), query, queryOne,
query_)
import Database.PostgreSQL.Entity.Types (Field, field)
Expand All @@ -15,14 +16,17 @@ import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Transact (DBT)
import Distribution.Types.Version (Version)
import Flora.Model.Category (Category, CategoryId)
import Flora.Model.Package (Namespace, Package, PackageName)
import Flora.Model.Package (Namespace (..), Package, PackageName)
import Flora.Model.Package.Component (ComponentId, ComponentType,
PackageComponent)
import Flora.Model.Release (ReleaseId)

getAllPackages :: (MonadIO m) => DBT m (Vector Package)
getAllPackages = query_ Select (_select @Package)

getPackagesByNamespace :: Namespace -> DBT IO (Vector Package)
getPackagesByNamespace namespace = selectManyByField @Package [field| namespace |] (Only namespace)

getPackageByNamespaceAndName :: (MonadIO m) => Namespace -> PackageName -> DBT m (Maybe Package)
getPackageByNamespaceAndName namespace name = queryOne Select
(_selectWhere @Package [[field| namespace |], [field| name |]])
Expand Down Expand Up @@ -111,8 +115,6 @@ getPackageCategories namespace packageName = query Select
and pc.package_name = ?
|] (namespace, packageName)

-- joinSelectOneByField @Category @PackageCategory [field| category_id |] [field| package_namespace |] packageId

getPackagesFromCategoryWithLatestVersion :: MonadIO m
=> CategoryId
-> DBT m (Vector (Namespace, PackageName, Text, Version))
Expand All @@ -126,3 +128,51 @@ getPackagesFromCategoryWithLatestVersion categoryId = query Select q (Only categ
inner join categories as c on c.category_id = pc.category_id
where c.category_id = ?
|]

searchPackage :: Text -> DBT IO (Vector (Namespace, PackageName, Text, Version, Float))
searchPackage searchString = query Select [sql|
SELECT lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
, word_similarity(lv.name, ?) as rating
FROM latest_versions as lv
WHERE ? <% lv.name
GROUP BY
lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
ORDER BY rating desc, count(lv."namespace") desc, lv.name asc;
|] (searchString, searchString)

listAllPackages :: DBT IO (Vector (Namespace, PackageName, Text, Version, Float))
listAllPackages = query_ Select [sql|
SELECT lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
, (1.0::real) as rating
FROM latest_versions as lv
GROUP BY
lv."namespace"
, lv."name"
, lv."synopsis"
, lv."version"
ORDER BY rating desc, count(lv."namespace") desc, lv.name asc;
|]

searchPackageByNamespace :: Namespace -> Text -> DBT IO (Vector (Namespace, PackageName, Text, Float))
searchPackageByNamespace (Namespace namespace) searchString = query Select [sql|
SELECT p."namespace"
, p."name"
, p."synopsis"
, word_similarity(p.name, ?) as rating
FROM packages as p
WHERE ? <% p.name
AND p."namespace" = ?
GROUP BY
p."namespace"
, p."name"
ORDER BY rating desc, count(p."namespace") desc, p.name asc;
|] (searchString, searchString, namespace)
34 changes: 34 additions & 0 deletions src/Flora/Search.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Flora.Search where

import Data.Text
import Data.Vector (Vector)

import Control.Monad.IO.Class
import Database.PostgreSQL.Entity.DBT (withPool)
import Distribution.Types.Version (Version)
import Flora.Environment (FloraEnv (..))
import Flora.Model.Package (Namespace (..), PackageName)
import qualified Flora.Model.Package.Query as Query
import FloraWeb.Server.Auth (FloraPageM)
import FloraWeb.Session (Session (..), getSession)
import FloraWeb.Types (fetchFloraEnv)
import Optics.Core

-- searchPackageByNamespace :: Namespace -> Text -> FloraPageM (Vector Package)
-- searchPackageByNamespace (Namespace namespace) query = undefined

searchPackageByName :: Text -> FloraPageM (Vector (Namespace, PackageName, Text, Version))
searchPackageByName queryString = do
session <- getSession
FloraEnv{pool} <- liftIO $ fetchFloraEnv (session ^. #webEnvStore)
results <- liftIO $ withPool pool $ Query.searchPackage queryString
let getInfo = (,,,) <$> view _1 <*> view _2 <*> view _3 <*> view _4
pure $ fmap getInfo results

listAllPackages :: FloraPageM (Vector (Namespace, PackageName, Text, Version))
listAllPackages = do
session <- getSession
FloraEnv{pool} <- liftIO $ fetchFloraEnv (session ^. #webEnvStore)
results <- liftIO $ withPool pool Query.listAllPackages
let getInfo = (,,,) <$> view _1 <*> view _2 <*> view _3 <*> view _4
pure $ fmap getInfo results
2 changes: 2 additions & 0 deletions src/FloraWeb/Routes/Pages.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module FloraWeb.Routes.Pages where

import Data.Text
import qualified FloraWeb.Routes.Pages.Admin as Admin
import qualified FloraWeb.Routes.Pages.Categories as Categories
import qualified FloraWeb.Routes.Pages.Packages as Packages
Expand All @@ -18,5 +19,6 @@ data Routes' mode = Routes'
, sessions :: mode :- "sessions" :> Sessions.Routes
, packages :: mode :- "packages" :> Packages.Routes
, categories :: mode :- "categories" :> Categories.Routes
, search :: mode :- "search" :> QueryParam "q" Text :> Get '[HTML] (Html ())
}
deriving stock (Generic)
13 changes: 10 additions & 3 deletions src/FloraWeb/Routes/Pages/Packages.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module FloraWeb.Routes.Pages.Packages
( Routes
, Routes'(..)
, GetRedirect
) where

import Data.Text
Expand All @@ -11,10 +12,16 @@ import Servant.HTML.Lucid

type Routes = NamedRoutes Routes'

type GetRedirect
= Verb 'GET 301 '[HTML] (Headers '[Header "Location" Text] NoContent)

data Routes' mode = Routes'
{ show :: mode :- Capture "namespace" Text :> Capture "package" Text
:> Get '[HTML] (Html ())
, showVersion :: mode :- Capture "namespace" Text :> Capture "package" Text
{ index :: mode :- GetRedirect
, show :: mode :- Capture "organisation" Text
:> Capture "package" Text
:> Get '[HTML] (Html ())
, showVersion :: mode :- Capture "organisation" Text
:> Capture "package" Text
:> Capture "version" Text :> Get '[HTML] (Html ())
}
deriving stock (Generic)
Expand Down
Loading

0 comments on commit d28d327

Please sign in to comment.