Skip to content

Commit

Permalink
Simplify the way the schema ID is supplied to a query
Browse files Browse the repository at this point in the history
Summary:
We had some legacy cruft here, and things were more confusing than
necessary.

The new story is:
* The schema ID for a query is always specified by the `schema_id`
field of `UserQuery` (or `UserQueryFacts`). The field is populated
by the client.
* In the local backend, `envSchemaId` stores the schema ID for the
session, and this is attached to each query in `instance Backend
Env`.
(previously it was attached in `Glean.Query.Thrift`, which
missed some queries, but it was also being picked up inside
`userQuery` itself which was pretty confusing).
* The `--schema-id` flag is now ignored. I don't think there's a good
use for this, we should always be passing the schema ID with the query.
* We now log a warning if the user-supplied schema ID is unknown. This
might help debug some issues like D62471296

Reviewed By: josefs

Differential Revision: D62737521

fbshipit-source-id: e7bddaf3c5e9b11dd9aa8d617cb0827e74e002da
  • Loading branch information
Simon Marlow authored and facebook-github-bot committed Sep 17, 2024
1 parent a951be0 commit 96d30aa
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 62 deletions.
23 changes: 18 additions & 5 deletions glean/client/hs/Glean/Remote.hs
Original file line number Diff line number Diff line change
Expand Up @@ -203,24 +203,37 @@ instance Backend ThriftBackend where
<|> Just (thriftBackendClientInfo t)
getDatabase t repo = withShard t repo $ GleanService.getDatabase repo
userQueryFacts t repo q = withShard t repo $
GleanService.userQueryFacts repo q
{ Thrift.userQueryFacts_client_info = client }
GleanService.userQueryFacts repo q {
Thrift.userQueryFacts_client_info = client,
Thrift.userQueryFacts_schema_id = schema_id
}
where
client = Thrift.userQueryFacts_client_info q
<|> Just (thriftBackendClientInfo t)
schema_id = Thrift.userQueryFacts_schema_id q
<|> schemaId t

userQuery t repo q = withShard t repo $
GleanService.userQuery repo q { Thrift.userQuery_client_info = client }
GleanService.userQuery repo q {
Thrift.userQuery_client_info = client,
Thrift.userQuery_schema_id = schema_id
}
where
client = Thrift.userQuery_client_info q
<|> Just (thriftBackendClientInfo t)
schema_id = Thrift.userQuery_schema_id q
<|> schemaId t

userQueryBatch t repo q = withShard t repo $
GleanService.userQueryBatch repo q
{ Thrift.userQueryBatch_client_info = client }
GleanService.userQueryBatch repo q {
Thrift.userQueryBatch_client_info = client,
Thrift.userQueryBatch_schema_id = schema_id
}
where
client = Thrift.userQueryBatch_client_info q
<|> Just (thriftBackendClientInfo t)
schema_id = Thrift.userQueryBatch_schema_id q
<|> schemaId t

deriveStored t _ repo pred = withShard t repo $
GleanService.deriveStored repo pred
Expand Down
20 changes: 17 additions & 3 deletions glean/db/Glean/Backend/Local.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ module Glean.Backend.Local
, serializeInventory
) where

import Control.Applicative
import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Stream (stream)
import Control.Exception (catches, Handler (Handler), throwIO)
Expand All @@ -33,6 +34,7 @@ import Data.Default
import Data.IORef.Extra
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Typeable
import qualified Haxl.Core as Haxl

Expand Down Expand Up @@ -98,8 +100,20 @@ instance Backend Database.Env where

predicateStats env repo opts = Database.predicateStats env repo opts

userQueryFacts = UserQuery.userQueryFacts
userQuery = UserQuery.userQuery
userQueryFacts env repo q
| isNothing (Thrift.userQueryFacts_schema_id q) =
UserQuery.userQueryFacts env repo q {
Thrift.userQueryFacts_schema_id = schemaId env
}
| otherwise = UserQuery.userQueryFacts env repo q

userQuery env repo q
| isNothing (Thrift.userQuery_schema_id q) =
UserQuery.userQuery env repo q {
Thrift.userQuery_schema_id = schemaId env
}
| otherwise = UserQuery.userQuery env repo q

userQueryBatch env repo Thrift.UserQueryBatch{..} = do
resultsRef <- newIORef mempty
numCaps <- getNumCapabilities
Expand All @@ -115,7 +129,7 @@ instance Backend Database.Env where
, userQuery_predicate_version = userQueryBatch_predicate_version
, userQuery_encodings = userQueryBatch_encodings
, userQuery_client_info = userQueryBatch_client_info
, userQuery_schema_id = userQueryBatch_schema_id
, userQuery_schema_id = userQueryBatch_schema_id <|> schemaId env
, userQuery_options = userQueryBatch_options
, userQuery_query = q
}
Expand Down
11 changes: 3 additions & 8 deletions glean/db/Glean/Database/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -416,14 +416,8 @@ options = do
cfgDataStore <- dbRoot <|> dbTmp <|> dbMem <|> pure tmpDataStore
~(cfgSchemaDir, cfgSchemaSource) <- schemaSourceOption
_ignored_for_backwards_compat <- switch (long "db-schema-override")
cfgSchemaId <- fmap (fmap SchemaId) $ optional $ strOption
( long "schema-id"
<> metavar "ID"
<> help (
"version of schema to use when resolving queries " <>
"(mostly for testing purposes; defaults to the version this binary " <>
"was compiled against)")
)
_cfgSchemaId <- fmap (fmap SchemaId) $ optional $ strOption
( long "schema-id" <> hidden ) -- ignored for backwards compat

cfgRecipeConfig <- recipesConfigThriftSource
cfgServerConfig <-
Expand All @@ -447,6 +441,7 @@ options = do
, cfgBackupBackends = cfgBackupBackends def
, cfgFilterAvailableDBs = return
, cfgTracer = mempty
, cfgSchemaId = Nothing
, .. }
where
debugParser :: Parser DebugFlags
Expand Down
1 change: 1 addition & 0 deletions glean/db/Glean/Database/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ data Env = forall storage. Storage storage => Env
, envUpdateSchema :: Bool
, envSchemaUpdateSignal :: TMVar ()
, envSchemaId :: Maybe Thrift.SchemaId
-- ^ Used to store the schema ID for the session when using the local backend
, envRecipeConfig :: Observed Recipes.Config
, envServerConfig :: Observed ServerConfig.Config
, envBackupBackends :: Backup.Backends
Expand Down
4 changes: 2 additions & 2 deletions glean/db/Glean/Query/Derive.hs
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ getPredicate
-> IO PredicateDetails
getPredicate env repo schema ref = do
config <- Observed.get (envServerConfig env)
schemaVersion <- UserQuery.schemaVersionForQuery env schema config
(Just repo) Nothing
schemaId <- getDbSchemaVersion env repo
schemaVersion <- UserQuery.schemaVersionForQuery schema config schemaId
-- we default to resolving this predicate using the schema
-- version stored in the glean.schema_id property of the
-- DB. This is important because the client is often just "glean
Expand Down
60 changes: 17 additions & 43 deletions glean/db/Glean/Query/UserQuery.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import Data.Hashable (hash)
import Data.Int
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
Expand Down Expand Up @@ -151,7 +150,7 @@ genericUserQueryFacts env repo query enc = do
config <- Observed.get (envServerConfig env)
readDatabase env repo $ \odb lookup ->
performUserQuery enc (odbSchema odb) $
userQueryFactsImpl env (odbSchema odb) config lookup query
userQueryFactsImpl (odbSchema odb) config lookup query

-- | Results returned from a query, parametrised of the types of facts and
-- statistics
Expand Down Expand Up @@ -423,24 +422,21 @@ pidDetails schema ty = do
Just d -> return d

userQueryFactsImpl
:: Database.Env
-> DbSchema
:: DbSchema
-> ServerConfig.Config
-> Lookup
-> Thrift.UserQueryFacts
-> IO (Results Stats Thrift.Fact)
-- The length of the result list is guaranteed to be the same
-- as the userQueryFacts_facts list in the input.
userQueryFactsImpl
env
schema@DbSchema{..}
config
lookup
query@Thrift.UserQueryFacts{..} = do
let opts = fromMaybe def userQueryFacts_options

schemaSelector <- schemaVersionForQuery env schema config Nothing
userQueryFacts_schema_id
schemaSelector <- schemaVersionForQuery schema config userQueryFacts_schema_id

expandPids <- optsExpandPids opts schemaSelector schema
let limits = mkQueryRuntimeOptions opts config expandPids
Expand Down Expand Up @@ -636,8 +632,7 @@ userQueryImpl
debug = Thrift.userQueryOptions_debug opts

schemaVersion <-
schemaVersionForQuery env schema config Nothing
userQuery_schema_id
schemaVersionForQuery schema config userQuery_schema_id
trans <- transformationsForQuery schema schemaVersion

(returnType, compileTime, irDiag, cont) <-
Expand Down Expand Up @@ -821,44 +816,23 @@ transformationsForQuery schema selector = do
Nothing -> throwIO $ Thrift.BadQuery "no transformations for schema_id"

schemaVersionForQuery
:: Database.Env
-> DbSchema
:: DbSchema
-> ServerConfig.Config
-> Maybe Thrift.Repo -- ^ default to the DB version if this is supplied
-> Maybe Thrift.SchemaId -- ^ SchemaId specified by client
-> IO SchemaSelector
schemaVersionForQuery env schema ServerConfig.Config{..} repo qid = do
dbSchemaId <-
case repo of
Nothing -> return Nothing
Just repo -> getDbSchemaVersion env repo
let
allSelectors = catMaybes
[ SpecificSchemaId <$> qid
, SpecificSchemaId <$> envSchemaId env
, SpecificSchemaId <$> dbSchemaId
]
vlog 1 $ "all selectors: " <> show (pretty allSelectors)

let
isAvailable (SpecificSchemaId id) =
id `Map.member` schemaEnvs schema
isAvailable _ = True

schemaVersionForQuery schema ServerConfig.Config{..} qid = do
use <-
if config_strict_query_schema_id
then case allSelectors of
[] -> return LatestSchemaAll
(id : _)
| isAvailable id -> return id
| SpecificSchemaId schema_id <- id ->
throwIO (Thrift.UnknownSchemaId schema_id)
| otherwise ->
throwIO (Thrift.Exception "Unknown schema version")
else
return $ fromMaybe LatestSchemaAll $ find isAvailable allSelectors

vlog 1 $ "using selector: " <> show (pretty use)
case qid of
Nothing -> return LatestSchemaAll
Just id
| id `Map.member` schemaEnvs schema -> return (SpecificSchemaId id)
| config_strict_query_schema_id ->
throwIO (Thrift.UnknownSchemaId id)
| otherwise -> do
logWarning $ "schema unavailable: " <> show id
return LatestSchemaAll

vlog 1 $ "using schema ID: " <> show (pretty use)
return use

optsExpandPids
Expand Down
1 change: 0 additions & 1 deletion glean/hs/Glean/Query/Thrift.hs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ runQueryPage be repo cont (Query query) = do
UserQueryEncoding_listbin def,
UserQueryEncoding_bin def
]
, userQuery_schema_id = schemaId be
}
UserQueryResults{..} <- userQuery be repo query'
mapM_ reportUserQueryStats userQueryResults_stats
Expand Down

0 comments on commit 96d30aa

Please sign in to comment.