-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMorphDisambiguator.hs
executable file
·62 lines (51 loc) · 2 KB
/
MorphDisambiguator.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env runhaskell
import PGF
import System.Environment
import Control.Applicative
import Data.Maybe
import Data.List
main = do
args <- getArgs
if length args == 2 then do
[p, l] <- getArgs
pgf <- readPGF p
loop $ fromMaybe (\x -> x :: String) $ (parseString pgf) <$> (readLanguage l)
else do
[p, l, s] <- getArgs
pgf <- readPGF p
putStrLn $ fromMaybe "" $ (parseString pgf) <$> (readLanguage l) <*> pure s
getWord :: Bool -> String -> String
getWord _ ('^':xs) = getWord True xs
getWord _ ('/':xs) = ' ' : getWord False xs
getWord False (x:xs) = getWord False xs
getWord True (x:xs) = x : getWord True xs
getWord _ "" = ""
parseSentence :: String -> [String]
parseSentence = words . getWord False
loop :: (String -> String) -> IO ()
loop parse = do
s <- getLine
if s == "quit" then putStrLn "bye" else do
putStrLn $ parse s
loop parse
getMorph :: PGF -> Language -> String -> [(Lemma, Analysis)]
getMorph p l s = lookupMorpho (buildMorpho p l) s
initStream :: PGF -> Language -> String -> String -> String
initStream p l orig s
| length morph > 0 = "^" ++ s ++ buildStream p l morph orig
| otherwise = "^" ++ s ++ "/*" ++ s ++ "$ "
where morph = getMorph p l s
buildStream :: PGF -> Language -> [(Lemma, Analysis)] -> String -> String
buildStream _ _ [] _ = "$ "
buildStream p ln ((l, a):xs) s
| isValid p ln s l = "/" ++ show l ++ buildTags (words a) ++ buildStream p ln xs s
| otherwise = buildStream p ln xs s
where t = startCat p
isValid :: PGF -> Language -> String -> Lemma -> Bool
isValid p ln s l = isInfixOf (show l) . show $ (head $ filter (\x -> not $ isInfixOf "LStr" (show x)) (parse p ln t s))
where t = startCat p
buildTags :: [String] -> String
buildTags [] = ""
buildTags (x:xs) = "<" ++ x ++ ">" ++ buildTags xs
parseString :: PGF -> Language -> String -> String
parseString p l s = foldl (\acc x -> acc ++ x) "" (map (initStream p l (getWord False s)) (parseSentence s))