darcsden :: kowey -> qc-overwhelm -> blob

I don't know how to shrink effectively in QC

root / NLP / Aura / CgmMacros.hs

-- Macros from the Common Grammar Manifesto (used by XMG)
-- see http://wiki.loria.fr/wiki/Common_grammar_manifesto/Lexical_macros

module NLP.Aura.CgmMacros where

import Control.Applicative ( (<$>), (<*>) )
import Data.List ( nub, (\\) )
import Data.Text ( Text )
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Language (emptyDef)
import Text.ParserCombinators.Parsec.Token
    ( TokenParser, LanguageDef, commentLine, commentStart, commentEnd
    , opLetter, reservedOpNames, reservedNames, identLetter, identStart
    , makeTokenParser
    )
import qualified Data.Map as Map
import qualified Data.Text as T
import qualified Text.ParserCombinators.Parsec.Token as P

-- Here we are cheating by assuming that whatever we want to read in
-- can be modeled by GenI
import Data.FullList hiding ( (++) )
import NLP.GenI.FeatureStructure
import NLP.GenI.General ( isGeniIdentLetter )
import NLP.GenI.GeniVal
import NLP.GenI.Parser ( geniValue )
import NLP.GenI.Semantics hiding ( constants )

data Macro = Macro
    { mName      :: Text
    , mParams    :: FeatStruct GeniVal -- TODO: not sure what this is exactly
    , mSemantics :: Sem  
    , mInterface :: FeatStruct GeniVal
    }
  deriving Eq

instance DescendGeniVal Macro where
    descendGeniVal f m = m
        { mParams    = descendGeniVal f (mParams m)
        , mSemantics = descendGeniVal f (mSemantics m)
        , mInterface = descendGeniVal f (mInterface m)
        }
  
instance Collectable Macro where
    collect m = collect (mParams m)
              . collect (mSemantics m)
              . collect (mInterface m)
  
instance Collectable a => Collectable (Map.Map k a) where
    collect = collect . Map.elems


{-
spatialRel[rel=?B0]
	 semantics:[?A0:?B0(?C0,?D0,?E0) ?F0:instance-of(?D0,?G0) ?H0:instance-of(?C0,?I0) ]
	 interface:[agr2=?E0,arg1=?C0,arg2=?D0,kbclass1=?I0,kbclass2=?G0,predlabel=?A0,rel=?B0]
-}

cgmMacros :: Parser [Macro]
cgmMacros = cgmMacro `sepBy` spaces

cgmMacro :: Parser Macro
cgmMacro =
    Macro <$> (identifierR <|> stringLiteral)
          <*> squares cgmFeats
          <*> (reserved "semantics" >> colon >> squares cgmSemantics)
          <*> (reserved "interface" >> colon >> squares cgmFeats)

-- ----------------------------------------------------------------------
-- semantics
-- ----------------------------------------------------------------------

cgmSemantics :: Parser Sem
cgmSemantics = sortSem <$> many (cgmLiteral <?> "a literal")

cgmLiteral :: Parser (Literal GeniVal)
cgmLiteral = do
    handle    <- cgmValue <?> "a handle"
    _         <- char ':'
    predicate <- cgmValue <?> "a predicate"
    pars      <- parens (cgmValue `sepBy` comma) <?> "some parameters"
    --
    return (Literal handle predicate pars)

-- ----------------------------------------------------------------------
-- features
-- ----------------------------------------------------------------------

cgmFeats :: Parser (FeatStruct GeniVal)
cgmFeats = option emptyFeatStruct
         $ mkFeatStructP =<< (cgmAttVal `sepBy` comma)

mkFeatStructP :: Flist GeniVal -> Parser (FeatStruct GeniVal)
mkFeatStructP xs =
    if atts == nub atts
       then return (mkFeatStruct xs)
       else fail $ "duplicate attributes not allowed: " ++ (T.unpack .  T.unwords $ atts \\ nub atts)
  where
    atts = map avAtt xs

cgmAttVal :: Parser (AvPair GeniVal)
cgmAttVal = do
    att <- (identifierR <|> stringLiteral) <?> "an attribute"
    _   <- char '='
    val <- cgmValue    <?> "a CGM value"
    return $ AvPair att val

-- ----------------------------------------------------------------------
-- values
-- ----------------------------------------------------------------------

cgmValue :: Parser GeniVal
cgmValue = geniValue

-- ----------------------------------------------------------------------
-- language def helpers
-- ----------------------------------------------------------------------

-- Lexer

cgmLanguageDef :: LanguageDef ()
cgmLanguageDef = emptyDef
    { commentLine = "%"
    , commentStart = "/*"
    , commentEnd = "*/"
    , opLetter = oneOf ""
    , reservedOpNames = [""]
    , reservedNames = [ "semantics", "interface" ]
    , identLetter = identStuff
    , identStart  = identStuff
    }
  where
    identStuff = satisfy isGeniIdentLetter

lexer :: TokenParser ()
lexer  = makeTokenParser cgmLanguageDef

whiteSpace :: Parser ()
whiteSpace = P.whiteSpace lexer

identifier :: Parser Text
identifier = T.pack <$> P.identifier lexer

stringLiteral :: Parser Text
stringLiteral = T.pack <$> P.stringLiteral lexer

colon :: Parser String
colon = P.colon lexer

comma :: Parser String
comma = P.comma lexer

squares, braces, parens :: Parser a -> Parser a
squares = P.squares lexer
braces  = P.braces  lexer
parens  = P.parens  lexer

reserved, symbol :: String -> Parser String
reserved s = P.reserved lexer s >> return s
symbol = P.symbol lexer

-- | identifier, permitting reserved words too
identifierR :: Parser Text
identifierR = T.pack <$> do
    { c <- P.identStart cgmLanguageDef
    ; cs <- many (P.identLetter cgmLanguageDef)
    ; whiteSpace
    ; return (c:cs)
    }
    <?> "identifier or reserved word"