-
Notifications
You must be signed in to change notification settings - Fork 0
/
CodeSnippet.hs
105 lines (94 loc) · 3.65 KB
/
CodeSnippet.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
-- c-repl: a C read-eval-print loop.
-- Copyright (C) 2008 Evan Martin <[email protected]>
module CodeSnippet (
CodeSnippet(..),
parse,
runTests
) where
import Control.Monad.Error
import Data.Char
import Data.List
import Test.HUnit
import Text.ParserCombinators.Parsec hiding (parse)
import qualified Text.ParserCombinators.Parsec as Parsec
data CodeSnippet = Code String
| VarDecl String String -- Decl, initialization code.
| FunDecl String String -- Type + name, body.
deriving (Eq,Show)
type TokenStream = [(SourcePos, Token)]
tokPos = fst
data Token = Ident String | Punct String deriving (Eq, Show)
substr :: Maybe SourcePos -> Maybe SourcePos -> String -> String
substr start end str = strip $ take sublen $ drop startOfs $ str
where
startOfs = maybe 0 spOfs start
endOfs = maybe (length str) spOfs end
sublen = endOfs - startOfs
spOfs sp = sourceColumn sp - 1
strip [] = []
strip [' '] = []
strip (x:xs) = x : strip xs
stripSemi [] = []
stripSemi [';'] = []
stripSemi (x:xs) = x : stripSemi xs
parse :: String -> Either String CodeSnippet
parse input = do
-- Properly parsing C is famously impossible without processing typedefs in
-- all headers. But we can get pretty close with some heuristics.
-- This code is hideous, but it sorta comes with the territory.
case Parsec.parse p_tokenize "code" input of
Left err -> Left (show err)
Right tokenstream -> do
let (idents, rest) = span (isTypeLeader . snd) tokenstream
if length idents < 2
then return $ Code (stripSemi input)
else let (typ, var) = (init idents, last idents)
in parseDecl typ var rest
where
parseDecl typ var ((npos, Punct "("):rest) =
case dropWhile (\(_,tok) -> tok /= Punct ")") rest of
(rparen:(next,_):rest) ->
return $ FunDecl (substr Nothing (Just next) input)
(substr (Just next) Nothing input)
_ -> Left $ "couldn't find rparen"
parseDecl typ var rest =
let nextpos = case rest of
((pos, tok):rest) | tok /= Punct ";" -> Just pos
_ -> Nothing
code = case nextpos of
Just n -> substr (Just (tokPos var)) Nothing input
Nothing -> ""
in return $ VarDecl (stripSemi $ substr Nothing nextpos input)
(stripSemi code)
isTypeLeader (Ident _) = True
isTypeLeader (Punct "*") = True
isTypeLeader _ = False
p_tokenize :: Parser TokenStream
p_tokenize = many (annotate p_ident <|> annotate p_token) where
p_ident = liftM Ident $ withSpaces $ many1 (letter <|> digit <|> char '_')
p_token = do l <- withSpaces $ oneOf "()*[]={};"; return $ Punct [l]
withSpaces p = do r <- p; skipMany space; return r
annotate p = do
pos <- getPosition
p' <- p
return (pos, p')
assertParse :: CodeSnippet -> String -> Assertion
assertParse expected input = do
case parse input of
Left error -> assertFailure $ show input ++ " failed to parse: " ++ error
Right snip -> assertEqual input expected snip
testParse exp input = test $ assertParse exp input
runTests =
runTestTT $ test $ TestList [
testParse (VarDecl "int x" "x = 3") "int x = 3;"
, testParse (VarDecl "int x" "x = 3") "int x = 3"
, testParse (VarDecl "int xx" "xx = 3") "int xx = 3;"
, testParse (Code "x = 3") "x = 3"
, testParse (Code "*((char*)x) = 0") "*((char*)x) = 0;"
, testParse (VarDecl "int x" "") "int x"
, testParse (VarDecl "const char* x" "") "const char* x;"
, testParse (Code "x+y = 4") "x+y = 4;"
, testParse (Code "for (;;) x") "for (;;) x;"
, testParse (FunDecl "void f()" "{}") "void f() {}"
]
main = runTests