git @ Cat's Eye Technologies Iphigeneia / master src / Scanner.hs
master

Tree @master (Download .tar.gz)

Scanner.hs @masterraw · history · blame

--
-- Copyright (c)2007 Chris Pressey, Cat's Eye Technologies.
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
--   1. Redistributions of source code must retain the above copyright
--      notices, this list of conditions and the following disclaimer.
--   2. Redistributions in binary form must reproduce the above copyright
--      notices, this list of conditions, and the following disclaimer in
--      the documentation and/or other materials provided with the
--      distribution.
--   3. Neither the names of the copyright holders nor the names of their
--      contributors may be used to endorse or promote products derived
--      from this software without specific prior written permission. 
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING, BUT NOT
-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-- FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
-- COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-- POSSIBILITY OF SUCH DAMAGE.
--

-----------------------------------------------------------------------
-- ============================= Scanner =========================== --
-----------------------------------------------------------------------

module Scanner where

import Data.Char

data Token = Ident String
           | IntLit Integer
           | OpenCommentToken
           | CloseCommentToken
           | BecomesToken
           | GreaterThanToken
           | GreaterThanOrEqualToken
           | EqualToken
           | NotEqualToken
           | LessThanOrEqualToken
           | LessThanToken
           | StmtSepToken
           | AndToken
           | OrToken
           | NotToken
           | AddToken
           | SubtractToken
           | MultiplyToken
           | DivideToken
           | OpenParenToken
           | CloseParenToken
           | IfToken
           | ThenToken
           | ElseToken
           | WhileToken
           | DoToken
           | BeginToken
           | EndToken
           | InputToken
           | PrintToken
           | LetToken
           | InToken
           | VarToken
           | LoopToken
           | RepeatToken
           | ValueOfToken
           | TokenizerError
           deriving (Show, Read, Eq)

digitVal '0' = 0
digitVal '1' = 1
digitVal '2' = 2
digitVal '3' = 3
digitVal '4' = 4
digitVal '5' = 5
digitVal '6' = 6
digitVal '7' = 7
digitVal '8' = 8
digitVal '9' = 9

tokens = [("(*", OpenCommentToken),
          ("*)", CloseCommentToken),
          (":=", BecomesToken),
          (">=", GreaterThanOrEqualToken),
          ("<=", LessThanOrEqualToken),
          (">", GreaterThanToken),
          ("<", LessThanToken),
          ("=", EqualToken),
          ("/=", NotEqualToken),
          (";", StmtSepToken),
          ("&", AndToken),
          ("|", OrToken),
          ("!", NotToken),
          ("+", AddToken),
          ("-", SubtractToken),
          ("*", MultiplyToken),
          ("/", DivideToken),
          ("(", OpenParenToken),
          (")", CloseParenToken),
          ("if", IfToken),
          ("then", ThenToken),
          ("else", ElseToken),
          ("while", WhileToken),
          ("do", DoToken),
          ("begin", BeginToken),
          ("end", EndToken),
          ("input", InputToken),
          ("print", PrintToken),
          ("let", LetToken),
          ("in", InToken),
          ("var", VarToken),
          ("loop", LoopToken),
          ("repeat", RepeatToken),
          ("valueof", ValueOfToken)]

findToken string [] =
    (Nothing, string)
findToken string ((tokenString, token):rest)
    | (take len string) == tokenString =
        (Just token, (drop len string))
    | otherwise =
        findToken string rest
    where
        len = length tokenString

tokenize [] = []
tokenize string@(char:chars)
    | isSpace char =
        tokenize chars
    | isDigit char =
        tokenizeIntLit string 0
    | foundToken == Just OpenCommentToken =
        let
            newRestOfString = gobble CloseCommentToken restOfString
        in
            tokenize newRestOfString
    | foundToken /= Nothing =
        let
            (Just token) = foundToken
        in
            token:(tokenize restOfString)
    | isAlpha char =
        tokenizeIdent string ""
    | otherwise =
        [TokenizerError]
    where
        (foundToken, restOfString) = findToken string tokens

gobble token [] = []
gobble token string@(char:chars)
    | foundToken == Just token =
        restOfString
    | otherwise =
        gobble token chars
    where
        (foundToken, restOfString) = findToken string tokens

tokenizeIntLit [] num = [IntLit num]
tokenizeIntLit string@(char:chars) num
    | isDigit char =
        tokenizeIntLit chars (num * 10 + digitVal char)
    | otherwise =
        IntLit num:(tokenize string)

tokenizeIdent [] id = [Ident (reverse id)]
tokenizeIdent string@(char:chars) id
    | isAlpha char =
        tokenizeIdent chars (char:id)
    | otherwise =
        Ident (reverse id):(tokenize string)