git @ Cat's Eye Technologies Kangaroo-Iceberg / master src / scan.c
master

Tree @master (Download .tar.gz)

scan.c @master · raw · history · blame

/*
 * Copyright (c)2004 Cat's Eye Technologies.  All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 *   Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * 
 *   Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * 
 *   Neither the name of Cat's Eye Technologies nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE. 
 */
/*
 * scan.c
 * Lexical scanner for kiceberg.
 * $Id$
 */

#include <ctype.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#include "scan.h"
#include "mem.h"

/*
 * Create a scanner reading from the named file.
 *
 * Allocates the scanner and its 256-byte token buffer, opens the file
 * for reading in text mode, sets the position to line 1, column 1, and
 * scans the first token so that it is available immediately.
 *
 * Returns the new scanner, or NULL if the token buffer could not be
 * allocated or the file could not be opened.  Everything acquired
 * before a failure is released again.
 */
struct scan_st *
scan_open(char *filename)
{
	struct scan_st *sc;

	MALLOC(sc, scan_st, "scanner");

	sc->token = (char *)malloc(256 * sizeof(char));
	if (sc->token == NULL)
		goto fail_scanner;

	sc->in = fopen(filename, "r");
	if (sc->in == NULL)
		goto fail_token;

	sc->toktype = TOKEN_NONE;
	sc->lino = 1;
	sc->columno = 1;

	/* Read ahead one token so callers can inspect it right away. */
	scan(sc);
	return(sc);

fail_token:
	free(sc->token);
fail_scanner:
	free(sc);
	return(NULL);
}

/*
 * Release a scanner: close its input file, then free the token buffer
 * and the scanner structure itself.
 *
 * Passing NULL is a harmless no-op (like free(NULL)), so the result of
 * a failed scan_open() can be handed straight to this function.
 */
void
scan_close(struct scan_st *sc)
{
	if (sc == NULL)
		return;

	fclose(sc->in);
	free(sc->token);
	free(sc);
}

/*
 * Report a scanning/parsing error on stderr.
 *
 * The printf-style message built from fmt and the following arguments
 * is printed together with the scanner's current line number, column
 * number and token text.  Messages longer than the local buffer are
 * truncated.  The scanner state is not modified.
 */
void
scan_error(struct scan_st *sc, const char *fmt, ...)
{
	va_list args;
	char error[256];

	va_start(args, fmt);
	vsnprintf(error, sizeof error, fmt, args);
	va_end(args);	/* every va_start must be paired with va_end */

	fprintf(stderr, "Error (line %d, column %d, token '%s'): %s.\n",
	    sc->lino, sc->columno, sc->token, error);
}

/*
 * Read the next character of input into *x and advance the column
 * counter.
 *
 * Returns 1 if a character was read.  Returns 0 if nothing more can be
 * read; in that case the current token is emptied and the token type
 * is set to TOKEN_EOF.
 *
 * The result of getc() is examined as an int before being narrowed to
 * char, so that a read error (where getc() returns EOF without the
 * end-of-file indicator being set) also ends the input instead of
 * being handed back as a bogus character.
 */
static int
scan_char(struct scan_st *sc, char *x)
{
	int c = getc(sc->in);

	*x = (char)c;
	sc->columno++;
	if (c == EOF) {
		sc->token[0] = 0;
		sc->toktype = TOKEN_EOF;
		return(0);
	}
	return(1);
}

/*
 * Largest number of characters stored for one token.  scan_open()
 * allocates a 256-byte token buffer, which leaves one byte for the
 * terminating NUL.
 */
#define SCAN_TOKEN_MAX	255

/* Accepts the characters that may continue a number: digits and '.'. */
static int
scan_is_number_char(int c)
{
	return(isdigit(c) || c == '.');
}

/* Accepts the characters that may continue an identifier. */
static int
scan_is_ident_char(int c)
{
	return(isalpha(c) || isdigit(c) || c == '_');
}

/*
 * Collect into sc->token the run of characters accepted by `accept',
 * beginning with the already-read character x (which the caller has
 * checked is a valid first character).
 *
 * Characters beyond SCAN_TOKEN_MAX are consumed but not stored, so an
 * over-long token is truncated rather than written past the buffer.
 * The first character that does not belong to the run is pushed back
 * onto the input.  The caller sets sc->toktype afterwards.
 */
static void
scan_collect(struct scan_st *sc, char x, int (*accept)(int))
{
	char first = x;
	int i = 0;

	do {
		if (i < SCAN_TOKEN_MAX)
			sc->token[i++] = x;
		if (!scan_char(sc, &x)) {
			/*
			 * Input ended in the middle of the token.
			 * scan_char() has just emptied the token, so put
			 * its first character back and terminate what was
			 * collected; the token is still returned and the
			 * next call to scan() reports end of input.
			 */
			sc->token[0] = first;
			sc->token[i] = 0;
			return;
		}
	} while (accept((unsigned char)x));

	/* Cast so that a 0xFF byte is not mistaken for EOF by ungetc(). */
	ungetc((unsigned char)x, sc->in);
	sc->columno--;
	sc->token[i] = 0;
}

/*
 * Advance to the next token of the input.
 *
 * On return sc->token holds the token text and sc->toktype its kind:
 *   TOKEN_NUMBER      a digit followed by any digits and '.' characters
 *   TOKEN_IDENTIFIER  a letter followed by letters, digits and '_'
 *   TOKEN_SYMBOL      the two-character symbol "->", or any other
 *                     single character
 *   TOKEN_EOF         no more input; the token text is empty
 *
 * Whitespace and comments (from '#' to the end of the line) are
 * skipped, and sc->lino / sc->columno are kept up to date.
 */
void
scan(struct scan_st *sc)
{
	char x;

	sc->token[0] = 0;
	if (feof(sc->in)) {
		sc->toktype = TOKEN_EOF;
		return;
	}
	if (!scan_char(sc, &x))
		return;

	/* Skip any mixture of whitespace and comments. */

	for (;;) {
		/* <ctype.h> functions require an unsigned char value. */
		while (isspace((unsigned char)x)) {
			if (x == '\n') {
				sc->lino++;
				sc->columno = 0;
			}
			if (!scan_char(sc, &x))
				return;
		}
		if (x != '#')
			break;
		while (x != '\n') {
			if (!scan_char(sc, &x))
				return;
		}
	}

	/*
	 * Scan decimal numbers.  Must start with a
	 * digit (not a sign or decimal point.)
	 */

	if (isdigit((unsigned char)x)) {
		scan_collect(sc, x, scan_is_number_char);
		/* Set last: scan_char() may have stored TOKEN_EOF. */
		sc->toktype = TOKEN_NUMBER;
		return;
	}

	/* Scan alphanumeric tokens. */

	if (isalpha((unsigned char)x)) {
		scan_collect(sc, x, scan_is_ident_char);
		sc->toktype = TOKEN_IDENTIFIER;
		return;
	}

	/* It's a symbol. */

	sc->toktype = TOKEN_SYMBOL;

	/* Is it a multi-character symbol? */

	if (x == '-') {
		if (!scan_char(sc, &x)) {
			/*
			 * '-' was the last character of the input; return
			 * it as a symbol instead of losing it to the EOF
			 * state that scan_char() just stored.
			 */
			strcpy(sc->token, "-");
			sc->toktype = TOKEN_SYMBOL;
			return;
		}
		if (x == '>') {
			strcpy(sc->token, "->");
			return;
		}
		ungetc((unsigned char)x, sc->in);
		sc->columno--;
		x = '-';
	}

	/* Otherwise, it's a single-character symbol. */

	sc->token[0] = x;
	sc->token[1] = 0;
}

/*
 * Require the current token to be exactly the string x.
 *
 * When it matches, the scanner moves on to the next token.  When it
 * does not, an "Expected" error is reported through scan_error() and
 * the current token is left in place.
 */
void
scan_expect(struct scan_st *sc, const char *x)
{
	if (strcmp(sc->token, x) != 0) {
		scan_error(sc, "Expected '%s'", x);
		return;
	}
	scan(sc);
}