git @ Cat's Eye Technologies Kosheri / master src / scan.c
master

Tree @master (Download .tar.gz)

scan.c @masterraw · history · blame

/*
 * scan.c
 * Lexical scanner.
 * $Id: scan.c 139 2008-07-16 09:56:31Z catseye $
 */

#include <stdarg.h>

#include "lib.h"

#include "stream.h"
#include "file.h"

#include "scan.h"
#include "report.h"
#include "render.h"

/*
 * Kinds of token that real_scan() can produce.
 */
enum token_type {
	TOKEN_EOF = 0,			/* input is exhausted */
	TOKEN_NUMBER = 1,		/* starts with a digit; digits and '.' */
	TOKEN_BAREWORD = 2,		/* letters, digits, '_'; not starting with a digit */
	TOKEN_SYMBOL = 3,		/* any other single char, or <, >, ! with optional '=' */
	TOKEN_QUOTED_STRING = 4		/* text between double quotes, quotes excluded */
};

/*
 * State of one lexical scanner: the input it reads from, the token it
 * scanned most recently, and the position reported in diagnostics.
 */
struct scanner {
        struct reporter *reporter;	/* where scanner_report() sends its messages */
	struct process	*input;		/* file process from which we are scanning */
	const char	*filename;	/* name of file scanning from; pointer is stored, not copied */
	char		*token;		/* text content of token we just scanned (NUL-terminated) */
	enum token_type	 token_type;	/* type of token that was scanned */
	unsigned int	 token_length;	/* length of the token that was scanned */
	int	 	 line;		/* current line number, 1-based */
	int	 	 column;	/* current column number, 1-based */
	int	 	 last_column;	/* column before the last scan_char(); restored by putback() */
	char		*putback_buf;	/* buffer of characters put back */
	int		 putback_pos;	/* index of the newest put-back char; 0 means none pending */
};

/*
 * Number of chars allocated for a scanner's putback buffer.  putback()
 * pre-increments putback_pos before storing, so slot 0 is never used and
 * at most PUTBACK_SIZE - 1 characters can be pending at once.
 */
#define	PUTBACK_SIZE	80

struct scanner *
scanner_new(struct reporter *r)
{
	struct scanner *sc;

	if ((sc = malloc(sizeof(struct scanner))) == NULL) {
		return NULL;
	}
	if ((sc->token = malloc(256 * sizeof(char))) == NULL) {
		free(sc);
		return NULL;
	}

        sc->reporter = r;
	sc->filename = NULL;
	sc->input = NULL;
	sc->putback_buf = malloc(PUTBACK_SIZE * sizeof(char));
	sc->putback_pos = 0;

	return sc;
}

/*
 * Close the scanner's input (if any) and release the scanner together
 * with its token and putback buffers.  Passing NULL is a no-op.
 *
 * Note that this goes through scanner_close(), which closes whatever
 * input is currently set, including one supplied via scanner_attach().
 */
void
scanner_free(struct scanner *sc)
{
	if (sc == NULL) {
		return;
	}
	scanner_close(sc);
	free(sc->putback_buf);
	free(sc->token);
	free(sc);
}

/*
 * Rewind the scanner's bookkeeping to the start of its input and scan
 * the first token, so that a current token is available right away.
 */
void
scanner_reset(struct scanner *sc)
{
	sc->line = 1;
	sc->column = 1;
	sc->last_column = 0;
	/*
	 * Drop anything still pending in the putback buffer; otherwise
	 * characters left over from a previous input would be delivered
	 * as the beginning of this one.
	 */
	sc->putback_pos = 0;
	scanner_scan(sc);		/* prime the pump */
}

/*
 * Open the named file for reading and begin scanning it.
 *
 * Only the filename pointer is stored, not a copy of the string, so
 * the caller remains responsible for freeing the filename.
 *
 * Returns 1 on success.  If the file cannot be opened, an error is
 * reported and 0 is returned.
 */
int
scanner_open(struct scanner *sc, const char *filename)
{
	sc->filename = filename;
	sc->input = file_open(filename, "r");
	if (sc->input != NULL) {
		scanner_reset(sc);
		return 1;
	}

	scanner_report(sc, REPORT_ERROR,
	    "Can't open '%s' for reading", filename);
	return 0;
}

/*
 * Begin scanning from an already-open process, using the given
 * filename in diagnostics.
 *
 * Only the filename pointer is stored, not a copy of the string, so
 * the caller remains responsible for freeing the filename.
 *
 * Always returns 1.
 */
int
scanner_attach(struct scanner *sc, struct process *p, const char *filename)
{
	sc->input = p;
	sc->filename = filename;
	scanner_reset(sc);

	return 1;
}

/*
 * Detach the scanner from its input: forget the filename and, if an
 * input is set, close it and clear the pointer.
 */
void
scanner_close(struct scanner *sc)
{
	struct process *in = sc->input;

	/* The filename is caller-owned, so it is only forgotten here. */
	sc->filename = NULL;

	if (in == NULL) {
		return;
	}
	stream_close(NULL, in);
	sc->input = NULL;
}

/*
 * Fetch the next character, either from the putback buffer or from the
 * input, and advance the line/column position accordingly.
 *
 * x is not a string, it is a pointer to a single character.  It is set
 * to '\0' before reading from the input, so the caller never sees an
 * indeterminate value if the read delivers nothing.
 */
static void
scan_char(struct scanner *sc, char *x)
{
	sc->last_column = sc->column;

	/* do a 'getc' */
	if (sc->putback_pos > 0) {
		*x = sc->putback_buf[sc->putback_pos--];
	} else {
		*x = '\0';
		stream_read(NULL, sc->input, x, sizeof(char));
	}

	if (*x == '\n') {
		sc->column = 1;
		sc->line++;
	} else if (*x == '\t') {
		/*
		 * Columns are 1-based, so with 8-wide tabs the tab stops
		 * are at columns 1, 9, 17, ...: advance to the next one.
		 */
		sc->column++;
		while (sc->column % 8 != 1)
			sc->column++;
	} else {
		sc->column++;
	}
}

/*
 * Push one character back so that the next scan_char() returns it, and
 * rewind the line/column position to match.
 *
 * Nothing is pushed back once the input is at its end.  If the putback
 * buffer is full, an error is reported and the character is dropped; in
 * that case the position is left alone, since nothing was un-read.
 */
static void
putback(struct scanner *sc, char x)
{
	if (stream_is_at_end(NULL, sc->input))
		return;

	/* do a 'ungetc' */
	if (sc->putback_pos >= (PUTBACK_SIZE - 1)) {
		scanner_report(sc, REPORT_ERROR,
		    "Putback buffer size exceeded on '%s'", sc->filename);
		return;
	}
	sc->putback_buf[++sc->putback_pos] = x;

	sc->column = sc->last_column;
	if (x == '\n')
		sc->line--;
}

static void
real_scan(struct scanner *sc)
{
	char x;
	int i = 0;

	sc->token[0] = '\0';
	sc->token_length = 0;
	if (stream_is_at_end(NULL, sc->input)) {
		sc->token_type = TOKEN_EOF;
		return;
	}

	scan_char(sc, &x);

	/* Skip whitespace. */

top:
	while (k_isspace(x) && !stream_is_at_end(NULL, sc->input)) {
		scan_char(sc, &x);
	}

	/* Skip comments. */

	if (x == '/') {
		scan_char(sc, &x);
		if (x == '/') {
			while (x != '\n' && !stream_is_at_end(NULL, sc->input)) {
				scan_char(sc, &x);
			}
			goto top;
		} else {
			putback(sc, x);
			x = '/';
			/* falls through to the bottom of scan() */
		}
	}

	if (stream_is_at_end(NULL, sc->input)) {
		sc->token[0] = '\0';
		sc->token_type = TOKEN_EOF;
		return;
	}

	/*
	 * Scan decimal numbers.  Must start with a
	 * digit (not a sign or decimal point.)
	 */
	if (k_isdigit(x)) {
		while ((k_isdigit(x) || x == '.') && !stream_is_at_end(NULL, sc->input)) {
			sc->token[i++] = x;
			sc->token_length++;
			scan_char(sc, &x);
		}
		putback(sc, x);
		sc->token[i] = '\0';
		sc->token_type = TOKEN_NUMBER;
		return;
	}

	/*
	 * Scan quoted strings.
	 */
	if (x == '"') {
		scan_char(sc, &x);
		while (x != '"' && !stream_is_at_end(NULL, sc->input) && i < 255) {
			sc->token[i++] = x;
			sc->token_length++;
			scan_char(sc, &x);
		}
		sc->token[i] = '\0';
		sc->token_type = TOKEN_QUOTED_STRING;
		return;
	}

	/*
	 * Scan alphanumeric ("bareword") tokens.
	 */
	if (k_isalpha(x) || x == '_') {
		while ((k_isalpha(x) || k_isdigit(x) || x == '_') && !stream_is_at_end(NULL, sc->input)) {
			sc->token[i++] = x;
			sc->token_length++;
			scan_char(sc, &x);
		}
		putback(sc, x);
		sc->token[i] = '\0';
		sc->token_type = TOKEN_BAREWORD;
		return;
	}

	/*
	 * Scan multi-character symbols.
	 */
	if (x == '>' || x == '<' || x == '!') {
		sc->token[i++] = x;
		sc->token_length++;
		scan_char(sc, &x);
		if (x == '=' && !stream_is_at_end(NULL, sc->input)) {
			sc->token[i++] = x;
			sc->token_length++;
			scan_char(sc, &x);
		} else {
			putback(sc, x);
		}
		sc->token[i] = '\0';
		sc->token_type = TOKEN_SYMBOL;
		return;
	}

	/*
	 * Degenerate case: scan single symbols.
	 */
	sc->token[0] = x;
	sc->token[1] = '\0';
	sc->token_length = 1;
	sc->token_type = TOKEN_SYMBOL;
}

/*
 * Advance to the next token.  When built with DEBUG defined, the text
 * of the token just scanned is also printed.
 */
void
scanner_scan(struct scanner *sc)
{
	real_scan(sc);

#if defined(DEBUG)
	printf("scanned -> '%s'\n", sc->token);
#endif
}

/*
 * Require the current token to be exactly x.  On a match, advance to
 * the next token; otherwise report an error and leave the current
 * token in place.
 */
void
scanner_expect(struct scanner *sc, const char *x)
{
	if (strcmp(sc->token, x) != 0) {
		scanner_report(sc, REPORT_ERROR, "Expected '%s'", x);
		return;
	}

	scanner_scan(sc);
}

/*
 * Discard input up to and including the next newline (or up to the end
 * of the input), then scan the token that follows.
 */
void
scanner_scanline(struct scanner *sc)
{
	/*
	 * Start from a known value so the comparison below is well
	 * defined even if the first read delivers nothing.
	 */
	char x = '\0';

	scan_char(sc, &x);
	while (x != '\n' && !stream_is_at_end(NULL, sc->input)) {
		scan_char(sc, &x);
	}
	real_scan(sc);
}

/*
 * Return 1 if the current token's text is exactly `token`, else 0.
 */
int
scanner_tokeq(struct scanner *sc, const char *token)
{
	const int diff = strcmp(sc->token, token);

	return diff == 0;
}

/*
 * Return the text of the current token.  The pointer refers to the
 * scanner's own token buffer, which is overwritten by the next scan.
 */
const char *
scanner_token_string(struct scanner *sc)
{
	const char *text = sc->token;

	return text;
}

/*
 * Return the length recorded for the current token.
 */
int
scanner_token_length(struct scanner *sc)
{
	/* Stored as unsigned int; the interface hands it back as int. */
	return (int)sc->token_length;
}

/*
 * Return 1 if the current token is the end-of-input token, else 0.
 */
int
scanner_eof(struct scanner *sc)
{
	if (sc->token_type == TOKEN_EOF) {
		return 1;
	}
	return 0;
}

/*
 * Return the name of the file being scanned, or the placeholder
 * "<no file>" when no filename is set.
 */
const char *
scanner_filename(struct scanner *sc)
{
	if (sc->filename == NULL) {
		return "<no file>";
	}
	return sc->filename;
}

/*
 * Return the scanner's current line number (1-based).
 */
int
scanner_line(struct scanner *sc)
{
	const int current_line = sc->line;

	return current_line;
}

/*
 * Return the scanner's current column number (1-based).
 */
int
scanner_column(struct scanner *sc)
{
	const int current_column = sc->column;

	return current_column;
}

/*
 * Report a printf-style message through the scanner's reporter,
 * prefixed with the scanner's current file, line, column and token.
 */
void
scanner_report(struct scanner *sc, enum report_type rtype, const char *fmt, ...)
{
	va_list ap;
	const char *where = scanner_filename(sc);
	const char *tok = scanner_token_string(sc);
	int line = scanner_line(sc);
	int col = scanner_column(sc);

	/*
	 * The location prefix is written straight to the reporter's
	 * stream (bypassing the reporter itself) so that it appears in
	 * front of the message proper.
	 */
	process_render(reporter_stream(sc->reporter),
	    "(%s, line %d, column %d, token '%s'): ",
	    where, line, col, tok);

	va_start(ap, fmt);
	report_va_list(sc->reporter, rtype, fmt, ap);
	va_end(ap);
}