src/yucca/main.py - yucca (master) - git @ Cat's Eye Technologies

Tree @master (Download .tar.gz)

main.py @master — raw · history · blame

# yucca -- dialect-agnostic static analyzer for 8-bit BASIC programs
# Version 1.2

# Copyright (c) 2012-2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under an MIT license.  See LICENSES directory:
# SPDX-License-Identifier: LicenseRef-MIT-X-yucca

import re
import sys
import fileinput
from optparse import OptionParser


class Error(object):
    """Base class for the violations yucca can report about a BASIC
    program.

    `line` is the offending line object (subclasses read its
    `description` when formatting a message) and `line_number` is the
    line number the violation concerns.

    """
    def __init__(self, line, line_number):
        self.line, self.line_number = line, line_number


class UndefinedStatement(Error):
    """Violation whose message reports an undefined statement (a
    referenced line number) together with the line that refers to it."""
    def __str__(self):
        target = str(self.line_number).strip()
        where = self.line.description
        return '?UNDEFINED STATEMENT "%s" IN: %s' % (target, where)


class ComputedJump(Error):
    """Violation whose message reports a computed jump target together
    with the line that contains it."""
    def __str__(self):
        target = str(self.line_number).strip()
        where = self.line.description
        return '?COMPUTED JUMP TO "%s" IN: %s' % (target, where)


class OutOfSequence(Error):
    """Violation whose message reports a line number that is out of
    sequence, together with the line that carries it."""
    def __str__(self):
        number = str(self.line_number).strip()
        where = self.line.description
        return '?OUT OF SEQUENCE LINE "%s" IN: %s' % (number, where)


class LineNumber(object):
    """A line number (or jump target) as written in a BASIC program.

    The exact source text, including any surrounding whitespace, is
    kept in `text` and is what the string representation returns.
    `number` holds the integer value of the text, or -1 when the text
    cannot be converted with int().

    """
    def __init__(self, text):
        self.text = text
        try:
            value = int(text)
        except ValueError:
            # Not a plain integer (e.g. an expression); keep the sentinel.
            value = -1
        self.number = value

    def __str__(self):
        return self.text

    def is_computed(self):
        """Return True unless the text is just decimal digits with
        optional surrounding whitespace."""
        literal = re.match(r'^\s*\d+\s*$', self.text)
        return literal is None


class BasicCommand(object):
    """Base class for a single parsed BASIC command.

    `create` classifies a command's source text; subclasses report the
    jump targets they mention through `referenced_line_numbers`.

    """
    @classmethod
    def create(class_, text):
        """Classify `text` and return the matching command object.

        The patterns below are tried in order, case-insensitively, and
        the first one that matches decides the result.  Text matching
        none of them becomes a GenericCommand.

        """
        def on_lines(found):
            targets = [LineNumber(part) for part in found.group(4).split(',')]
            return OnLines(found.group(1), targets)

        recognizers = (
            (r'^(\s*rem)(.*)$',
             lambda m: Remark(m.group(1), m.group(2))),
            (r'^(\s*goto)(.*?)$',
             lambda m: Goto(m.group(1), LineNumber(m.group(2)))),
            (r'^(\s*gosub)(.*?)$',
             lambda m: Gosub(m.group(1), LineNumber(m.group(2)))),
            # I doubt many BASICs allow a computed goto right after a
            # 'THEN', so only a literal number counts as a target here...
            (r'^(\s*if(.*?)then)(\s*\d+\s*)$',
             lambda m: IfThenLine(m.group(1), LineNumber(m.group(3)))),
            # ...and anything else after THEN is parsed as a command.
            (r'^(\s*if(.*?)then)(.*?)$',
             lambda m: IfThen(m.group(1), BasicCommand.create(m.group(3)))),
            # Tried *after* the two THEN forms, so as to not accidentally
            # match something like IF A THEN PRINT "HI":GOTO 50...
            (r'^(\s*if(.*?)goto)(.*?)$',
             lambda m: IfThenLine(m.group(1), LineNumber(m.group(3)))),
            (r'^(\s*on(.*?)go(to|sub))(.*?)$', on_lines),
        )
        for pattern, build in recognizers:
            found = re.match(pattern, text, re.I)
            if found:
                return build(found)
        return GenericCommand(text)

    def referenced_line_numbers(self):
        """Return the LineNumber objects this command refers to.

        Must be overridden; the base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError


class GenericCommand(BasicCommand):
    """A command kept as opaque text; it refers to no line numbers."""
    def __init__(self, text):
        self.text = text

    def __str__(self):
        source = self.text
        return source

    def referenced_line_numbers(self):
        no_targets = []
        return no_targets


class Remark(BasicCommand):
    """A REM command: the keyword as written (`command`) plus the
    remark text that follows it (`text`).  Refers to no line numbers."""
    def __init__(self, command, text):
        self.command, self.text = command, text

    def __str__(self):
        pieces = (self.command, self.text)
        return "%s%s" % pieces

    def referenced_line_numbers(self):
        no_targets = []
        return no_targets


class IfThen(BasicCommand):
    """A conditional whose consequent is itself a command.

    `body` is the source text up to and including THEN; `subsequent`
    is the command object parsed from what follows.

    """
    def __init__(self, body, subsequent):
        self.body, self.subsequent = body, subsequent

    def __str__(self):
        pieces = (self.body, self.subsequent)
        return "%s%s" % pieces

    def referenced_line_numbers(self):
        # Whatever the consequent command refers to, this command does too.
        consequent = self.subsequent
        return consequent.referenced_line_numbers()


class IfThenLine(BasicCommand):
    """A conditional that jumps straight to a line number.

    `body` is the source text up to and including the THEN (or GOTO)
    keyword; `line_number` is the jump target.

    """
    def __init__(self, body, line_number):
        self.body, self.line_number = body, line_number

    def __str__(self):
        pieces = (self.body, self.line_number)
        return "%s%s" % pieces

    def referenced_line_numbers(self):
        target = self.line_number
        return [target]


class Goto(BasicCommand):
    """A GOTO command: the keyword as written (`command`) and its
    target (`line_number`)."""
    def __init__(self, command, line_number):
        self.command, self.line_number = command, line_number

    def __str__(self):
        pieces = (self.command, self.line_number)
        return "%s%s" % pieces

    def referenced_line_numbers(self):
        target = self.line_number
        return [target]


class Gosub(BasicCommand):
    """A GOSUB command: the keyword as written (`command`) and its
    target (`line_number`)."""
    def __init__(self, command, line_number):
        self.command, self.line_number = command, line_number

    def __str__(self):
        pieces = (self.command, self.line_number)
        return "%s%s" % pieces

    def referenced_line_numbers(self):
        target = self.line_number
        return [target]


class OnLines(BasicCommand):
    """An ON ... GOTO / ON ... GOSUB command with a list of targets.

    `body` is the source text up to and including the GOTO/GOSUB
    keyword; `line_numbers` is the list of targets in source order.

    """
    def __init__(self, body, line_numbers):
        self.body, self.line_numbers = body, line_numbers

    def __str__(self):
        targets = ','.join(map(str, self.line_numbers))
        return "%s%s" % (self.body, targets)

    def referenced_line_numbers(self):
        targets = self.line_numbers
        return targets


class BasicLine(object):
    """One line of BASIC source: an optional line number followed by
    one or more colon-separated commands.

    Attributes:
        line_number: a LineNumber, or None for an immediate mode line
            (one that does not start with digits).
        text_file_line: the line's position in the input text, as given
            by the caller; used in violation reports.
        text: the source text with any trailing CR/LF removed ('' when
            the line was created with text=None).
        commands: list of command objects parsed from the text.

    """

    # Matches the REM keyword; used with an explicit start position.
    _REM_RE = re.compile(r'rem', re.I)

    def __init__(self, text, text_file_line):
        """Parse `text` into a line number and commands.

        Passing text=None creates an empty shell (no line number, no
        commands, empty text) for the caller to fill in; strip_remarks
        uses this to build its result.

        """
        self.line_number = None
        self.text_file_line = text_file_line
        # Always present, so that every method works on a shell line too.
        self.commands = []
        if text is None:
            self.text = ''
            return
        text = text.rstrip('\r\n')
        self.text = text
        match = re.match(r'^(\s*\d+\s*)(.*?)$', text)
        if match:
            self.line_number = LineNumber(match.group(1))
            text = match.group(2)

        self.commands = [BasicCommand.create(source)
                         for source in self._split_commands(text)]

    @staticmethod
    def _split_commands(text):
        """Split the command part of a line at its separating colons.

        Colons inside double-quoted strings do not separate commands.
        As soon as the letters REM (in any case) are seen outside a
        quoted string -- at any position, not only at the start of a
        command -- the rest of the text is kept as part of the current
        piece, colons included.

        Always returns at least one piece ('' for empty text).

        """
        pieces = []
        start = 0
        state = 'start'
        for index, char in enumerate(text):
            if state in ('start', 'cmd'):
                if BasicLine._REM_RE.match(text, index):
                    # Nothing after a REM can end the current piece.
                    break
                state = 'cmd'
            if state == 'cmd':
                if char == '"':
                    state = 'quoted'
                elif char == ':':
                    pieces.append(text[start:index])
                    start = index + 1
                    state = 'start'
            elif state == 'quoted':
                if char == '"':
                    state = 'cmd'
        pieces.append(text[start:])
        return pieces

    def __str__(self):
        text = ':'.join(str(x) for x in self.commands)
        if self.line_number:
            return "%s%s" % (self.line_number, text)
        else:
            return text

    @property
    def description(self):
        """A description of this line, used in violation reports."""
        if self.line_number:
            return str(self)
        else:
            return "%s (immediate mode, text file line %d)" % \
                (self, self.text_file_line)

    def referenced_line_numbers(self):
        """Return the LineNumber objects referenced by all commands on
        this line, in command order."""
        line_numbers = []
        for command in self.commands:
            line_numbers.extend(command.referenced_line_numbers())
        return line_numbers

    def strip_remarks(self):
        """Return a copy of this line without its Remark commands, or
        None when nothing but remarks (or nothing at all) is left.

        The copy keeps this line's line number and text file position;
        its `text` is the rendering of the remaining commands, so code
        that reads `text` keeps working on stripped lines.

        """
        kept = [command for command in self.commands
                if not isinstance(command, Remark)]
        if not kept:
            return None
        new_line = BasicLine(None, self.text_file_line)
        new_line.line_number = self.line_number
        new_line.commands = kept
        new_line.text = str(new_line)
        return new_line


class BasicProgram(object):
    r"""An object which represents a BASIC program.

    Rudimentary parsing of lines of commands:

    >>> b = BasicProgram('10 PRINT "HELLO"\n'
    ...                  '20 GOTO 10\n')
    >>> b.dump()
    10 PRINT "HELLO"
    20 GOTO 10
    >>> len(b.lines)
    2
    >>> print(b.lines[0].commands[0])
    PRINT "HELLO"
    >>> print(b.lines[1].commands[0].__class__.__name__)
    Goto

    Checking for jumps to nonexistent line numbers:

    >>> b = BasicProgram()
    >>> b.add_line('10 PRINT "HELLO"', 1)
    >>> b.add_line('20 GOTO 30', 2)
    >>> len(b.lines)
    2
    >>> for e in b.check_line_numbers(): print(e)
    ?UNDEFINED STATEMENT "30" IN: 20 GOTO 30

    Checking for GOSUB and ON GOTO, and retaining case in
    error messages:

    >>> b = BasicProgram('5 goSUb 10\n'
    ...                  '7goSUb8\n'
    ...                  '10 oN (X+1 )* 5  gOtO 100,6')
    >>> for e in b.check_line_numbers(): print(e)
    ?UNDEFINED STATEMENT "8" IN: 7goSUb8
    ?UNDEFINED STATEMENT "100" IN: 10 oN (X+1 )* 5  gOtO 100,6
    ?UNDEFINED STATEMENT "6" IN: 10 oN (X+1 )* 5  gOtO 100,6

    Whitespace and case are retained when dumping a program:

    >>> b = BasicProgram('5 goSUb 10\n'
    ...                  '7gOSub8\n'
    ...                  '9   rem WHAT? ??:>?: >?\n'
    ...                  '\n'
    ...                  '800   PRINT::print:ZORK  30\n'
    ...                  '10 oN   ERROR  gOtO 100, 6,7,  800  ,3\n'
    ...                  ' 99 what  \n'
    ...                  'if50then60\n'
    ...                  '50ifthisstuffistruegoto70\n'
    ...                  '60 If  This Stuff Is True  Then   Print:GoTo  9\n'
    ... )
    >>> b.dump()
    5 goSUb 10
    7gOSub8
    9   rem WHAT? ??:>?: >?
    <BLANKLINE>
    800   PRINT::print:ZORK  30
    10 oN   ERROR  gOtO 100, 6,7,  800  ,3
     99 what  
    if50then60
    50ifthisstuffistruegoto70
    60 If  This Stuff Is True  Then   Print:GoTo  9

    Remarks may contain colons:

    >>> b = BasicProgram('10 REM HELLO: GOTO 20')
    >>> len(b.lines[0].commands)
    1
    >>> print(b.lines[0].commands[0].__class__.__name__)
    Remark
    >>> b.check_line_numbers()
    []

    Immediate mode commands are checked, and can be stripped:

    >>> b = BasicProgram('10 REM HELLO\n'
    ...                  'PRINT "HELLO"\n'
    ...                  'GOTO 20')
    >>> for e in b.check_line_numbers(): print(e)
    ?UNDEFINED STATEMENT "20" IN: GOTO 20 (immediate mode, text file line 3)
    >>> b.strip_immediate_mode_commands()
    >>> b.dump()
    10 REM HELLO
    >>> b.check_line_numbers()
    []

    Remarks, both on numbered lines and immediate mode, can be
    stripped:

    >>> b = BasicProgram('10 PRINT "HI":REM HELLO\n'
    ...                  'PRINT "HELLO"\n'
    ...                  'REM WHAT?\n'
    ...                  '20 GOTO 30\n'
    ...                  '30 REM THIS IS BUNK, MAN\n')
    >>> b.strip_remarks()
    >>> b.dump()
    10 PRINT "HI"
    PRINT "HELLO"
    20 GOTO 30
    >>> for e in b.check_line_numbers(): print(e)
    ?UNDEFINED STATEMENT "30" IN: 20 GOTO 30

    Proper (sequential) ordering of line numbers can be checked for.

    >>> b = BasicProgram('10 PRINT "HI":REM HELLO\n'
    ...                  'PRINT "IMMEDIATE MODE"\n'
    ...                  '20 GOTO 30\n'
    ...                  '30 PRINT "HI"\n')
    >>> b.check_ascending()
    []
    >>> b = BasicProgram('10 PRINT\n'
    ...                  '20 PRINT\n'
    ...                  '20 GOTO 30\n'
    ...                  '30 PRINT\n'
    ...                  '40 GOTO 30\n'
    ...                  '60 GOTO 30\n'
    ...                  'PRINT "IMMEDIATE MODE"\n'
    ...                  '50 GOTO 30\n')
    >>> for e in b.check_ascending(): print(e)
    ?OUT OF SEQUENCE LINE "20" IN: 20 GOTO 30
    ?OUT OF SEQUENCE LINE "50" IN: 50 GOTO 30

    Computed GOTOs/GOSUBs can be detected.  Note that a line number
    computation cannot appear after the THEN in an IF...THEN because
    it cannot readily be distinguished from a command.

    >>> b = BasicProgram('10 GOTO A * 4\n'
    ...                  '20 EARTH:AIR:WATER:FIRE:GOSUB 6+7\n'
    ...                  '30 GOTO 30\n'
    ...                  '35 GOTO 35.0\n'
    ...                  '40 IFATHENP*40\n'
    ...                  '50 IFAGOTOP*40\n'
    ...                  '60 ONAGOTO10,20,70\n'
    ...                  'GOSUB -10*-1\n')
    >>> for e in b.check_computed_jumps(): print(e)
    ?COMPUTED JUMP TO "A * 4" IN: 10 GOTO A * 4
    ?COMPUTED JUMP TO "6+7" IN: 20 EARTH:AIR:WATER:FIRE:GOSUB 6+7
    ?COMPUTED JUMP TO "35.0" IN: 35 GOTO 35.0
    ?COMPUTED JUMP TO "P*40" IN: 50 IFAGOTOP*40
    ?COMPUTED JUMP TO "-10*-1" IN: GOSUB -10*-1 (immediate mode, text file line 8)

    Computed GOTOs/GOSUBs are not analyzed for validity as jump targets.

    >>> for e in b.check_line_numbers(): print(e)
    ?UNDEFINED STATEMENT "70" IN: 60 ONAGOTO10,20,70

    >>> b = BasicProgram('418 IF IR%>=92 THEN ON IR%-91 GOTO 361,311,321,331')
    >>> print(b.lines[0].commands[0].__class__.__name__)
    IfThen
    >>> len([e for e in b.check_computed_jumps()])
    0

    Symbolic constants defined within the program can be collected
    and expanded.

    >>> b = BasicProgram('[value]=10\n'
    ...                  '[xyz]=PRINT\n'
    ...                  '10 FORI=1TO[value]:[xyz]I:NEXT\n')
    >>> b.dump()
    [value]=10
    [xyz]=PRINT
    10 FORI=1TO[value]:[xyz]I:NEXT
    >>> d = b.collect_symbols()
    >>> sorted(d.keys())
    ['value', 'xyz']
    >>> d['value']
    '10'
    >>> d['xyz']
    'PRINT'
    >>> b.dump()
    10 FORI=1TO[value]:[xyz]I:NEXT
    >>> b.expand_symbols(d)
    >>> b.dump()
    10 FORI=1TO10:PRINTI:NEXT

    """
    def __init__(self, text=None):
        """Create a program, optionally parsing `text` (newline-separated
        source lines, numbered from 1 for reporting purposes)."""
        self.lines = []
        if text is not None:
            source_lines = text.rstrip('\r\n').split('\n')
            for text_file_line, line in enumerate(source_lines, 1):
                self.add_line(line, text_file_line)

    def add_line(self, line, text_file_line):
        """Parse one source line and append it to the program."""
        self.lines.append(BasicLine(line, text_file_line))

    def check_ascending(self):
        """Return an OutOfSequence error for every numbered line whose
        number is not greater than that of the previous numbered line.

        Immediate mode lines are skipped and do not reset the comparison.

        """
        errors = []
        last_line_number = None
        for line in self.lines:
            if line.line_number is not None:
                if last_line_number is not None:
                    if line.line_number.number <= last_line_number.number:
                        errors.append(OutOfSequence(line, line.line_number))
                last_line_number = line.line_number
        return errors

    def check_line_numbers(self):
        """Return an UndefinedStatement error for every non-computed
        jump target that no line in the program defines.

        Lines are keyed by their line number, so when several lines share
        a number only the last of them is considered.  Immediate mode
        lines are keyed by their text file line instead.

        """
        referenced = {}
        defined = {}
        errors = []
        for line in self.lines:
            if line.line_number is not None:
                location = line.line_number.number
            else:
                location = "IMMEDIATE MODE (line %d)" % line.text_file_line
            defined[location] = line
            referenced[location] = line.referenced_line_numbers()
        # Numbered locations sort by number; immediate mode locations
        # (strings) all sort as 0.
        for location in sorted(referenced.keys(), key=lambda x: x if re.match(r'^\d+$', str(x)) else 0):
            for referenced_line_number in referenced[location]:
                if referenced_line_number.is_computed():
                    # Cannot be resolved statically; see check_computed_jumps.
                    continue
                if referenced_line_number.number not in defined:
                    errors.append(
                        UndefinedStatement(defined[location], referenced_line_number)
                    )
        return errors

    def check_computed_jumps(self):
        """Return a ComputedJump error for every jump target that is not
        a plain line number."""
        errors = []
        for line in self.lines:
            for line_number in line.referenced_line_numbers():
                if line_number.is_computed():
                    errors.append(ComputedJump(line, line_number))
        return errors

    def strip_immediate_mode_commands(self):
        """Drop every line that has no line number."""
        self.lines = [line for line in self.lines
                      if line.line_number is not None]

    def strip_remarks(self, program_lines_only=False):
        """Remove remarks from the program's lines, dropping lines that
        become empty.

        When `program_lines_only` is true, immediate mode lines are kept
        unchanged.

        """
        new_lines = []
        for line in self.lines:
            if program_lines_only and line.line_number is None:
                new_lines.append(line)
                continue
            new_line = line.strip_remarks()
            if new_line is not None:
                new_lines.append(new_line)
        self.lines = new_lines

    def collect_symbols(self):
        """Remove lines of the form `[name]=value` from the program and
        return them as a dict mapping name to value."""
        symbols = {}
        new_lines = []
        for line in self.lines:
            match = re.match(r'^\[(.*?)\]=(.*?)$', line.text)
            if match:
                symbols[match.group(1)] = match.group(2)
            else:
                new_lines.append(line)
        self.lines = new_lines
        return symbols

    @staticmethod
    def _expand_text(text, symbols):
        """Return `text` with each `[name]` replaced by symbols[name].

        The substitution is literal: symbol values are inserted verbatim,
        so backslashes in a value are not treated as regular-expression
        escapes or group references.

        """
        for symbol in symbols:
            text = text.replace('[%s]' % symbol, symbols[symbol])
        return text

    def expand_symbols(self, symbols):
        """Replace every `[name]` in every line with its value from
        `symbols`, re-parsing each line from the expanded text."""
        self.lines = [
            BasicLine(self._expand_text(line.text, symbols),
                      line.text_file_line)
            for line in self.lines
        ]

    def dump(self):
        """Print every line of the program to standard output."""
        for line in self.lines:
            print(line)


def main():
    """Command-line entry point.

    Parses options, reads the BASIC program from the files named on the
    command line (via fileinput), applies the requested transformations,
    runs the enabled checks and, if asked to, dumps the program.

    Exits with status 1 after writing the violations to standard error
    when any check fails, and with status 0 otherwise.  With --test, the
    module's doctests are run instead and the exit status reflects
    whether any of them failed.

    """
    option_table = (
        (("-A", "--no-check-ascending"), dict(
            dest="check_ascending",
            default=True,
            action="store_false",
            help="do not check that line numbers are given in "
                 "strictly ascending order")),
        (("-C", "--allow-computed-jumps"), dict(
            dest="allow_computed_jumps",
            default=False,
            action="store_true",
            help="acknowledge that the program contains computed "
                 "GOTOs/GOSUBs, which cannot be analyzed by yucca; "
                 "without this flag, any occurrence of a computed "
                 "jump will be rejected as an error")),
        (("-I", "--strip-immediate-mode"), dict(
            dest="strip_immediate_mode",
            action="store_true",
            help="strip immediate mode commands (implies -o)")),
        (("-L", "--no-check-line-numbers"), dict(
            dest="check_line_numbers",
            default=True,
            action="store_false",
            help="do not check that all target line numbers exist")),
        (("-o", "--dump-output"), dict(
            dest="dump_output",
            action="store_true",
            help="dump (possibly transformed) program to standard "
                 "output; implied by other options")),
        (("-p", "--program-lines-only"), dict(
            dest="program_lines_only",
            action="store_true",
            help="have transformations only affect program lines, "
                 "not immediate mode lines")),
        (("-R", "--strip-remarks"), dict(
            dest="strip_remarks",
            action="store_true",
            help="strip all REM statements from program (implies -o)")),
        (("-t", "--test"), dict(
            action="store_true", dest="test", default=False,
            help="run internal test cases and exit")),
        (("-x", "--expand-symbols"), dict(
            dest="expand_symbols",
            action="store_true",
            help="expand symbolic names defined in the source file "
                 "(implies -o)")),
    )
    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, args = parser.parse_args()

    if options.test:
        import doctest
        failures, _attempted = doctest.testmod(sys.modules[__name__],
                                               verbose=True)
        sys.exit(0 if failures == 0 else 1)

    program = BasicProgram()
    for number, source_line in enumerate(fileinput.input(args), 1):
        program.add_line(source_line, number)

    # Each transformation implies dumping the transformed program.
    if options.expand_symbols:
        options.dump_output = True
        program.expand_symbols(program.collect_symbols())

    if options.strip_immediate_mode:
        options.dump_output = True
        program.strip_immediate_mode_commands()

    if options.strip_remarks:
        options.dump_output = True
        program.strip_remarks(program_lines_only=options.program_lines_only)

    errors = []
    if options.check_ascending:
        errors.extend(program.check_ascending())
    if not options.allow_computed_jumps:
        errors.extend(program.check_computed_jumps())
    if options.check_line_numbers:
        errors.extend(program.check_line_numbers())
    if errors:
        for error in errors:
            sys.stderr.write("%s\n" % error)
        sys.exit(1)

    if options.dump_output:
        program.dump()

    sys.exit(0)