diff --git a/README.md b/README.md index edc4bff..19f81ae 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ `relwrite` ========== -`relwrite` relates strings to string via a grammar in the Chomsky hierarchy. +`relwrite` relates strings to strings via a grammar in the Chomsky hierarchy. What does "relate" mean in this context? @@ -20,7 +20,7 @@ best on small inputs. There are, however, features intended to improve performance in the case of very -long derivations. Specifying a search strategy enables a **beam search** algorithm +long derivations. Search strategies can be used to enable a **beam search** algorithm which aggressively focuses on derivations with a desired property, e.g. a particular minimum length. This does sacrifice completeness however -- only a handful of all the possible results will be returned. @@ -30,33 +30,47 @@ ### Example usage -Generate a string from a non-terminal in a grammar: +Generate a string from a starting non-terminal in a grammar: ``` -./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 +./bin/relwrite complete eg/recursive-grammar.json \ + --start "<Sentence>" --max-derivations=1 ``` Parse a string w.r.t. a grammar: ``` -./bin/relwrite eg/recursive-grammar.json --parse --start "a penguin sees a penguin then sees a dog" +./bin/relwrite complete eg/recursive-grammar.json \ + --parse --start "a penguin sees a penguin then sees a dog" \ + --goal "<Sentence>" ``` -Generate a really long string from a non-terminal in a grammar, without running out -of memory and only taking a few hours of processor time: +Use the `complete` strategy to generate all possible strings from a +starting non-terminal in a grammar. NOTE that this can use unreasonable +amounts of resources, with possibly adverse effects on your system. 
``` -./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" \ - --max-derivations=1 --strategy=expand --expand-until=3000 \ +./bin/relwrite complete eg/sample-grammar.json --start "<Sentence>" +``` + +Use the `expand` strategy to generate a really long string from a non-terminal +in a grammar, without running out of memory and only taking a few hours of +processor time: + +``` +./bin/relwrite expand eg/recursive-grammar.json \ + --start "<Sentence>" --max-derivations=1 --expand-until=3000 \ --output-file=out.json ``` -Parse a really long string from a non-terminal in a grammar, without running out -of memory and only taking a few hours of processor time. This assumes the string -to be parsed is in JSON format in the file `xyz.json`. +Use the `contract` strategy to parse a really long string from a non-terminal +in a grammar, without running out of memory and only taking a few hours of +processor time. This assumes the string to be parsed is in JSON format in +the file `out.json` -- the generation example above would produce this. ``` -./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contract +./bin/relwrite contract eg/recursive-grammar.json \ + --parse --start-set-file=out.json ``` ### Detailed usage @@ -69,11 +83,7 @@ `relwrite` uses the term "derivation" as a generic term meaning "a parse or a generated utterance". It also uses the term "utterance" to mean "any string of terminals and non-terminals". -### TODO (immediate) - -* Turn `complete` into a strategy that must be explicitly selected. - -### TODO (aspirational) +### TODO Analyze the input grammar and classify it in the Chomsky hierarchy. 
diff --git a/src/relwrite/engine.py b/src/relwrite/engine.py index dc92149..ae256c2 100644 --- a/src/relwrite/engine.py +++ b/src/relwrite/engine.py @@ -41,11 +41,11 @@ def derive( rules, working_utterances, + strategy, max_derivations=None, max_matches=None, verbose=False, save_snapshots_every=None, - strategy=None, expand_until=None, beam_width=10 ): @@ -55,6 +55,7 @@ iter = 0 scoring_functions = { + 'complete': None, 'expand': lambda u: 0 - len(u), 'contract': lambda u: len(u), 'minimize-nonterminals': lambda u: sum(map(lambda s: s.startswith('<'), u)), @@ -85,8 +86,9 @@ working_utterances, final_utterances = generate(rules, working_utterances, max_matches=max_matches) # beam search: sort by score and trim before continuing - if strategy: - working_utterances = sorted(working_utterances, key=scoring_functions[strategy])[:beam_width] + scoring_function = scoring_functions[strategy] + if scoring_function: + working_utterances = sorted(working_utterances, key=scoring_function)[:beam_width] for utterance in final_utterances: collected_utterances.append(utterance) diff --git a/src/relwrite/main.py b/src/relwrite/main.py index 2fd9100..ba3e558 100644 --- a/src/relwrite/main.py +++ b/src/relwrite/main.py @@ -8,7 +8,19 @@ def main(args): argparser = ArgumentParser() - # NOTE: these options are provisional and will change + # NOTE: these options are somewhat provisional and may change + + # Strategy + + argparser.add_argument( + "strategy", metavar='STRATEGY', type=str, + help="apply this strategy to the search; must be one of " + "`complete`, `expand`, or `contract`. 
NOTE: while " + "`complete` will find all parses, it will also use " + "the most resources, with possibly adverse effects " + "on your system; the other strategies use " + "beam search to avoid this" + ) # Input/output specifying parameters @@ -26,43 +38,38 @@ argparser.add_argument( "--parse", action="store_true", default=False, - help="Process rules from right to left" + help="process rules from right to left" ) argparser.add_argument( "--start", metavar='UTTERANCE', type=str, default=None, - help="A single utterance to use as " + help="a single utterance to use as " "the starting point of the derivation" ) argparser.add_argument( "--start-set-file", metavar='FILENAME', type=str, default=None, - help="Use the set of utterances in this JSON file as " + help="use the set of utterances in this JSON file as " "the starting point of the derivation" ) argparser.add_argument( "--goal", metavar='UTTERANCE', type=str, default=None, - help="A single utterance; if given, the processor expects it " + help="a single utterance; if given, the processor expects it " "to be the final result of the derivation; if it is not, " "exits with a non-zero error code" ) argparser.add_argument( "--max-derivations", metavar='COUNT', type=int, default=None, - help="The maximum number of derivations to produce " + help="the maximum number of derivations to produce " "(default: no limit)" ) argparser.add_argument( "--max-rewrites-per-utterance", metavar='COUNT', type=int, default=None, - help="If given, limits the number of times a pattern can rewrite " + help="if given, limits the number of times a pattern can rewrite " "any particular utterance during a single sweep " "(default: no limit, unless beam search is applied, in which case 10)" ) - argparser.add_argument( - "--strategy", metavar='STRATEGY', type=str, default=None, - help="Will apply a particular strategy (`expand` or `contract`) " - "under beam search" - ) argparser.add_argument( "--beam-width", metavar='SIZE', type=int, 
default=10, help="When traversing with a strategy, specify the beam width " @@ -108,17 +115,17 @@ working_utterances = [] max_matches = options.max_rewrites_per_utterance - if options.strategy: + if options.strategy != 'complete': max_matches = max_matches or 10 result = derive( rules, working_utterances, + options.strategy, max_derivations=options.max_derivations, max_matches=max_matches, verbose=options.verbose, save_snapshots_every=options.save_snapshots_every, - strategy=options.strategy, expand_until=options.expand_until, beam_width=options.beam_width, )