Introduce new strategy. Name strategies consistently.
Chris Pressey
2 years ago
45 | 45 | of memory and only taking a few hours of processor time: |
46 | 46 | |
47 | 47 | ``` |
48 | ./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expansion --expand-until=3000 | |
48 | ./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expand --expand-until=3000 | |
49 | 49 | ``` |
50 | 50 | |
51 | 51 | Parse a really long string from a non-terminal in a grammar, without running out |
53 | 53 | to be parsed is in JSON format in the file `xyz.json`. |
54 | 54 | |
55 | 55 | ``` |
56 | ./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contraction | |
56 | ./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contract | |
57 | 57 | ``` |
58 | 58 | |
59 | 59 | ### Detailed usage |
69 | 69 | ### TODO |
70 | 70 | |
71 | 71 | * specify output filename |
72 | * try heuristic for contraction phase: highest proportion of terminals | |
73 | 72 | * `--goal` to assert that a particular final utterance appears |
41 | 41 | final_utterances = None |
42 | 42 | collected_utterances = [] |
43 | 43 | num_derivations = 0 |
44 | iter = 0 | |
44 | 45 | |
45 | def score_expansion(u): | |
46 | return 0 - len(u) | |
47 | ||
48 | def score_contraction(u): | |
49 | return len(u) | |
46 | scoring_functions = { | |
47 | 'expand': lambda u: 0 - len(u), | |
48 | 'contract': lambda u: len(u), | |
49 | 'minimize-nonterminals': lambda u: sum(map(lambda s: s.startswith('<'), u)), | |
50 | } | |
50 | 51 | |
51 | 52 | while working_utterances: |
52 | ||
53 | iter += 1 | |
54 | # if verbose: # TODO: actually this should be "if display_snapshots", or something | |
55 | # if iter % 100 == 0: | |
56 | # for i, wu in enumerate(working_utterances): | |
57 | # print(i, ' '.join(wu)) | |
53 | 58 | length = len(working_utterances) |
54 | 59 | lengths = [len(u) for u in working_utterances] |
55 | 60 | min_length = min(lengths) |
57 | 62 | print('{} working utterances, min length = {}'.format( |
58 | 63 | length, min_length |
59 | 64 | )) |
60 | if strategy == 'expansion' and min_length >= (expand_until or 0): | |
65 | if strategy == 'expand' and min_length >= (expand_until or 0): | |
61 | 66 | if verbose: |
62 | 67 | print('Reached {} threshold'.format(expand_until)) |
63 | strategy = 'contraction' | |
68 | # TODO: make the strategy that we switch to here configurable | |
69 | strategy = 'minimize-nonterminals' | |
64 | 70 | |
65 | 71 | working_utterances, final_utterances = generate(rules, working_utterances, max_matches=max_matches) |
66 | 72 | |
67 | 73 | # beam search: sort by score and trim before continuing |
68 | 74 | if strategy: |
69 | scoring_function = score_contraction if strategy == 'contraction' else score_expansion | |
70 | working_utterances = sorted(working_utterances, key=scoring_function)[:beam_width] | |
75 | working_utterances = sorted(working_utterances, key=scoring_functions[strategy])[:beam_width] | |
71 | 76 | |
72 | 77 | for utterance in final_utterances: |
73 | 78 | print(' '.join(utterance)) |
32 | 32 | ) |
33 | 33 | |
34 | 34 | argparser.add_argument( |
35 | "--max-derivations", metavar='COUNT', type=int, default=None, | |
36 | help="The maximum number of derivations to produce " | |
37 | "(default: no limit)" | |
38 | ) | |
39 | argparser.add_argument( | |
35 | 40 | "--max-rewrites-per-utterance", metavar='COUNT', type=int, default=None, |
36 | 41 | help="If given, limits the number of times a pattern can rewrite " |
37 | 42 | "any particular utterance during a single sweep " |
38 | 43 | "(default: no limit, unless beam search is applied, in which case 10)" |
39 | 44 | ) |
45 | ||
40 | 46 | argparser.add_argument( |
41 | "--max-derivations", metavar='COUNT', type=int, default=None, | |
42 | help="The maximum number of derivations to produce " | |
43 | "(default: no limit)" | |
44 | ) | |
45 | argparser.add_argument( | |
46 | "--expand-until", metavar='SIZE', type=int, default=None, | |
47 | help="Implies the `expansion` strategy. Specifies that the " | |
48 | "resulting derivations must be at least this long" | |
47 | "--strategy", metavar='STRATEGY', type=str, default=None, | |
48 | help="Will apply a particular strategy (`expand` or `contract`) " | |
49 | "under beam search" | |
49 | 50 | ) |
50 | 51 | argparser.add_argument( |
51 | 52 | "--beam-width", metavar='SIZE', type=int, default=10, |
52 | 53 | help="When traversing with a strategy, specify the beam width " |
53 | 54 | "for the beam search" |
54 | 55 | ) |
55 | ||
56 | 56 | argparser.add_argument( |
57 | "--strategy", metavar='STRATEGY', type=str, default=None, | |
58 | help="Will apply a particular strategy (`expansion` or `contraction`) " | |
59 | "under beam search" | |
57 | "--expand-until", metavar='SIZE', type=int, default=None, | |
58 | help="When using the `expand` strategy, specifies that the " | |
59 | "resulting derivations must be at least this long" | |
60 | 60 | ) |
61 | 61 | |
62 | 62 | options = argparser.parse_args(args) |
79 | 79 | print("No start set given, please supply --start or --start-set-file") |
80 | 80 | working_utterances = [] |
81 | 81 | |
82 | if options.expand_until: | |
83 | options.strategy = 'expansion' | |
84 | 82 | max_matches = options.max_rewrites_per_utterance |
85 | 83 | if options.strategy: |
86 | 84 | max_matches = max_matches or 10 |