git @ Cat's Eye Technologies relwrite / 3eedf84
Introduce new strategy. Name strategies consistently. Chris Pressey 2 years ago
3 changed file(s) with 29 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
4545 of memory and only taking a few hours of processor time:
4646
4747 ```
48 ./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expansion --expand-until=3000
48 ./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expand --expand-until=3000
4949 ```
5050
5151 Parse a really long string from a non-terminal in a grammar, without running out
5353 to be parsed is in JSON format in the file `xyz.json`.
5454
5555 ```
56 ./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contraction
56 ./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contract
5757 ```
5858
5959 ### Detailed usage
6969 ### TODO
7070
7171 * specify output filename
72 * try heuristic for contraction phase: highest proportion of terminals
7372 * `--goal` to assert that a particular final utterance appears
4141 final_utterances = None
4242 collected_utterances = []
4343 num_derivations = 0
44 iter = 0
4445
45 def score_expansion(u):
46 return 0 - len(u)
47
48 def score_contraction(u):
49 return len(u)
46 scoring_functions = {
47 'expand': lambda u: 0 - len(u),
48 'contract': lambda u: len(u),
49 'minimize-nonterminals': lambda u: sum(map(lambda s: s.startswith('<'), u)),
50 }
5051
5152 while working_utterances:
52
53 iter += 1
54 # if verbose: # TODO: actually this should be "if display_snapshots", or something
55 # if iter % 100 == 0:
56 # for i, wu in enumerate(working_utterances):
57 # print(i, ' '.join(wu))
5358 length = len(working_utterances)
5459 lengths = [len(u) for u in working_utterances]
5560 min_length = min(lengths)
5762 print('{} working utterances, min length = {}'.format(
5863 length, min_length
5964 ))
60 if strategy == 'expansion' and min_length >= (expand_until or 0):
65 if strategy == 'expand' and min_length >= (expand_until or 0):
6166 if verbose:
6267 print('Reached {} threshold'.format(expand_until))
63 strategy = 'contraction'
68 # TODO: make it configurable, which strategy to switch to here?
69 strategy = 'minimize-nonterminals'
6470
6571 working_utterances, final_utterances = generate(rules, working_utterances, max_matches=max_matches)
6672
6773 # beam search: sort by score and trim before continuing
6874 if strategy:
69 scoring_function = score_contraction if strategy == 'contraction' else score_expansion
70 working_utterances = sorted(working_utterances, key=scoring_function)[:beam_width]
75 working_utterances = sorted(working_utterances, key=scoring_functions[strategy])[:beam_width]
7176
7277 for utterance in final_utterances:
7378 print(' '.join(utterance))
3232 )
3333
3434 argparser.add_argument(
35 "--max-derivations", metavar='COUNT', type=int, default=None,
36 help="The maximum number of derivations to produce "
37 "(default: no limit)"
38 )
39 argparser.add_argument(
3540 "--max-rewrites-per-utterance", metavar='COUNT', type=int, default=None,
3641 help="If given, limits the number of times a pattern can rewrite "
3742 "any particular utterance during a single sweep "
3843 "(default: no limit, unless beam search is applied, in which case 10)"
3944 )
45
4046 argparser.add_argument(
41 "--max-derivations", metavar='COUNT', type=int, default=None,
42 help="The maximum number of derivations to produce "
43 "(default: no limit)"
44 )
45 argparser.add_argument(
46 "--expand-until", metavar='SIZE', type=int, default=None,
47 help="Implies the `expansion` strategy. Specifies that the "
48 "resulting derivations must be at least this long"
47 "--strategy", metavar='STRATEGY', type=str, default=None,
48 help="Will apply a particular strategy (`expand` or `contract`) "
49 "under beam search"
4950 )
5051 argparser.add_argument(
5152 "--beam-width", metavar='SIZE', type=int, default=10,
5253 help="When traversing with a strategy, specify the beam width "
5354 "for the beam search"
5455 )
55
5656 argparser.add_argument(
57 "--strategy", metavar='STRATEGY', type=str, default=None,
58 help="Will apply a particular strategy (`expansion` or `contraction`) "
59 "under beam search"
57 "--expand-until", metavar='SIZE', type=int, default=None,
58 help="When using the `expand` strategy, specifies that the "
59 "resulting derivations must be at least this long"
6060 )
6161
6262 options = argparser.parse_args(args)
7979 print("No start set given, please supply --start or --start-set-file")
8080 working_utterances = []
8181
82 if options.expand_until:
83 options.strategy = 'expansion'
8482 max_matches = options.max_rewrites_per_utterance
8583 if options.strategy:
8684 max_matches = max_matches or 10