Commit 3eedf84c8d2bdbc08616b40b85daf2e3ae404fe7 - relwrite

Introduce new strategy. Name strategies consistently. (Chris Pressey, 2 years ago)

3 changed files with 29 additions and 27 deletions. [Raw diff | Collapse all | Expand all]

-3

README.md less more

45	45	of memory and only taking a few hours of processor time:
46	46
47	47	```
48		./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expansion --expand-until=3000
	48	./bin/relwrite eg/recursive-grammar.json --start "<Sentence>" --max-derivations=1 --strategy=expand --expand-until=3000
49	49	```
50	50
51	51	Parse a really long string from a non-terminal in a grammar, without running out

53	53	to be parsed is in JSON format in the file `xyz.json`.
54	54
55	55	```
56		./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contraction
	56	./bin/relwrite eg/recursive-grammar.json --parse --start-set-file=xyz.json --max-derivations=1 --strategy=contract
57	57	```
58	58
59	59	### Detailed usage

69	69	### TODO
70	70
71	71	* specify output filename
72		* try heuristic for contraction phase: highest proportion of terminals
73	72	* `--goal` to assert that a particular final utterance appears

+15

-10

src/relwrite/engine.py less more

41	41	final_utterances = None
42	42	collected_utterances = []
43	43	num_derivations = 0
	44	iter = 0
44	45
45		def score_expansion(u):
46		return 0 - len(u)
47
48		def score_contraction(u):
49		return len(u)
	46	scoring_functions = {
	47	'expand': lambda u: 0 - len(u),
	48	'contract': lambda u: len(u),
	49	'minimize-nonterminals': lambda u: sum(map(lambda s: s.startswith('<'), u)),
	50	}
50	51
51	52	while working_utterances:
52
	53	iter += 1
	54	# if verbose: # TODO: actually this should be "if display_snapshots", or something
	55	# if iter % 100 == 0:
	56	# for i, wu in enumerate(working_utterances):
	57	# print(i, ' '.join(wu))
53	58	length = len(working_utterances)
54	59	lengths = [len(u) for u in working_utterances]
55	60	min_length = min(lengths)

57	62	print('{} working utterances, min length = {}'.format(
58	63	length, min_length
59	64	))
60		if strategy == 'expansion' and min_length >= (expand_until or 0):
	65	if strategy == 'expand' and min_length >= (expand_until or 0):
61	66	if verbose:
62	67	print('Reached {} threshold'.format(expand_until))
63		strategy = 'contraction'
	68	# TODO: make it configurable, which strategy to switch to here?
	69	strategy = 'minimize-nonterminals'
64	70
65	71	working_utterances, final_utterances = generate(rules, working_utterances, max_matches=max_matches)
66	72
67	73	# beam search: sort by score and trim before continuing
68	74	if strategy:
69		scoring_function = score_contraction if strategy == 'contraction' else score_expansion
70		working_utterances = sorted(working_utterances, key=scoring_function)[:beam_width]
	75	working_utterances = sorted(working_utterances, key=scoring_functions[strategy])[:beam_width]
71	76
72	77	for utterance in final_utterances:
73	78	print(' '.join(utterance))

+12

-14

src/relwrite/main.py less more

32	32	)
33	33
34	34	argparser.add_argument(
	35	"--max-derivations", metavar='COUNT', type=int, default=None,
	36	help="The maximum number of derivations to produce "
	37	"(default: no limit)"
	38	)
	39	argparser.add_argument(
35	40	"--max-rewrites-per-utterance", metavar='COUNT', type=int, default=None,
36	41	help="If given, limits the number of times a pattern can rewrite "
37	42	"any particular utterance during a single sweep "
38	43	"(default: no limit, unless beam search is applied, in which case 10)"
39	44	)
	45
40	46	argparser.add_argument(
41		"--max-derivations", metavar='COUNT', type=int, default=None,
42		help="The maximum number of derivations to produce "
43		"(default: no limit)"
44		)
45		argparser.add_argument(
46		"--expand-until", metavar='SIZE', type=int, default=None,
47		help="Implies the `expansion` strategy. Specifies that the "
48		"resulting derivations must be at least this long"
	47	"--strategy", metavar='STRATEGY', type=str, default=None,
	48	help="Will apply a particular strategy (`expand` or `contract`) "
	49	"under beam search"
49	50	)
50	51	argparser.add_argument(
51	52	"--beam-width", metavar='SIZE', type=int, default=10,
52	53	help="When traversing with a strategy, specify the beam width "
53	54	"for the beam search"
54	55	)
55
56	56	argparser.add_argument(
57		"--strategy", metavar='STRATEGY', type=str, default=None,
58		help="Will apply a particular strategy (`expansion` or `contraction`) "
59		"under beam search"
	57	"--expand-until", metavar='SIZE', type=int, default=None,
	58	help="When using the `expand` strategy, specifies that the "
	59	"resulting derivations must be at least this long"
60	60	)
61	61
62	62	options = argparser.parse_args(args)

79	79	print("No start set given, please supply --start or --start-set-file")
80	80	working_utterances = []
81	81
82		if options.expand_until:
83		options.strategy = 'expansion'
84	82	max_matches = options.max_rewrites_per_utterance
85	83	if options.strategy:
86	84	max_matches = max_matches or 10