Checkpoint code for adding secondary-topic resources to READMEs.
Chris Pressey
8 months ago
8 | 8 | import re |
9 | 9 | import os |
10 | 10 | import subprocess |
11 | from collections import defaultdict | |
11 | 12 | |
12 | 13 | from feedmark.loader import read_document_from |
13 | 14 | |
97 | 98 | ) |
98 | 99 | |
99 | 100 | |
100 | def write_readme_file(base_dir, topic, topic_section, entries): | |
101 | ||
101 | def write_readme_file(base_dir, topic, topic_section, entries, secondary_entries=None): | |
102 | 102 | webpages = entries["webpages"] |
103 | 103 | repos = entries["repos"] |
104 | 104 | books = entries["books"] |
105 | 105 | papers = entries["papers"] |
106 | ||
107 | sec_webpages, sec_repos, sec_books, sec_papers = secondary_entries or (None, None, None, None) | |
106 | 108 | |
107 | 109 | title = topic_section["properties"].get("title", topic) |
108 | 110 | see_also = topic_section["properties"].get("see-also", "") |
125 | 127 | if webpages: |
126 | 128 | f.write("\n### Web resources\n\n") |
127 | 129 | for i, webpage in enumerate(webpages): |
130 | # FIXME: heed is-heading? | |
128 | 131 | f.write("{}\n".format(format_webpage(webpage))) |
129 | 132 | f.write("\n" if i < len(webpages) - 1 else "") |
130 | 133 | |
145 | 148 | for i, book in enumerate(books): |
146 | 149 | f.write("{}\n".format(format_book(book))) |
147 | 150 | f.write("\n" if i < len(books) - 1 else "") |
151 | ||
152 | # Secondary entries in their own sections | |
153 | if any([sec_webpages, sec_repos, sec_books, sec_papers]): | |
154 | f.write("\n## Related entries from other topics\n\n") | |
155 | ||
156 | if sec_webpages: | |
157 | f.write("\n### Related web resources\n\n") | |
158 | for source_topic, entries in sec_webpages.items(): | |
159 | for entry in entries: | |
160 | if not entry["properties"].get("is-heading"): | |
161 | f.write("{}\n\n".format(format_webpage(entry, source_topic))) | |
162 | ||
163 | if sec_repos: | |
164 | f.write("\n### Related repositories\n\n") | |
165 | for source_topic, entries in sec_repos.items(): | |
166 | for entry in entries: | |
167 | f.write("{}\n\n".format(format_repo(entry, source_topic))) | |
168 | ||
169 | if sec_papers: | |
170 | f.write("\n### Related papers\n\n") | |
171 | for source_topic, entries in sec_papers.items(): | |
172 | for entry in entries: | |
173 | f.write("{}\n\n".format(format_paper(entry, source_topic))) | |
174 | ||
175 | if sec_books: | |
176 | f.write("\n### Related books\n\n") | |
177 | for source_topic, entries in sec_books.items(): | |
178 | for entry in entries: | |
179 | f.write("{}\n\n".format(format_book(entry, source_topic))) | |
148 | 180 | |
149 | 181 | |
150 | 182 | class Collector: |
159 | 191 | self.topic_dirs = set([f for f in os.listdir(os.path.join(self.base_dir, "by-topic")) if self.is_bookmark_dir(f)]) |
160 | 192 | self.seen_dirs = set() |
161 | 193 | self.counts = {"webpages": 0, "repos": 0, "books": 0, "papers": 0} |
194 | self.secondary_webpages = defaultdict(lambda: defaultdict(list)) | |
195 | self.secondary_repos = defaultdict(lambda: defaultdict(list)) | |
196 | self.secondary_books = defaultdict(lambda: defaultdict(list)) | |
197 | self.secondary_papers = defaultdict(lambda: defaultdict(list)) | |
162 | 198 | |
163 | 199 | def is_bookmark_dir(self, dir_name): |
164 | 200 | return any([ |
185 | 221 | self.papers[topic] = self.load_feedmark_sections(topic, "Papers.md") |
186 | 222 | self.counts["papers"] += len(self.papers[topic]) |
187 | 223 | |
def process_secondary_topics(self):
    """Fan out every loaded entry to the topics it is cross-listed under.

    For each topic directory, each webpage/repo/book/paper entry is handed
    to ``_process_secondary_entry`` together with the matching secondary
    index, so entries tagged with additional topics get recorded there.
    """
    entry_kinds = (
        (self.webpages, self.secondary_webpages),
        (self.repos, self.secondary_repos),
        (self.books, self.secondary_books),
        (self.papers, self.secondary_papers),
    )
    for main_topic in self.topic_dirs:
        for primary_index, secondary_index in entry_kinds:
            for entry in primary_index.get(main_topic, []):
                self._process_secondary_entry(entry, main_topic, secondary_index)
236 | ||
237 | def _process_secondary_entry(self, entry, main_topic, secondary_dict): | |
238 | """Helper method to process an entry for secondary topics.""" | |
239 | if "topics" in entry["properties"]: | |
240 | topics = [t.strip() for t in entry["properties"]["topics"].split(',')] | |
241 | for topic in topics: | |
242 | if topic != main_topic: # Don't add to secondary if it's the main topic | |
243 | secondary_dict[topic][main_topic].append(entry) | |
244 | ||
188 | 245 | def check_entry_topics(self): |
189 | ||
190 | 246 | def check_entry(topic, entry): |
191 | 247 | assert isinstance(topic, str), "{}: {}".format(entry["title"], topic) |
192 | 248 | topics = entry["properties"].get("topics", topic) |
221 | 277 | for item in sections: |
222 | 278 | assert item["title"] not in self.commentary |
223 | 279 | self.commentary[item["title"]] = item |
280 | ||
def get_entries_for_topic(self, topic):
    """Return ``(primary, secondary)`` entry collections for *topic*.

    ``primary`` is a 4-tuple of entry lists (webpages, repos, books,
    papers) owned by the topic itself; ``secondary`` is a 4-tuple of
    mappings, in the same order, from source topic to the entries
    cross-listed here from that topic.  Missing topics yield empty
    lists/mappings rather than raising.
    """
    primary = tuple(
        index.get(topic, [])
        for index in (self.webpages, self.repos, self.books, self.papers)
    )
    secondary = tuple(
        index.get(topic, {})
        for index in (
            self.secondary_webpages,
            self.secondary_repos,
            self.secondary_books,
            self.secondary_papers,
        )
    )
    return primary, secondary
224 | 298 | |
225 | 299 | |
226 | 300 | def dump_at_rating(f, c, entries, target_rating, formatter): |