Checkpoint code for adding secondary-topic resources to READMEs.
Chris Pressey
8 months ago
8 | 8 | import re |
9 | 9 | import os |
10 | 10 | import subprocess |
11 | from collections import defaultdict | |
11 | 12 | |
12 | 13 | from feedmark.loader import read_document_from |
13 | 14 | |
97 | 98 | ) |
98 | 99 | |
99 | 100 | |
100 | def write_readme_file(base_dir, topic, topic_section, entries): | |
101 | ||
101 | def write_readme_file(base_dir, topic, topic_section, entries, secondary_entries=None): | |
102 | 102 | webpages = entries["webpages"] |
103 | 103 | repos = entries["repos"] |
104 | 104 | books = entries["books"] |
105 | 105 | papers = entries["papers"] |
106 | ||
107 | sec_webpages, sec_repos, sec_books, sec_papers = secondary_entries or (None, None, None, None) | |
106 | 108 | |
107 | 109 | title = topic_section["properties"].get("title", topic) |
108 | 110 | see_also = topic_section["properties"].get("see-also", "") |
125 | 127 | if webpages: |
126 | 128 | f.write("\n### Web resources\n\n") |
127 | 129 | for i, webpage in enumerate(webpages): |
130 | # FIXME: heed is-heading? | |
128 | 131 | f.write("{}\n".format(format_webpage(webpage))) |
129 | 132 | f.write("\n" if i < len(webpages) - 1 else "") |
130 | 133 | |
145 | 148 | for i, book in enumerate(books): |
146 | 149 | f.write("{}\n".format(format_book(book))) |
147 | 150 | f.write("\n" if i < len(books) - 1 else "") |
151 | ||
152 | # Secondary entries in their own sections | |
153 | if any([sec_webpages, sec_repos, sec_books, sec_papers]): | |
154 | f.write("\n## Related entries from other topics\n\n") | |
155 | ||
156 | if sec_webpages: | |
157 | f.write("\n### Related web resources\n\n") | |
158 | for source_topic, entries in sec_webpages.items(): | |
159 | for entry in entries: | |
160 | if not entry["properties"].get("is-heading"): | |
161 | f.write("{}\n\n".format(format_webpage(entry, source_topic))) | |
162 | ||
163 | if sec_repos: | |
164 | f.write("\n### Related repositories\n\n") | |
165 | for source_topic, entries in sec_repos.items(): | |
166 | for entry in entries: | |
167 | f.write("{}\n\n".format(format_repo(entry, source_topic))) | |
168 | ||
169 | if sec_papers: | |
170 | f.write("\n### Related papers\n\n") | |
171 | for source_topic, entries in sec_papers.items(): | |
172 | for entry in entries: | |
173 | f.write("{}\n\n".format(format_paper(entry, source_topic))) | |
174 | ||
175 | if sec_books: | |
176 | f.write("\n### Related books\n\n") | |
177 | for source_topic, entries in sec_books.items(): | |
178 | for entry in entries: | |
179 | f.write("{}\n\n".format(format_book(entry, source_topic))) | |
148 | 180 | |
149 | 181 | |
150 | 182 | class Collector: |
159 | 191 | self.topic_dirs = set([f for f in os.listdir(os.path.join(self.base_dir, "by-topic")) if self.is_bookmark_dir(f)]) |
160 | 192 | self.seen_dirs = set() |
161 | 193 | self.counts = {"webpages": 0, "repos": 0, "books": 0, "papers": 0} |
194 | self.secondary_webpages = defaultdict(lambda: defaultdict(list)) | |
195 | self.secondary_repos = defaultdict(lambda: defaultdict(list)) | |
196 | self.secondary_books = defaultdict(lambda: defaultdict(list)) | |
197 | self.secondary_papers = defaultdict(lambda: defaultdict(list)) | |
162 | 198 | |
163 | 199 | def is_bookmark_dir(self, dir_name): |
164 | 200 | return any([ |
185 | 221 | self.papers[topic] = self.load_feedmark_sections(topic, "Papers.md") |
186 | 222 | self.counts["papers"] += len(self.papers[topic]) |
187 | 223 | |
def process_secondary_topics(self):
    """Fan out every loaded entry to the topics it is cross-listed under.

    For each topic directory, each webpage/repo/book/paper entry is handed
    to ``_process_secondary_entry`` together with the matching secondary
    index, so entries tagged with additional topics get recorded there.
    """
    entry_kinds = (
        (self.webpages, self.secondary_webpages),
        (self.repos, self.secondary_repos),
        (self.books, self.secondary_books),
        (self.papers, self.secondary_papers),
    )
    for main_topic in self.topic_dirs:
        for primary_index, secondary_index in entry_kinds:
            for entry in primary_index.get(main_topic, []):
                self._process_secondary_entry(entry, main_topic, secondary_index)
236 | ||
237 | def _process_secondary_entry(self, entry, main_topic, secondary_dict): | |
238 | """Helper method to process an entry for secondary topics.""" | |
239 | if "topics" in entry["properties"]: | |
240 | topics = [t.strip() for t in entry["properties"]["topics"].split(',')] | |
241 | for topic in topics: | |
242 | if topic != main_topic: # Don't add to secondary if it's the main topic | |
243 | secondary_dict[topic][main_topic].append(entry) | |
244 | ||
188 | 245 | def check_entry_topics(self): |
189 | ||
190 | 246 | def check_entry(topic, entry): |
191 | 247 | assert isinstance(topic, str), "{}: {}".format(entry["title"], topic) |
192 | 248 | topics = entry["properties"].get("topics", topic) |
221 | 277 | for item in sections: |
222 | 278 | assert item["title"] not in self.commentary |
223 | 279 | self.commentary[item["title"]] = item |
280 | ||
def get_entries_for_topic(self, topic):
    """Return ``(primary, secondary)`` entry collections for *topic*.

    ``primary`` is a 4-tuple of entry lists (webpages, repos, books,
    papers) owned by the topic itself; ``secondary`` is a 4-tuple of
    mappings, in the same order, from source topic to the entries
    cross-listed here from that topic.  Missing topics yield empty
    lists/mappings rather than raising.
    """
    primary = tuple(
        index.get(topic, [])
        for index in (self.webpages, self.repos, self.books, self.papers)
    )
    secondary = tuple(
        index.get(topic, {})
        for index in (
            self.secondary_webpages,
            self.secondary_repos,
            self.secondary_books,
            self.secondary_papers,
        )
    )
    return primary, secondary
224 | 298 | |
225 | 299 | |
226 | 300 | def dump_at_rating(f, c, entries, target_rating, formatter): |