Downscale illustrations and float them or centre them.
Chris Pressey
10 years ago
0 | 0 | fetch-chronam |
1 | 1 | ============= |
2 | ||
3 | _This is not an experiment. It is a piece of lab equipment._ | |
4 | ||
5 | Abstract | |
6 | -------- | |
7 | ||
8 | This tool provides a command-line front-end for | |
9 | ||
10 | * searching the chroniclingamerica API for a keyword | |
11 | * downloading _n_ scanned images from the search results | |
12 | * converting them from JPEG-2000 to PNG format | |
13 | ||
14 | This code was originally in [naive-cut-up](../naive-cut-up/), from whence it | |
15 | was extracted. | |
2 | 16 | |
3 | 17 | Requirements |
4 | 18 | ------------ |
7 | 21 | * [requests](http://docs.python-requests.org/) |
8 | 22 | * [chroniclingamerica.py](https://github.com/hugovk/chroniclingamerica.py) |
9 | 23 | * ImageMagick |
10 | ||
11 | Basic Strategy | |
12 | -------------- | |
13 | ||
14 | * Just provide a command-line front-end for | |
15 | * searching the chroniclingamerica API for a keyword | |
16 | * downloading _n_ scanned images from the search results | |
17 | * converting them from JPEG-2000 to PNG format | |
18 | ||
19 | This code was originally in [naive-cut-up](../naive-cut-up/), from whence it | |
20 | was extracted. | |
21 | 24 | |
22 | 25 | Usage |
23 | 26 | ----- |
13 | 13 | * Python 2.7.6 (probably works with older versions too) |
14 | 14 | * [requests](http://docs.python-requests.org/) |
15 | 15 | * [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/) |
16 | * [Pillow](http://python-pillow.github.io/) (it might work with PIL too) | |
17 | * ImageMagick | |
16 | 18 | * some kind of input text (uses lorem ipsum for now) |
17 | 19 | |
18 | 20 | Method |
22 | 24 | such as `PD_Gutenberg` or `PD-Art_(PD-Japan)`, and write that list of |
23 | 25 | URLs to an index file. |
24 | 26 | * Select _n_ images randomly from that index and download them. |
25 | * (TODO) Inject those images as illustrations in a given text. | |
27 | * Convert them to PNGs and resize any that are wider than 400 pixels | |
28 | downward | |
29 | * Inject those images as illustrations in a given text. | |
26 | 30 | |
27 | 31 | Observations |
28 | 32 | ------------ |
45 | 49 | [...] |
46 | 50 | $ ristretto art/ |
47 | 51 | |
52 | This is all pretty crazy and a piece of lab equipment should really be broken | |
53 | off of it. | |
54 | ||
48 | 55 | TODO |
49 | 56 | ---- |
50 | 57 | |
51 | 58 | Add a flag that looks for the "guaranteed public domain" text on the media |
52 | 59 | page and only downloads if it finds it. |
53 | 60 | |
54 | Resize illustrations used in HTML (more research into using paper sizes | |
55 | in HTML might be necessary) | |
61 | Research paper size specification/usage in CSS3/HTML. Ideally we'd like to | |
62 | be able to specify image sizes in inches assuming a printed page, or something similar. |
5 | 5 | |
6 | 6 | from bs4 import BeautifulSoup |
7 | 7 | import requests |
8 | import PIL | |
9 | from PIL import Image | |
8 | 10 | |
9 | 11 | |
10 | 12 | LOREM_IPSUM = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." |
44 | 46 | |
45 | 47 | comply_with_terms_of_use() |
46 | 48 | return True |
49 | ||
50 | ||
def convert_image(filename, max_width=400):
    """Convert an image to PNG with ImageMagick and cap its width.

    The result is written next to the source file as
    ``_converted_<basename>.png``.  If that file already exists it is
    returned as-is, without converting or resizing again.

    filename  -- path of the source image
    max_width -- images wider than this many pixels are scaled down to
                 exactly this width, keeping the aspect ratio

    Returns the path of the PNG, or False if ImageMagick's ``convert``
    could not be started or exited with a non-zero status.
    """
    # Imported locally so the function does not rely on a module-level
    # import being present.
    import subprocess

    new_filename = os.path.join(
        os.path.dirname(filename),
        "_converted_" + os.path.basename(filename) + ".png"
    )
    if os.path.exists(new_filename):
        print("already converted")
        return new_filename

    print('convert {0} {1}'.format(filename, new_filename))
    try:
        # An argument list (no shell) keeps filenames containing spaces or
        # shell metacharacters from being split or interpreted.
        exit_code = subprocess.call(["convert", filename, new_filename])
    except OSError:
        # The `convert` executable is missing or cannot be executed.
        exit_code = -1
    if exit_code != 0:
        print("ERROR converting image!")
        return False

    image = Image.open(new_filename)
    print(image)
    width, height = image.size
    if width > max_width:
        scale = max_width / float(width)
        # Use max_width directly: int(width * scale) can come out one pixel
        # short because of floating-point rounding.
        new_width = max_width
        # Never let a very wide, short image collapse to zero height.
        new_height = max(1, int(height * scale))
        print("resizing to %s x %s" % (new_width, new_height))
        image = image.resize((new_width, new_height), resample=PIL.Image.ANTIALIAS)
        image.save(new_filename)

    return new_filename
47 | 77 | |
48 | 78 | |
49 | 79 | def load_index(filename): |
138 | 168 | <title>Lorum Ipsem Shkoo</title> |
139 | 169 | <style> |
140 | 170 | hr { page-break-before: always; } |
171 | .c { width: 100%; text-align: center; } | |
141 | 172 | </style> |
142 | 173 | <body> |
143 | 174 | $ |
147 | 178 | body = '' |
148 | 179 | for x in xrange(0, count): |
149 | 180 | filename = get_random_image(index, dest_dir) |
181 | filename = convert_image(filename) | |
182 | ||
150 | 183 | if x != 0: |
151 | 184 | body += '<hr>' |
152 | 185 | |
153 | 186 | paras = ['<p>' + LOREM_IPSUM + '</p>'] * 4 |
154 | paras.append('<img src="%s">' % filename) | |
187 | align = random.choice(('left', 'right', 'centre')) | |
188 | if align == 'left': | |
189 | paras.append('<img style="float: left" src="%s">' % filename) | |
190 | elif align == 'right': | |
191 | paras.append('<img style="float: right" src="%s">' % filename) | |
192 | elif align == 'centre': | |
193 | paras.append('<div class="c"><img src="%s"></div>' % filename) | |
155 | 194 | random.shuffle(paras) |
156 | 195 | body += ''.join(paras) |
157 | ||
196 | ||
158 | 197 | template = template.replace('$', body) |
159 | 198 | with open('tmp.html', 'w') as f: |
160 | 199 | f.write(template) |