git @ Cat's Eye Technologies NaNoGenLab / master fetch-chronam / fetch-chronam.py
master

Tree @master (Download .tar.gz)

fetch-chronam.py @master — raw · history · blame

#!/usr/bin/env python

import os
import random
import subprocess
import sys

from chroniclingamerica import ChronAm
import requests


def comply_with_terms_of_use(seconds=10):
    """Pause between requests so the remote service is not overloaded.

    :param seconds: how long to sleep, in seconds.  Defaults to 10, the
        delay this script has always used; callers may pass another value
        (e.g. 0 when no throttling is wanted).
    :returns: None
    """
    # http://www.loc.gov/legal says they reserve the right to block anyone
    # who negatively impacts their service (fair enough) and recommend that
    # consumers make no more than 10 requests per minute.  If my arithmetic
    # is correct, that's one request every 6 seconds.  Considering that
    # we spend a considerable amount of time in `convert` too, the default
    # sleep is generous.
    from time import sleep
    sleep(seconds)


def download(item, local_id):
    """Download one page image as JPEG 2000 and convert it to PNG.

    The image is saved as ``ca_<local_id>.jp2`` in the current directory,
    converted to ``ca_<local_id>.png`` by running the external ``convert``
    program, and the ``.jp2`` file is deleted once the conversion succeeds.
    Progress and errors are reported on standard output.

    :param item: mapping with an ``'id'`` entry; its value, minus the last
        character (presumably a trailing slash -- confirm against the
        fetcher's output), is used as the path of the remote image.
    :param local_id: value used to build the local file names.
    :returns: None
    """
    url = 'http://chroniclingamerica.loc.gov/%s.jp2' % item['id'][:-1]
    local_filename = 'ca_%s.jp2' % local_id
    # Single-argument print(...) produces the same output under Python 2
    # and also parses under Python 3.
    print('{0} --> {1}'.format(url, local_filename))
    response = requests.get(url, stream=True)
    try:
        if not response.ok:
            # Without a fresh download there is nothing valid to convert;
            # carrying on could pick up a stale file from an earlier run.
            print('ERROR downloading image (HTTP {0})'.format(
                response.status_code))
            return
        # JPEG 2000 data is binary: text mode would corrupt it on Windows
        # and reject bytes outright on Python 3.
        with open(local_filename, 'wb') as f:
            for block in response.iter_content(1024):
                f.write(block)
    finally:
        # A streamed response holds its connection until explicitly closed.
        response.close()
    new_filename = os.path.splitext(local_filename)[0] + '.png'
    print('convert {0} {1}'.format(local_filename, new_filename))
    try:
        # An argument list (no shell) keeps unusual characters in the file
        # names from being interpreted by a shell.
        exit_code = subprocess.call(['convert', local_filename, new_filename])
    except OSError:
        # `convert` could not be started at all (e.g. it is not installed).
        exit_code = -1
    if exit_code == 0:
        os.remove(local_filename)
        print('done!')
    else:
        print("ERROR converting image")

def main(argv):
    """Command-line entry point: download a limited number of page images.

    :param argv: argument list in ``sys.argv`` layout.  ``argv[1]`` is the
        number of items to download (parsed with ``int``); ``argv[2]`` is
        passed unchanged to ``ChronAm`` (presumably the search query --
        confirm against the chroniclingamerica package).
    :raises SystemExit: with a usage message when fewer than two arguments
        follow the program name.
    :raises ValueError: when ``argv[1]`` is not an integer.
    """
    if len(argv) < 3:
        program = argv[0] if argv else 'fetch-chronam.py'
        sys.exit('usage: {0} AMOUNT QUERY'.format(program))
    amount = int(argv[1])
    fetcher = ChronAm(argv[2])
    for x, item in enumerate(fetcher.fetch()):
        if x == amount:
            # Only reachable when amount is 0: nothing was requested.
            break
        download(item, x)
        if x + 1 == amount:
            # That was the last wanted item; stop now rather than sleeping
            # and pulling one more (unused) result from the fetcher.
            break
        comply_with_terms_of_use()


# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    # `sys` is already imported at the top of the module, so it is not
    # imported a second time here.
    main(sys.argv)