git @ Cat's Eye Technologies Space-Madness / master bin / deposit
master

Tree @master (Download .tar.gz)

deposit @masterraw · history · blame

#!/usr/bin/env python

"""
Transfer files from incoming depository to canonical depository.

Usage: deposit canonical-dir incoming-dir

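For each file found in incoming-dir, check if it exists in the corresponding
place in canonical-dir.  If not, copy it over.  If it does, compare the MD5
hashes of the two files: skip the file when they match, and abort with an
error when they differ.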

"""

import errno    
import hashlib
import os
import re
from shutil import copy2
import sys
from optparse import OptionParser


def md5(filename):
    """Compute and return the MD5 hash of the named file.

    The file is opened in binary mode so the digest reflects the exact
    bytes on disk (no newline translation or text decoding), and it is
    read in fixed-size chunks so large files need not fit in memory.

    :param filename: path of the file to hash.
    :returns: the digest as a hexadecimal string.
    :raises IOError: (``OSError`` on Python 3) if the file cannot be
        opened or read.
    """
    digest = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read fails.
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes string that signals end-of-file.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()


def mkdir_p(path):
    """Create directory *path*, including any missing parents.

    Behaves like ``mkdir -p``: it is not an error for *path* to already
    exist as a directory.  Any other failure from ``os.makedirs`` --
    including *path* existing but not being a directory -- is re-raised
    as the original ``OSError``.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        # Only an "already exists" error on a real directory is tolerated.
        already_a_directory = (
            err.errno == errno.EEXIST and os.path.isdir(path)
        )
        if not already_a_directory:
            raise


### MAIN ###

def _walk_files(top):
    """Yield the normalized path of every file found beneath *top*."""
    for root, _dirs, files in os.walk(top):
        for filename in files:
            yield os.path.normpath(os.path.join(root, filename))


def main(argv):
    """Deposit files from an incoming directory into a canonical one.

    :param argv: the full argument vector, program name first (as in
        ``sys.argv``).  After the options it must contain exactly two
        positional arguments: ``canonical-dir`` then ``incoming-dir``.

    Options:

    * ``--dry-run`` -- report what would be copied without copying.
    * ``--clean`` -- delete every file under incoming-dir, but only on a
      pass in which nothing needed copying (and not a dry run).

    For each file under incoming-dir: if no file exists at the same
    relative path under canonical-dir, it is copied there (creating
    directories as needed); otherwise the MD5 hashes of the two files are
    compared and the file is skipped when they match.

    Note that this changes the process's working directory to
    incoming-dir.

    :raises EnvironmentError: if an existing canonical file differs from
        the incoming one, or if ``--clean`` is requested on a dry run or
        on a pass that copied files.
    :raises SystemExit: (via ``OptionParser.error``) if the wrong number
        of positional arguments is supplied.
    """
    parser = OptionParser(usage="%prog [options] canonical-dir incoming-dir")
    parser.add_option("--clean", default=False, action='store_true',
                      help="delete files once copied over")
    parser.add_option("--dry-run", default=False, action='store_true',
                      help="don't actually modify the filesystem")

    # Parse the argument vector we were handed (minus the program name)
    # instead of silently falling back to sys.argv.
    (options, args) = parser.parse_args(argv[1:])
    if len(args) != 2:
        parser.error("expected exactly two arguments: "
                     "canonical-dir incoming-dir")

    # Resolve both directories before chdir so relative paths given on the
    # command line are interpreted against the original working directory.
    canonical_dir = os.path.realpath(args[0])
    incoming_dir = os.path.realpath(args[1])

    # Walking "." from inside incoming-dir yields paths relative to it,
    # which can be joined directly onto canonical-dir.
    os.chdir(incoming_dir)

    num_copied = 0

    for incoming_file in _walk_files("."):
        canonical_file = os.path.normpath(
            os.path.join(canonical_dir, incoming_file)
        )
        if os.path.exists(canonical_file):
            # Never overwrite: an existing file must be byte-identical.
            if md5(incoming_file) != md5(canonical_file):
                raise EnvironmentError("MD5 mismatch on '%s' - it might need to be renamed" % incoming_file)
            print("*** %s exists, skipping" % canonical_file)
            continue
        if options.dry_run:
            print("copying %s -> %s (DRY RUN)" % (incoming_file, canonical_file))
            continue
        print("copying %s -> %s" % (incoming_file, canonical_file))
        mkdir_p(os.path.dirname(canonical_file))
        copy2(incoming_file, canonical_file)
        num_copied += 1

    print("%s files copied" % num_copied)
    if options.clean:
        # Cleaning is only safe on a pass that verified (by MD5) that every
        # incoming file is already present in the canonical directory.
        if options.dry_run or num_copied != 0:
            raise EnvironmentError("Can only clean on a non-copy pass (run script again to clean)")
        for incoming_file in _walk_files("."):
            print("deleting %s" % incoming_file)
            os.unlink(incoming_file)


# Script entry point: call main() with the process's full argument vector
# only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main(sys.argv)