#!/usr/bin/env python
"""
Transfer files from incoming depository to canonical depository.
Usage: deposit canonical-dir incoming-dir
For each file found in incoming-dir, check if it exists in corresponding place
in canonical-dir. If not, copy it over.
"""
import errno
import hashlib
import os
import re
from shutil import copy2
import sys
from optparse import OptionParser
def md5(filename):
    """Compute and return the MD5 hash of the named file.

    The file is read as raw bytes, so the digest reflects the exact
    on-disk content regardless of platform newline handling.

    filename -- path of the file to hash.

    Returns the digest as a lowercase hexadecimal string. Any error from
    opening or reading the file propagates to the caller.
    """
    digest = hashlib.md5()
    # Binary mode is required: text mode may translate newlines and, on
    # Python 3, yields str objects that the hash object refuses.
    with open(filename, "rb") as stream:
        while True:
            # Read in fixed-size chunks so large files are never held in
            # memory all at once.
            chunk = stream.read(65536)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def mkdir_p(path):
    """Create directory `path` and any missing parents, like `mkdir -p`.

    An already-existing directory at `path` is not an error. Any other
    OSError (including `path` existing as a non-directory) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        # Only "already exists, and it really is a directory" is tolerated.
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
### MAIN ###
def main(argv):
    """Copy files missing from the canonical tree over from the incoming tree.

    argv -- full argument vector with the program name first (as in
            sys.argv). It must contain exactly two positional arguments,
            canonical-dir and incoming-dir, and may contain:
              --dry-run  report what would be copied without modifying
                         the filesystem
              --clean    delete the incoming files; only permitted on a
                         pass that copied nothing

    Side effect: the process working directory is changed to incoming-dir.

    Raises EnvironmentError when an incoming file already exists in the
    canonical tree with a different MD5, or when --clean is combined with
    --dry-run or with a pass that copied files. Wrong positional arguments
    are reported through the option parser, which exits the process.
    """
    parser = OptionParser(usage="%prog [options] canonical-dir incoming-dir")
    parser.add_option("--clean", default=False, action='store_true',
                      help="delete files once copied over")
    parser.add_option("--dry-run", default=False, action='store_true',
                      help="don't actually modify the filesystem")
    # Parse the vector we were handed instead of implicitly reading sys.argv.
    (options, args) = parser.parse_args(argv[1:])
    if len(args) != 2:
        parser.error("expected exactly two arguments: canonical-dir incoming-dir")

    # Resolve both paths before chdir so relative arguments keep their meaning.
    canonical_dir = os.path.realpath(args[0])
    incoming_dir = os.path.realpath(args[1])

    # Walking "." from inside incoming_dir yields paths relative to it, which
    # can be joined directly onto canonical_dir.
    os.chdir(incoming_dir)

    num_copied = 0
    # Incoming files confirmed identical to their canonical counterpart on
    # this pass; these are the only files --clean is allowed to delete.
    verified = []
    for root, _dirs, files in os.walk("."):
        for filename in files:
            incoming_file = os.path.normpath(os.path.join(root, filename))
            canonical_file = os.path.normpath(os.path.join(canonical_dir, incoming_file))

            if os.path.exists(canonical_file):
                if md5(incoming_file) != md5(canonical_file):
                    raise EnvironmentError("MD5 mismatch on '%s' - it might need to be renamed" % incoming_file)
                print("*** %s exists, skipping" % canonical_file)
                verified.append(incoming_file)
                continue

            if options.dry_run:
                print("copying %s -> %s (DRY RUN)" % (incoming_file, canonical_file))
                continue

            print("copying %s -> %s" % (incoming_file, canonical_file))
            mkdir_p(os.path.dirname(canonical_file))
            copy2(incoming_file, canonical_file)
            num_copied += 1

    print("%s files copied" % num_copied)

    if not options.clean:
        return

    # Cleaning is refused unless this pass verified every file and copied
    # none: a dry run copies nothing even when copies are pending, and freshly
    # copied files have not been checksum-verified yet.
    if options.dry_run or num_copied != 0:
        raise EnvironmentError("Can only clean on a non-copy pass (run script again to clean)")

    # Delete only what was verified above, not whatever a second directory
    # walk happens to find, so files that arrived meanwhile are never lost.
    for incoming_file in verified:
        print("deleting %s" % incoming_file)
        os.unlink(incoming_file)
# Script entry point: run the transfer only when executed directly, handing
# main() the full command line (program name included).
if __name__ == "__main__":
    main(sys.argv)