git @ Cat's Eye Technologies The-Dipple / a22cfad
Take command-line option for canonical prefix, any number of dirs. catseye 11 years ago
1 changed file(s) with 21 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
22 """
33 Find duplicate files.
44
5 Usage: find-dups dir [canonical]
5 Usage: find-dups [--canonical=prefix] {dir}
66
7 If a duplicate is found, and one of the copies is has the prefix
7 If a duplicate is found, and exactly one of the copies has the prefix
88 given by `canonical`, commands to remove all the other copies are output.
99
1010 """
1212 import hashlib
1313 import os
1414 import sys
15 from optparse import OptionParser
1516
1617
1718 def md5(filename):
3536
3637
3738 def main(argv):
39 parser = OptionParser()
40 parser.add_option("--canonical",
41 dest="canonical",
42 default=None,
43 help="output commands to delete all duplicates "
44 "that do not have this prefix")
45 (options, args) = parser.parse_args()
46
3847 hashmap = {}
39 for root, dirs, files in os.walk(argv[1]):
40 for filename in files:
41 full = os.path.normpath(os.path.join(root, filename))
42 hash = md5(full)
43 hashmap.setdefault(hash, []).append(full)
48 for directory in args:
49 for root, dirs, files in os.walk(directory):
50 for filename in files:
51 full = os.path.normpath(os.path.join(root, filename))
52 hash = md5(full)
53 hashmap.setdefault(hash, []).append(full)
4454
4555 for hash in hashmap:
4656 filenames = sorted(hashmap[hash])
4959 for filename in filenames:
5060 print "# %s" % filename
5161
52 if len(argv) == 3:
53 canonical = argv[2]
62 if options.canonical is not None:
5463 print
5564 for hash in hashmap:
5665 filenames = sorted(hashmap[hash])
5766 if len(filenames) > 1:
5867 canonicals = []
5968 for filename in filenames:
60 if filename.startswith(canonical):
69 if filename.startswith(options.canonical):
6170 canonicals.append(filename)
6271 if len(canonicals) == 1:
6372 print "# delete all except %s" % canonicals[0]
6473 for filename in filenames:
65 if not filename.startswith(canonical):
66 print "rm '%s'" % filename
74 if not filename.startswith(options.canonical):
75 print 'rm "%s"' % filename
6776 print
6877
6978 if __name__ == '__main__':