#!/usr/bin/python
#
# usage: mirrorcheck [-vrASs] [-F filename] [-X IP] relative master[:top] slave[:top] [...]
#
#   -v: be more verbose. one -v is list missed/added files, two is natter
#       about progress too.
#   -r: recursively descend into subdirectories. Recommended only for
#       thoroughness. Don't use this routinely.
#   -A: don't bother saying anything about files present on a slave but not
#       on the master
#   -S: don't complain about missing symlinks
#   -s: check not just symlink name, but that it points to the same place
#   -F: exclude the filename from complaints about being added.
#       ('index.php' and '.message' are excluded already.)
#   -X: exclude the IP from checks.
#
# The optional ':top' argument for the master and slaves is interpreted
# as the relative top-level on each server, if they mirror things at
# some sub-point. For example:
#   mirrorcheck -r 6.2/en/os ftp.redhat.com:/updates ftp.example.com:/mirror/redhat/updates
#
# BUGS: rude and crude, and nowhere near elegant Python code.
# also somewhat reliant on being able to actually parse ftp directory
# listings, which can be ... questionable.

import sys
import string
import socket
import ftplib
import signal


def warn(string):
    """Write a diagnostic to stderr, prefixed with the program name.

    Note: the parameter name shadows the ``string`` module inside this
    function; it is kept because it is part of the existing signature.
    """
    sys.stderr.write(sys.argv[0] + ': ' + string + '\n')


def die(string):
    """Report a fatal error through warn() and exit with status 1."""
    warn(string)
    sys.exit(1)


# SIGINT should just make us die.
# We switch SIGINT to the default action, but only keep that if the
# previous handler was Python's own KeyboardInterrupt-raising handler;
# anything else is put back.
s = signal.signal(signal.SIGINT, signal.SIG_DFL)
# signal.signal() returns None when the previous handler was not installed
# from Python; that value cannot be passed back to signal.signal(), so we
# leave the default action in place in that case.
if s is not None and s != signal.default_int_handler:
    signal.signal(signal.SIGINT, s)
del s

# Defaults:
verbose = 0         # -v count
recursive = 0       # -r
noadded = 0         # -A
exclsymlinks = 0    # -S
fullsymcheck = 0    # -s
exclude = []        # -X arguments
# -F arguments are appended to this list. The '@' variants match the
# visual marker that symlink names get when -s is not in effect.
exclfilenames = ['.message', 'index.php', '.message@', 'index.php@']

# Timeouts, in seconds, for FTP operations. DIROPTIME is the time to
# get a complete directory listing of a single directory. OPENTIME is
# the time to open a FTP connection. OPTIME is the time to do a simple
# operation, like cwd.
DIROPTIME = 120
OPENTIME = 30
OPTIME = 10

import getopt


def usage():
    """Print the usage summary to stderr and exit with status 1."""
    sys.stderr.write("usage: mirrorcheck [-vrASs] [-F filename] [-X IP] rel-path master-host[:top-dir] slave[:top-dir] [slave[:top-dir] ...]\n")
    sys.exit(1)


try:
    opts, args = getopt.getopt(sys.argv[1:], "h?AvrsSX:F:", [])
except getopt.error as cause:
    # warn() concatenates strings, so the exception must be converted first.
    warn(str(cause))
    usage()

for o, a in opts:
    if o in ('-?', '-h'):
        usage()
    elif o == '-v':
        verbose = verbose + 1
    elif o == '-r':
        recursive = 1
    elif o == '-A':
        noadded = 1
    elif o == '-S':
        exclsymlinks = 1
    elif o == '-s':
        fullsymcheck = 1
    elif o == '-X':
        exclude.append(a)
    elif o == '-F':
        exclfilenames.append(a)
    else:
        die('Chris forgot to make me handle option: '+o)

# We need a relative path, a master and at least one slave.
if len(args) < 3:
    usage()


#
# Some routines

def isipaddr(a):
    """Return 1 if `a` looks like a dotted-quad IPv4 address, else 0.

    The string must consist of exactly four '.'-separated fields, each of
    which converts to an integer in the range 0..255.
    """
    octets = a.split('.')
    if len(octets) != 4:
        return 0
    for o in octets:
        try:
            v = int(o)
        except ValueError:
            return 0
        if v < 0 or v > 255:
            return 0
    return 1


# The host class.
# stash a couple of pieces of data.
class Host:
    """A 'host[:top]' command line argument split into its two parts.

    Attributes:
        host:  the part before the first ':' (or the whole argument).
        _root: the part after the first ':', or '' when there is none.
    """

    def __init__(self, host):
        pos = host.find(':')
        if pos == -1:
            self.host = host
            self._root = ''
        else:
            self.host = host[:pos]
            self._root = host[pos+1:]

    def fpath(self, partial):
        """Return `partial` placed under this host's top-level directory.

        Exactly one '/' separates the root from `partial` unless `partial`
        already starts with one.
        """
        if partial.startswith('/'):
            return self._root + partial
        return self._root + '/' + partial


#
# Routines used in getting, parsing, etc ftp listings.
#

# Why do we need a class for this? Namespaces keep biting me on the
# rear, that's why.
class Alarm:
    """SIGALRM-based timeout that surfaces as a socket.error.

    `base` is the leading text of the error message; the optional state
    string given to alarm() is appended to it when the alarm fires.
    """

    def __init__(self, base):
        self._state = ''
        self._base = base

    def sigalarm(self, n, f):
        """Signal handler: raise socket.error(-1, message)."""
        if self._state:
            raise socket.error(-1, self._base + ': ' + self._state)
        raise socket.error(-1, self._base)

    def alarm(self, time, state=None):
        """Arm the alarm for `time` seconds, remembering `state`."""
        self._state = state
        signal.signal(signal.SIGALRM, self.sigalarm)
        signal.alarm(time)

    def cancel(self):
        """Disarm any pending alarm and forget the state string."""
        signal.alarm(0)
        self._state = ''


# this is debatable, but it turns out I want to mark symlinks visually.
def symlname(src, dst):
    """Return the name under which a symlink appears in a file list.

    With -s (fullsymcheck) this is 'src -> dst', so the target takes part
    in the comparison; otherwise it is just the link name plus '@'.
    """
    if fullsymcheck:
        return '%s -> %s' % (src, dst)
    return src + '@'


# Some of this magic is researched by hand, and some is researched from
# the ftp mirroring sample for ftplib.
# People have some whacky ftp listing formats, they do.
def parselisting(listing):
    """Split the lines of an FTP LIST reply into (files, dirs).

    `files` holds every entry name, with '/' appended to directories and
    symlinks rendered through symlname(). `dirs` holds the bare directory
    names. Lines with fewer than six whitespace-separated words and the
    '.' and '..' entries are ignored. The name is taken to be the last
    word of the line, so names containing whitespace are not handled.
    """
    f = []
    d = []
    for line in listing:
        words = line.split()
        if len(words) < 6:
            continue
        fn = words[-1]
        # symlink?
        if words[-2] == '->':
            # theoretically it can be harmless to exclude
            # symlinks from your mirror, since the real files
            # are there *somewhere*
            if exclsymlinks:
                continue
            fn = symlname(words[-3], words[-1])
        if fn in ('.', '..'):
            continue
        # directories appear in the directory list in their original
        # form; in the main file list with a '/' postfixed.
        if words[0][0] == 'd':
            d.append(fn)
            fn = fn + '/'
        f.append(fn)
    return (f, d)


# Trap FTP operations so that they will time out, and so that we can keep
# track of our location and operation within ftp space.
class TrapFtp:
    """ftplib wrapper that runs every operation under a timer.

    `timer` must provide alarm(seconds, message) and cancel(). The
    directory we believe we are in is tracked in `_cwd` so that error
    reports can mention it.
    """

    def __init__(self, timer):
        self._t = timer
        self._cwd = ''

    # directory manipulation operations:
    def _setd(self, path):
        self._cwd = path

    def _pushd(self, path):
        # An absolute path, or any path when we have no current
        # directory yet, replaces the tracked directory outright.
        if not self._cwd or path.startswith('/'):
            self._setd(path)
        else:
            self._cwd = self._cwd + '/' + path

    def _popd(self):
        pos = self._cwd.rfind('/')
        if pos == -1:
            # No separator left: we are back where we started. (Slicing
            # with -1 here would chop a character off the name instead.)
            self._cwd = ''
        else:
            self._cwd = self._cwd[:pos]

    # the generic timeout operation; it has too many arguments.
    def _toop(self, tm, msg, op, *args):
        """Call op(*args) with a `tm`-second alarm labelled `msg`.

        The alarm is cancelled whether op returns or raises, so a failed
        operation cannot leave a stale alarm pending.
        """
        self._t.alarm(tm, msg)
        try:
            return op(*args)
        finally:
            self._t.cancel()

    # And the operations:
    def setopen(self, host):
        """Open an anonymous FTP session to `host`."""
        # A new connection starts with no known directory; forget
        # whatever the previous host left behind.
        self._cwd = ''
        self._f = self._toop(OPENTIME, 'connect to server',
                             ftplib.FTP, host, 'anonymous',
                             'mirror-checker@')

    def cwd(self, path):
        """Change directory on the server, tracking the new location.

        The tracked directory is updated before the command is sent, so
        it names the directory being attempted if the command fails.
        """
        if path == '..':
            self._popd()
        else:
            self._pushd(path)
        return self._toop(OPTIME, 'cwd '+path, self._f.cwd, path)

    def quit(self):
        """Send QUIT to the server."""
        self._toop(OPTIME, 'quit', self._f.quit)

    # We might as well do this RIGHT, so:
    def retrlisting(self, cmd):
        """Run the listing command `cmd` and return its lines as a list.

        The alarm is re-armed every time a line arrives, so DIROPTIME
        bounds the wait for the next line rather than the whole transfer.
        """
        lines = []

        def _rlines(line):
            self._t.alarm(DIROPTIME, 'getting directory listing')
            lines.append(line)

        self._t.alarm(DIROPTIME, 'getting directory listing')
        try:
            self._f.retrlines(cmd, _rlines)
        finally:
            self._t.cancel()
        return lines


# invariant: routines with timeouts always return with the timeout
# cancelled. They may be entered with timeouts enabled, and so should
# set their own ASAP.

def ftplistdir(f):
    """Return the entry names of the current directory of `f`.

    With -r (recursive), every subdirectory is entered in turn and its
    entries are added, prefixed with the subdirectory name and '/'.
    """
    l = f.retrlisting('LIST')
    # return just filenames
    (fl, dl) = parselisting(l)
    if recursive:
        # recurse and add files in subdirs to our list
        for d in dl:
            f.cwd(d)
            sub = ftplistdir(f)
            f.cwd('..')
            fl = fl + [d + '/' + x for x in sub]
    return fl


def getlisting(f, host, path):
    """Connect to `host`, list `path` and return the sorted entry names.

    Errors from connecting, changing directory or listing propagate to
    the caller; errors while quitting are ignored.
    """
    f.setopen(host)
    f.cwd(path)
    l = ftplistdir(f)
    # we don't actually care about errors here, we just want to
    # timeout sooner or later. That includes the server dropping the
    # connection or answering QUIT with an error: the listing we already
    # have is still good.
    try:
        f.quit()
    except ftplib.all_errors + (socket.error,):
        pass
    l.sort()
    return l


def nameit(h, i):
    """Name a host/IP pair so it looks OK."""
    if h == i:
        return h
    else:
        return '%s (%s)' % (i, h)


#
# this is complicated by the fact that some mirrors mirror only .bz2 files,
# and some mirror only .gz files. We need to cast out such differences; it's
# ok, as long as they have SOME version of the file.
def postfix(x, y):
    """Is y a postfix of x? Returns basename portion.

    The result is `x` with the trailing `y` removed, or None when `x`
    does not end with `y`.
    """
    if x.endswith(y):
        return x[:len(x) - len(y)]
    return None


def basename(x):
    """Return the 'basic' name of a file, stripped of compression.

    '.bz2' and '.gz' are removed from the end of the name, also when they
    are followed by '.sign' (which is kept). A name that consists of
    nothing but such a suffix is returned unchanged.
    """
    for suffix, kept in (('.bz2.sign', '.sign'), ('.bz2', ''),
                         ('.gz.sign', '.sign'), ('.gz', '')):
        p = postfix(x, suffix)
        if p:
            return p + kept
    return x


def exclfilename(fn):
    """Do we want to exclude this file(name) from the added file list?"""
    # rfind() gives -1 when there is no '/', which makes the slice start
    # at 0 and so covers the whole name.
    base = fn[fn.rfind('/') + 1:]
    return (base in exclfilenames)


# does a path have a superior directory in a dictionary thereof?
# we need this because if a directory is missing, we don't want to
# complain about every file also missing.
def pathindirs(path, dict):
    """Return 1 if a directory prefix of `path` is a key of `dict`.

    Prefixes are tried from the longest to the shortest, each including
    its trailing '/'. Returns 0 when none is present.
    """
    while 1:
        pos = path.rfind('/')
        if pos == -1:
            break
        path = path[:pos+1]
        if path in dict:
            return 1
        # drop the trailing '/' so the next rfind() moves up a level
        path = path[:-1]
    return 0


def gendiffs(l1, l2):
    """Compare the master list `l1` against the slave list `l2`.

    Both lists are walked in step, which relies on them being sorted.
    Returns (missing, added, diffs): the first two are 0/1 flags and
    `diffs` is a list of 'miss NAME' and 'add NAME' strings. Entries
    below a directory that has already been reported are not reported
    again. An entry is not counted as missing when the slave has a file
    with the same basename().
    """
    def handlemiss(f, c, d, dl):
        if not (basename(f) in c or pathindirs(f, d)):
            dl.append('miss ' + f)
            if f.endswith('/'):
                d[f] = 1
            return 1
        return 0

    def handleadd(f, d, dl):
        if not (noadded or exclfilename(f) or pathindirs(f, d)):
            dl.append('add ' + f)
            if f.endswith('/'):
                d[f] = 1
            return 1
        return 0

    diffs = []
    i1, i2 = 0, 0
    missdirs = {}
    adddirs = {}
    canonlist = {}
    added = 0
    missing = 0
    for l in l2:
        canonlist[basename(l)] = l
    while i1 < len(l1) and i2 < len(l2):
        f1, f2 = l1[i1], l2[i2]
        if f1 < f2:
            missing = missing | \
                handlemiss(f1, canonlist, missdirs, diffs)
            i1 = i1 + 1
        elif f1 > f2:
            added = added | \
                handleadd(f2, adddirs, diffs)
            i2 = i2 + 1
        else:
            # the same
            i1 = i1 + 1
            i2 = i2 + 1
    # whatever is left over on either side has no counterpart
    while i1 < len(l1):
        missing = missing | \
            handlemiss(l1[i1], canonlist, missdirs, diffs)
        i1 = i1 + 1
    while i2 < len(l2):
        added = added | \
            handleadd(l2[i2], adddirs, diffs)
        i2 = i2 + 1
    return (missing, added, diffs)


# GRR.
# why can't socket be, like, CONSISTENT?
# sometimes socket.error returns '(errno, strerror(errno))', and
# sometimes it just returns '(error-string,)'.
# From the same set of routines, even. Gee, thanks.
def sockunpack(pack):
    """Normalise a socket.error into an (errno, message) pair.

    A one-element `args` is treated as a bare message and paired with
    -1. When `args` is empty, the string form of the exception is used as
    the message so that callers can always unpack two values.
    """
    args = pack.args
    if len(args) >= 2:
        return (args[0], args[1])
    if len(args) == 1:
        return (-1, args[0])
    return (-1, str(pack))


# all the ftplib errors that return only a message.
ftperrs = (ftplib.error_reply, ftplib.error_temp, ftplib.error_perm,
           ftplib.error_proto)


def egetlisting(host, path):
    """Fetch the listing of `path` on `host`, turning errors into text.

    Returns (error_message, listing). On success the message is ''; on
    failure the listing is [] and the message describes the problem,
    including the tracked FTP directory for ftplib errors when one is
    known.
    """
    try:
        tf = getlisting(ftpops, host, path)
    except socket.error as serr:
        (errn, msg) = sockunpack(serr)
        return ('network problem: %s' % (msg,), [])
    except ftperrs as msg:
        emsg = 'ftp problem: %s' % (msg,)
        if ftpops._cwd:
            emsg = emsg + '\n\tdirectory: ' + ftpops._cwd
        return (emsg, [])
    except EOFError:
        return ('ftp problem: EOF on receive', [])
    return ('', tf)


#
# -- no guts, no glory.
# so here's some guts.
relp = args[0]
master = Host(args[1])
slaves = [Host(a) for a in args[2:]]

# set the alarm handler.
timer = Alarm('FTP timeout')
ftpops = TrapFtp(timer)

if verbose > 1:
    print("getting listing from master host %s" % (master.host,))
(emsg, files) = egetlisting(master.host, master.fpath(relp))
if emsg:
    print('master %s: %s' % (master.host, emsg))
    sys.exit(1)
if not files:
    print("No files from master: exiting")
    # Without a master listing there is nothing to compare against;
    # actually stop, as the message says.
    sys.exit(1)

# for every slave hostname or IP address:
#   - derive all IP addresses.
#   for each derived IP address:
#     - get listing
#     - diff listing.
for s in slaves:
    ips = []
    hn = s.host
    try:
        if isipaddr(hn):
            ips = [hn]
        else:
            (c, als, ips) = socket.gethostbyname_ex(hn)
    except socket.error:
        print("%s: not resolvable" % (hn,))
        continue

    # for every IP of the slave:
    for i in ips:
        if i in exclude:
            if verbose > 1:
                print("skipping excluded %s" % (nameit(hn, i),))
            continue
        if verbose > 1:
            print("checking %s" % (nameit(hn, i),))
        (emsg, tf) = egetlisting(i, s.fpath(relp))
        if emsg:
            print('%s: %s' % (nameit(hn, i), emsg))
            continue
        # sometimes the directory just doesn't have any files.
        # (or, alternately, it has a broken LIST/NLST that doesn't
        # give us a list of any.)
        if not tf:
            print("%s: empty directory" % (nameit(hn, i),))
            continue
        (m, a, res) = gendiffs(files, tf)
        if res:
            # what differences exactly?
            if m and a:
                dstr = 'differences from master'
            elif m:
                dstr = 'missing files'
            elif a:
                dstr = 'added files'
            else:
                dstr = 'internal problem?!'
            print("%s: %s" % (nameit(hn, i), dstr))
            if verbose:
                print("\t%s" % ('\n\t'.join(res),))

# all done.
sys.exit(0)

#
# Copyright (C) 2001 Chris Siebenmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.