#!/usr/bin/env ruby
#
#  USAGE::
#
#    script/update_images [mode] [args...]
#
#  DESCRIPTION::
#
#  This is the daemon that moves images to the remote image servers.  Run the
#  script with --help to get full set of modes of operation and arguments.
#
################################################################################

# Default delay between checks for new images in daemon mode.
DAEMON_DELAY = 60

# Number of seconds between progress updates.
PROGRESS_DELAY = 10

# Number of times to retry upload/download before giving up.
RETRIES = 5

# Default root path for Rails.
ROOT = '/var/web/mo'.freeze
# ROOT = '/home/jason/public_html/mo_trunk'

# Where daemon and cronjob log stuff by default.
LOG_FILE = 'log/update_images.log'.freeze

# Default local path (where daemon runs).
LOCAL_PATH = 'public/images'.freeze

# Aliases for our image servers.
SERVER_ALIASES = {
  'dreamhost' => 'cdmr@digitalmycology.com:images.digitalmycology.com',
  'slicehost' => 'mushroomobserver.org:/var/web/mo/public/images',
}.freeze

# List of official image servers to keep updated from main webserver.
OFFICIAL_SERVERS = ['dreamhost'].freeze

# Default image server to use for --sync mode.
DEFAULT_IMAGE_SERVER = 'dreamhost'.freeze

# List of all image sizes available.
ALL_SIZES = ['thumb', '320', '640', '960', '1280', 'orig'].freeze

# List of sizes we don't want to keep on slicehost due to space constraints.
DELETE_SIZES = ['640', '960', '1280', 'orig'].freeze

# ----------------------------
#  Command line parsing.
# ----------------------------

# Is first argument a value or a flag?
#
# Returns true when there is a next argument and it does not start with '-';
# returns false otherwise, including when no arguments remain.
def is_next_arg_value?
  @argv ||= ARGV
  arg = @argv.first
  !arg.nil? && arg[0, 1] != '-'
end

# Get and remove first argument.
def get_next_arg
  @argv ||= ARGV
  @argv.shift
end

# Look for and remove a bare flag: '--thumb'
# Returns true if the flag was present, nil otherwise.
def get_flag(arg)
  @argv ||= ARGV
  index = @argv.index(arg)
  return nil unless index
  @argv.delete_at(index)
  true
end

# Look for and remove an argument with a value: '--path blah'
# Returns the value, or nil if the flag is absent or is the last argument.
def get_parm(arg)
  @argv ||= ARGV
  index = @argv.index(arg)
  return nil unless index
  @argv.delete_at(index)   # the flag itself
  @argv.delete_at(index)   # its value (nil when the flag was the last argument)
end

# Make sure there are no unexpected arguments, dying if there are.
def any_args_left?
  @argv ||= ARGV
  return if @argv.empty?
  puts "Unexpected arguments: [#{@argv.join(', ')}]\n"
  exit 1
end

# ----------------------------
#  Nightly job.
# ----------------------------

# Read the cached listings for 'local' and every official server.
# Returns a Hash of file-list Hashes keyed on server name.  Exits with status
# 1 (after printing a message) if any cache file is missing, since the callers
# cannot do anything useful with a partial set of cached listings.
def get_cached_listings(cronjob=false)
  file_lists = {}
  (['local'] + OFFICIAL_SERVERS).each do |server|
    verbose("Getting #{server} listing...\n")
    list = get_cached_file_list(server)
    if list.nil?
      message = "No cached listing for #{server}; refresh the listings first.\n"
      log(message) if cronjob
      puts message
      exit 1
    end
    ALL_SIZES.each do |size|
      count = list.keys.select { |f| f[0, size.length] == size }.length
      verbose(" %-5s: %d\n" % [size, count])
    end
    file_lists[server] = list
  end
  file_lists
end

# List the local directory and every official server, rewriting the caches.
# Returns a Hash of file-list Hashes keyed on server name; servers that could
# not be listed are reported and left out of the result.
def refresh_listings(cronjob=false)
  log("Refreshing listings\n") if cronjob
  file_lists = {}

  # Get local list for diagnostic purposes only.
  log("Getting local listing\n") if cronjob
  verbose("Getting local listing...\n")
  local_list = list_local_files(@@local_path)
  file_lists['local'] = local_list
  report_sizes(local_list) if @@verbose
  write_cached_file_list('local', local_list)

  # Get remote lists so we can refresh the caches.
  OFFICIAL_SERVERS.each do |server|
    log("Getting #{server} listing\n") if cronjob
    verbose("Getting #{server} listing...\n")
    list = list_remote_files(server)
    if list
      write_cached_file_list(server, list)
      report_sizes(list) if @@verbose
      file_lists[server] = list
    else
      log(" (failed)\n") if cronjob
      puts "Server #{server} is down!\n"
    end
  end

  file_lists
end

# Report the numbers of files if in verbose mode.  Originals are split into
# the number of 'jpg' files plus the number of everything else.
def report_sizes(list)
  names = list.keys
  ALL_SIZES.each do |size|
    of_size = names.select { |f| f[0, size.length] == size }
    if size != 'orig'
      verbose(" %-5s: %d\n" % [size, of_size.length])
    else
      jpgs = of_size.select { |f| f[-3..-1] == 'jpg' }
      verbose(" %-5s: %d + %d\n" % [size, jpgs.length, of_size.length - jpgs.length])
    end
  end
end

# This takes a hash of file-list hashes and checks for mismatches in size.
# Returns Hash of Arrays of files that need to be uploaded, keyed on server.
#
# In cronjob mode it also deletes local copies of the DELETE_SIZES images that
# are present with identical sizes in every listing -- but only when a listing
# was obtained for every official server.  Without that guard a server that is
# down (and therefore absent from +file_lists+) would make every local file
# look "correct everywhere" and it would be deleted with no remote copy.
def check_for_mismatches(file_lists, cronjob=false)
  servers = file_lists.keys.sort
  need_to_upload = {}
  verbose("Checking for mismatches between #{servers.join(', ')}")

  # Get union of files on all servers.
  files = {}
  servers.each do |server|
    file_lists[server].each_key { |file| files[file] = nil }
  end
  verbose("Number of files in union: #{files.keys.length}")

  all_remotes_listed = !OFFICIAL_SERVERS.empty? &&
                       OFFICIAL_SERVERS.all? { |s| file_lists.has_key?(s) }

  # Get list of files with mismatches.
  mismatches = files.keys.select do |file|
    next false if file == 'date'   # bookkeeping entry, not an image

    first_size = nil
    local_size = nil
    any_different = false
    any_missing = false
    servers.each do |server|
      this_size = file_lists[server][file]
      any_different = true if first_size && this_size && first_size != this_size
      any_missing = true unless this_size
      first_size ||= this_size
      local_size = this_size if server == 'local'
    end

    # Remove originals if they exist and are correct on all remote servers.
    if cronjob && all_remotes_listed && !any_different && !any_missing &&
       DELETE_SIZES.any? { |prefix| file[0, prefix.length] == prefix }
      local_file = "#{@@local_path}/#{file}"
      if File.exist?(local_file)
        File.delete(local_file)
        log("Deleting #{file}\n")
        verbose("deleting #{file}")
      end
    end

    # Get a list of files that we need to be uploaded.
    if local_size && local_size != 0 && (any_different || any_missing)
      servers.each do |server|
        next if file_lists[server][file] == local_size
        verbose("need to upload #{file} to #{server}")
        (need_to_upload[server] ||= []) << file
      end
    end

    # Collect list of mismatches.
    any_different
  end

  # Now print out the list of problems in a table.
  unless mismatches.empty?
    header = servers.map { |server| sprintf('%15s', server) }.join(' ')
    str = "Found #{mismatches.length} mismatch(es):\n\n"
    str += sprintf("%-20s %s\n", 'file', header)
    mismatches.each do |file|
      columns = servers.map do |server|
        bytes = file_lists[server][file]
        bytes ? sprintf('%15d', bytes) : sprintf('%15s', '--')
      end
      str += sprintf("%-20s %s\n", file, columns.join(' '))
    end
    puts str
    log(str + "\n") if cronjob
  end

  # Return list of files that need to be uploaded.
  need_to_upload
end

# -----------------------------
#  Command-line utility mode.
# -----------------------------

# Sync one image size between the local directory and one remote server.
#
# local::       Local image directory.
# remote::      Server alias or scp-style url.
# do_upload::   Copy files that exist locally but not remotely.
# do_download:: Copy files that exist remotely but not locally.
# size::        Image size (one of ALL_SIZES) to restrict the sync to.
#
# Returns without transferring anything if either listing is unavailable
# (missing cache in --use_cache mode, or remote server unreachable).
def sync_command(local, remote, do_upload, do_download, size)
  # Get initial list of files locally and on remote server.
  # Each is a hash like: files['orig/1234.jpg'] = 123456 (bytes)
  puts "Getting local listing...\n"
  if @@use_cache
    local_files = get_cached_file_list('local', size)
  else
    local_files = list_local_files(local, size)
    write_cached_file_list('local', local_files, size)
  end
  if local_files.nil?
    puts "No cached listing for local; refresh the listings first.\n"
    return
  end

  puts "Getting remote listing...\n"
  if @@use_cache
    remote_files = get_cached_file_list(remote, size)
  else
    remote_files = list_remote_files(remote, size)
    write_cached_file_list(remote, remote_files, size) if remote_files
  end
  if remote_files.nil?
    if @@use_cache
      puts "No cached listing for #{remote}; refresh the listings first.\n"
    else
      puts "Server #{remote} is down!\n"
    end
    return
  end

  check_for_mismatches('local' => local_files, 'remote' => remote_files)

  # Upload files missing on remote server.
  if do_upload
    missing = local_files.keys - remote_files.keys
    bytes = missing.inject(0) { |sum, file| sum + local_files[file] }
    if missing.empty?
      puts "Nothing to upload.\n"
    else
      puts "Uploading #{bytes} bytes in #{missing.length} file(s)...\n"
      upload_files(missing, local, remote)
    end
  end

  # Download files missing locally.
  if do_download
    missing = remote_files.keys - local_files.keys
    bytes = missing.inject(0) { |sum, file| sum + remote_files[file] }
    if missing.empty?
      puts "Nothing to download.\n"
    else
      puts "Downloading #{bytes} bytes in #{missing.length} file(s)...\n"
      download_files(missing, local, remote)
    end
  end
end

# ---------------------------------
#  Logging and verbose messaging.
# ---------------------------------

# Append an empty line to the log file, if the log file already exists.
def log_break
  return unless File.exist?(@@log_file)
  File.open(@@log_file, 'a') { |fh| fh.puts '' }
end

# Append a timestamped message to the log file.
def log(msg)
  stamp = Time.now.strftime('[%Y%m%d:%H:%M:%S] ')
  File.open(@@log_file, 'a') { |fh| fh.puts(stamp + msg) }
end

# Print an indented message to stdout, but only in verbose mode.
def verbose(msg)
  puts msg.gsub(/^/, ' ') if @@verbose
end

# ----------------------------
#  Get file lists.
# ----------------------------

# Get list of local images.  Returns a hash whose keys are filenames (without
# path, just 'thumb/1234.jpg') and whose values are file sizes.  Empty files
# are left out.  The hash also has a 'date' entry holding the listing time.
#
# Reads the directories directly instead of piping through `ls`, so no
# subprocess is forked and the path never passes through a shell.
def list_local_files(path, size=nil)
  files = { 'date' => Time.now }
  (size ? [size] : ALL_SIZES).each do |type|
    dir = "#{path}/#{type}"
    verbose "ls #{dir}"
    next unless File.directory?(dir)
    Dir.entries(dir).sort.each do |file|
      next unless file.match(/\A\d+\.\w+\z/)
      # File.size? is nil for empty files and for files that have vanished.
      bytes = File.size?("#{dir}/#{file}")
      files["#{type}/#{file}"] = bytes if bytes
    end
  end
  files
end

# Get list of images on a remote server.  The +server+ parameter can be an
# alias of an established image server, or a scp-style url (user@host:path).
#
# Returns a hash like list_local_files, or nil if ssh itself failed (ssh
# exits with status 255 when it cannot connect).  A failing remote `ls`,
# e.g. for a size directory that does not exist yet, is not an error.
def list_remote_files(server, size=nil)
  url = SERVER_ALIASES[server] || server
  # Split on the first colon only, so paths containing colons survive.
  host, path = url.split(':', 2)
  path = '.' if path.nil? || path.empty?

  files = { 'date' => Time.now }
  (size ? [size] : ALL_SIZES).each do |type|
    cmd = "ssh #{host} ls -lU --color=never #{path}/#{type}"
    verbose cmd
    IO.popen(cmd) do |dir|
      dir.each_line do |line|
        fields = line.chomp.split
        next if fields.length < 8
        # Size is the fifth column.  The name is taken as the last column
        # because `ls -l` prints the timestamp in either two or three columns
        # depending on the remote's time style; image names have no spaces.
        bytes = fields[4].to_i
        file = fields.last
        if file.match(/\A\d+\.\w+\z/) && bytes != 0
          files["#{type}/#{file}"] = bytes
        end
      end
    end
    return nil if $? && $?.exitstatus == 255
  end
  files
end

# Get cached directory.  Returns a hash whose keys are filenames (no path, just
# 'thumb/1234.jpg'), and whose values are the size of the file in bytes.  The
# +server+ parameter is either 'local' or an alias for an remote image server.
# Returns nil if there is no cache file.  The 'date' entry is the cache file's
# modification time.
def get_cached_file_list(server, size=nil)
  cache_file = "#{@@local_path}/#{server}.files"
  return nil unless File.exist?(cache_file)

  verbose "reading #{size || 'all'} from #{cache_file}"
  files = {}
  File.open(cache_file) do |fh|
    fh.each_line do |line|
      file, bytes = line.chomp.split
      next unless file   # skip blank lines
      files[file] = bytes.to_i if !size || file[0, size.length] == size
    end
  end
  files['date'] = File.mtime(cache_file)
  files
end

# Write directory cache.  With no +size+ the whole cache is replaced; with a
# +size+ only the lines for that size are replaced and the rest are kept.
# Sets files['date'] to the current time.
def write_cached_file_list(server, files, size=nil)
  cache_file = "#{@@local_path}/#{server}.files"
  kept = []
  if size
    verbose "updating #{size} in #{cache_file}"
    if File.exist?(cache_file)
      File.open(cache_file) do |fh|
        fh.each_line do |line|
          next if line[0, size.length] == size
          # +files+ carries its own 'date' entry; dropping the old one keeps
          # 'date' lines from piling up on every partial update.
          next if line[0, 5] == 'date '
          kept << line.chomp
        end
      end
    end
  else
    verbose "writing all to #{cache_file}"
  end

  File.open(cache_file, 'w') do |fh|
    kept.each { |line| fh.puts(line) }
    files.each_pair { |file, bytes| fh.puts("#{file} #{bytes}\n") }
  end
  files['date'] = Time.now
end

# ----------------------------
#  File transfers.
# ----------------------------

# Upload/download a single file with scp.  Either end may be a server alias.
# Returns the result of Kernel#system.
def transfer_file(file, from, to)
  from = SERVER_ALIASES[from] if SERVER_ALIASES.has_key?(from)
  to = SERVER_ALIASES[to] if SERVER_ALIASES.has_key?(to)
  cmd = "scp #{from}/#{file} #{to}/#{file}"
  verbose cmd
  system cmd
end

# Upload a list of files.
# Copies +files+ (paths relative to the image directory) from +local+ to
# +remote+ in a single tar-over-ssh pipeline.  +remote+ may be a server alias
# or a scp-style url.  Returns the result of Kernel#system for the pipeline.
def upload_files(files, local, remote)
  url, path = split_remote_url(remote)
  tempfile = write_transfer_list(files)
  cmd = "tar -T #{tempfile} -cf - | ssh #{url} \\(cd #{path}\\; tar -xvPf -\\)"
  run_transfer(cmd, local, tempfile)
end

# Download a list of files.
# Copies +files+ (paths relative to the image directory) from +remote+ to
# +local+ in a single tar-over-ssh pipeline.  +remote+ may be a server alias
# or a scp-style url.  Returns the result of Kernel#system for the pipeline.
def download_files(files, local, remote)
  url, path = split_remote_url(remote)
  tempfile = write_transfer_list(files)
  cmd = "cat #{tempfile} | ssh #{url} \\(cd #{path}\\; tar -T - -cf -\\) | tar -xvPf -"
  run_transfer(cmd, local, tempfile)
end

# Resolve a server alias and split it into [host, path].  Splits on the first
# colon only; the path defaults to '.' when none is given.
def split_remote_url(remote)
  remote = SERVER_ALIASES[remote] if SERVER_ALIASES.has_key?(remote)
  url, path = remote.split(':', 2)
  path = '.' if path.nil? || path.empty?
  [url, path]
end

# Write the sorted list of files, one per line, to a temp file for tar -T.
# Returns the temp file's path.
def write_transfer_list(files)
  tempfile = "/tmp/update_images.#{$$}"
  File.open(tempfile, 'w') do |fh|
    files.sort.each { |file| fh.puts(file) }
  end
  tempfile
end

# Run a transfer pipeline from inside the +local+ directory, then remove the
# temp file.  Returns the result of Kernel#system.
#
# The pipeline only runs if the `cd` succeeds, and the temp file is removed
# from Ruby rather than with a trailing `rm`, so that the returned status is
# the pipeline's and not rm's.
#
# This deliberately uses system instead of reading the command's output
# through open("|..."): that approach caused a fork and ran out of memory.
def run_transfer(cmd, local, tempfile)
  verbose cmd
  cmd = "(#{cmd}) > /dev/null" if !@@verbose
  system "cd #{local} && #{cmd}"
ensure
  File.delete(tempfile) if File.exist?(tempfile)
end

# ----------------------------
#  Main program.
# ----------------------------

# NOTE(review): Ruby 3.0 and later raise RuntimeError for class variable
# access from toplevel.  The @@ settings are shared by the whole file, so
# replacing them has to be done everywhere at once.

# Turn on verbose messages.
@@verbose = get_flag('-v') || get_flag('--verbose')

# Tell it not to refresh listings (speeds things up for debugging).
@@use_cache = get_flag('-u') || get_flag('--use_cache')

# Let user override some of the defaults.
@@root       = get_parm('-R') || get_parm('--root')       || ROOT
@@log_file   = get_parm('-L') || get_parm('--log-file')   || "#{@@root}/#{LOG_FILE}"
@@local_path = get_parm('-P') || get_parm('--local-path') || "#{@@root}/#{LOCAL_PATH}"

# Dispatch on the mode, which is the first remaining argument.
case get_next_arg

when '-l', '--ls'
  any_args_left?
  files = @@use_cache ? get_cached_listings : refresh_listings
  check_for_mismatches(files)
  exit 0

when '-c', '--clean'
  any_args_left?
  files = @@use_cache ? get_cached_listings(:cron) : refresh_listings(:cron)
  uploads = check_for_mismatches(files, :cron)
  uploads.each do |server, list|
    upload_files(list, @@local_path, server)
  end
  exit 0

when '-s', '--sync'
  remote = is_next_arg_value? ? get_next_arg : DEFAULT_IMAGE_SERVER

  # Consume both spellings of each flag so that giving both is not reported
  # as an unexpected argument.
  upload_short = get_flag('-U')
  upload_long = get_flag('--upload')
  download_short = get_flag('-D')
  download_long = get_flag('--download')
  do_upload = true if upload_short || upload_long
  do_download = true if download_short || download_long
  do_upload = do_download = true unless do_upload || do_download

  # -N selects every numeric ("normal") size, so it is read once up front
  # rather than once per size (where the first numeric size would consume it).
  normal = get_flag('-N')
  do_size = {}
  ALL_SIZES.each do |size|
    short = size.match(/^\d/) ? normal : get_flag('-' + size[0, 1].upcase)
    long = get_flag("--#{size}")
    do_size[size] = true if short || long
  end
  if do_size.empty?
    ALL_SIZES.each { |size| do_size[size] = true }
  end
  any_args_left?

  # Try to find local image directory
  if File.directory?(@@local_path)
    local = @@local_path
  elsif File.directory?('public/images')
    local = 'public/images'
  elsif File.directory?('images')
    local = 'images'
  elsif ALL_SIZES.any? { |x| File.directory?(x) }
    local = '.'
  else
    puts "I cannot find your local images directory.\n" +
         "Please create it or use --local-path argument.\n"
    exit 1
  end
  puts "Local directory: #{local}\n"
  puts "Remote server: #{remote}\n"

  # Make sure subdirectories exist.
  ALL_SIZES.each do |size|
    if do_size[size] && !File.directory?("#{local}/#{size}")
      puts "Creating #{local}/#{size}\n"
      Dir.mkdir("#{local}/#{size}")
    end
  end

  # Do sizes one at a time.
  puts "ALL_SIZES = #{ALL_SIZES}\n"
  (ALL_SIZES & do_size.keys).each do |size|
    puts "size = #{size}\n"
    sync_command(local, remote, do_upload, do_download, size)
  end
  exit 0

################################################################################

when '-h', '--help'
  puts "
USAGE
  script/update_images -- [args...]

MODES
  -l --ls               Refresh the local list of images on the remote servers.
  -c --clean            Refresh the list and remove originals that are done.
  -s --sync [server]    Sync up the images at the given server.
  -h --help             Print this help message.

OPTIONS
  These can be used in any mode:
  -R --root             Override rails root path.         /var/web/mo
  -L --log-file         Override log file.                $root/log/$0.log
  -P --local-path       Override local image path.        $root/public/images
  -v --verbose          Turn on verbose messages.
  -u --use_cache        Use cached file lists.

  These can all be used in --sync mode:
  -U --upload           Only upload images to remote server.
  -D --download         Only download images from remote server.
  -T --thumb            Only update thumbnails.
  -N --320|640|960...   Only update normal images.
  -O --orig             Only update originals.

DESCRIPTION
  This program is used to keep image servers in sync.  It first grabs a
  directory of all images both locally and remotely.  If there are any size
  mismatches, it complains and ignores those images until you can decide which
  is correct.  Then it copies any images that are on one but not the other so
  that both machines wind up with identical copies.

  By default it will update both local and remote servers, and it will do all
  image sizes (thumbnails, normal-sized, etc.).  You can restrict it to just
  upload or just download, and you can tell it only to do thumbnails,
  originals, etc.

  It copies all files of a given type in a single command, running an instance
  of tar on both machines connected via pipes in ssh.  This should be very
  efficient.

SSH PASSWORDS
  You must set up password-less ssh with each remote machine for this to work.
  Create a local key if you haven't already done so:

    ssh-keygen -t rsa

  (Just press return three times.)  Then copy it to each remote machine:

    ssh-copy-id -i ~/.ssh/id_rsa.pub user@server.url

FILES
  Servers are specified either by alias, or by scp-type syntax:

    user@host.url:path

  The aliases recognized currently are:
"
  SERVER_ALIASES.each do |k, v|
    puts "    #{k} = #{v}\n"
    # dreamhost   velosa@images.mushroomobserver.org:images.mushroomobserver.org
    # slicehost   mushroomobserver.org:/var/web/mo/public/images
  end
  puts "
  The defaults are:

    root          /var/web/mo
    file lists    /var/web/mo/public/images/dreamhost.files
    local path    /var/web/mo/public/images
    server        dreamhost

EXAMPLES
  update_images --ls
    Force daemon to refresh its list of images on the remote servers.

  update_images --clean
    Refresh listings and remove any originals that have been transferred.

  update_images --sync dreamhost --upload
    Run this on slicehost to force it to upload any images missing on
    dreamhost.

  update_images --root . --sync dreamhost --download --orig -P image_backup
    Run this on your local machine to update your backup copy of the original
    images in a directory called image_backup/orig.

  update_images --root . --sync dreamhost --download --thumb
    Run this on your local machine to grab all the thumbnails for testing
    purposes.

  update_images --root . --sync dreamhost
    Run this on a hypothetical second image server to sync them up: copy
    images missing on the second from the first, and vice versa.

"
  exit 0

else
  puts "Invalid or missing mode.  Use -h or --help for list.\n"
  exit 1
end