#!/usr/bin/env ruby
#
#  USAGE::
#
#    script/update_lichen_list [-v]
#
#  Pass "-v" as the first argument to print progress messages.
#
#  DESCRIPTION::
#
#  This is a weekly cronjob that makes sure the "lichens" species list is
#  complete.  It uses
#
#    RAILS_ROOT/public/lichen_genera.txt
#
#  to determine which species are lichens.  It is going to include
#  unlichenized lichenicolous fungi and allies, but it will be close.
#
#  This list can then be used to determine if a name is a lichen via a simple
#  sql query:
#
#    SELECT names.id FROM names
#    JOIN observations o ON o.name_id = names.id
#    JOIN observations_species_lists os ON os.observation_id = o.id
#    WHERE os.species_list_id = xxx
#
#  (Where "xxx" is the id of the lichen species list.)
#
################################################################################

require File.expand_path('../../config/boot.rb', __FILE__)
require File.expand_path('../../config/environment.rb', __FILE__)

verbose = (ARGV[0] == '-v')

# Do this as "admin".
user = User.find(0)
User.current = user
now = Time.now

# Get list of lichen genera.  Any line containing a "#" is skipped, so the
# file may carry comments.
genera = {}
file = "#{RAILS_ROOT}/public/lichen_genera.txt"
File.readlines(file).map(&:chomp).each do |genus|
  genera[genus] = true unless genus.match(/#/)
end
puts "found #{genera.keys.length.inspect} genera" if verbose

# Get complete list of all taxa at and below these genera.  A name belongs to
# a genus when the first word of its text_name is that genus.
new_names = []
Name.connection.select_rows(%(
  SELECT id, text_name FROM names
)).each do |id, text_name|
  new_names << id if genera[text_name.split.first]
end
puts "found #{new_names.length.inspect} names" if verbose

# Notes stored on the species list when it has to be created.  The double
# space after the first sentence reproduces the value the original literal
# yielded once its embedded newline was turned into a space.
list_notes =
  'List of all lichenized, lichenicolous and allied species and genera ' +
  'in the world.  ' +
  'I keep it relatively up to date for North America via the ' +
  '"Esslinger checklist":http://www.ndsu.edu/pubweb/~esslinge/chcklst/chcklst7.htm, ' +
  'but coverage is probably still very poor for the rest of the world. ' +
  'Includes many deprecated synonyms, too.'

# Make sure species list exists!
spl = SpeciesList.find_by_title('lichens')
spl ||= SpeciesList.create(
  :created  => now,
  :modified => now,
  :when     => now,
  :user     => User.find_by_login('jason'),
  :where    => 'Unknown',
  :title    => 'lichens',
  :notes    => list_notes
)

# Get existing list of names in "lichens" list.
old_names = Name.connection.select_values %(
  SELECT names.id FROM observations_species_lists os
  JOIN observations o ON os.observation_id = o.id
  JOIN names ON names.id = o.name_id
  WHERE os.species_list_id = #{spl.id}
)
puts "found #{old_names.length.inspect} existing names" if verbose

# Add new names: one placeholder observation per missing name, attached to
# the species list.
ids = new_names - old_names
if ids.any?
  puts "adding #{ids.length} names"

  # The column list is the same for every insert, so build it once.
  fields = [
    'created', 'modified', 'user_id', 'when', 'where',
    'name_id', 'specimen', 'is_collection_location'
  ].map { |x| "`#{x}`" }.join(', ')

  ids.each_with_index do |id, index|
    if verbose
      $stdout.print("%.1f%%...\r" % ((index + 1).to_f / ids.length * 100))
      $stdout.flush
    end

    values = [
      now, now, user.id, now.to_date, 'Unknown', id, false, false
    ].map { |x| Name.connection.quote(x) }.join(', ')

    obs_id = Name.connection.insert %(
      INSERT INTO observations (#{fields}) VALUES (#{values})
    )
    Name.connection.insert %(
      INSERT INTO observations_species_lists
        (observation_id, species_list_id)
      VALUES (#{obs_id}, #{spl.id})
    )
  end
end

# Remove errors?  Names on the list that no longer belong to a lichen genus
# get their placeholder observations deleted.
ids = old_names - new_names
if ids.any?
  puts "removing #{ids.length} names..."

  obs_ids = Name.connection.select_values %(
    SELECT o.id FROM observations_species_lists os
    JOIN observations o ON o.id = os.observation_id
    WHERE os.species_list_id = #{spl.id}
      AND o.name_id IN (#{ids.join(',')})
  )

  # Join the OBSERVATION ids selected above (the original joined the name-id
  # string again, which raised NoMethodError).  Skip the deletes when nothing
  # matched so we never emit an invalid "IN ()".
  if obs_ids.any?
    obs_id_list = obs_ids.join(',')
    Name.connection.delete %(
      DELETE FROM observations_species_lists
      WHERE observation_id IN (#{obs_id_list})
    )
    Name.connection.delete %(
      DELETE FROM observations
      WHERE id IN (#{obs_id_list})
    )
  end
end

exit 0