Skip to content

Commit

Permalink
Merge branch 'main' into njw-project-aliases
Browse files Browse the repository at this point in the history
  • Loading branch information
mo-nathan committed Jan 28, 2025
2 parents 121ed35 + 2510d12 commit 796de68
Show file tree
Hide file tree
Showing 21 changed files with 718 additions and 351 deletions.
120 changes: 120 additions & 0 deletions app/classes/lookup.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# frozen_string_literal: true

# Lookup
#
# A flexible looker-upper of records. It can handle any identifiers we're likely
# to throw at it: a string, ID, instance, or a mixed array of any of those. The
# `lookup_method` has to be configured in the Lookup child class, because the
# lookup column names are different for each model.
#
# Primarily used to get a clean set of ids for ActiveRecord query params.
# For example, indexes like "Observations for (given) Projects" can be filtered
# for more than one project at a time: "NEMF 2023" and "NEMF 2024".
# The observation query needs the project IDs, and Lookup just allows callers
# to send whatever param type is available. This is handy in the API and
# in searches.
#
# Create an instance of a child class with a string, instance or id, or a mixed
# array of any of these. Returns an array of ids, instances or strings (names)
# via instance methods `ids`, `instances` and `titles`.
#
# Use:
# project_ids = Lookup::Projects.new(["NEMF 2023", "NEMF 2024"]).ids
# Observation.where(project: project_ids)
#
# fred_ids = Lookup::Users.new(["Fred", "Freddie", "Freda", "Anni Frid"]).ids
# Image.where(user: fred_ids)
#
# Instance methods:
# (all return arrays)
#
# ids: Array of ids of records matching the values sent to the instance
# instances: Array of instances of those records
# titles: Array of names of those records, via @title_column set in subclass
# (A `names` method seemed too confusing, because Lookup::Names...)
#
# Class constants:
# (defined in subclass)
#
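# MODEL: the model class whose records the subclass looks up (e.g. Name)
# TITLE_COLUMN: the attribute read by `titles` (e.g. :search_name)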
#
class Lookup
  # Matches a value consisting solely of digits. Anchored with \A/\z (not
  # ^/$) so a multi-line string such as "12\nfoo" is not mistaken for an id.
  ID_PATTERN = /\A\d+\z/

  attr_reader :vals, :params

  # vals::   a string, id, instance of MODEL, or a (nested) array of those
  # params:: options hash; unused by this base class, kept for subclasses
  #
  # Raises RuntimeError if the receiver's class does not define MODEL.
  def initialize(vals, params = {})
    unless defined?(self.class::MODEL)
      raise("Lookup is only usable via the subclasses, like Lookup::Names.")
    end

    @model = self.class::MODEL
    @title_column = self.class::TITLE_COLUMN
    # Assign @params before preparing vals, so that subclass overrides of
    # `prepare_vals` (Lookup::Names validates flags there) can read it.
    @params = params
    @vals = prepare_vals(vals)
  end

  # Normalizes the incoming value(s) to a flat array; blank input gives [].
  def prepare_vals(vals)
    return [] if vals.blank?

    [vals].flatten
  end

  # Memoized array of ids for the given vals.
  def ids
    @ids ||= lookup_ids
  end

  # Memoized array of MODEL instances for the given vals.
  def instances
    @instances ||= lookup_instances
  end

  # Memoized array of TITLE_COLUMN values, one per instance.
  def titles
    @titles ||= lookup_titles
  end

  def lookup_ids
    return [] if @vals.blank?

    evaluate_values_as_ids
  end

  # Could just look them up from the ids, but vals may already have instances
  def lookup_instances
    return [] if @vals.blank?

    evaluate_values_as_instances
  end

  def lookup_titles
    return [] if @vals.blank?

    instances.map(&@title_column.to_sym)
  end

  # Resolves each val to one or more ids:
  # - an instance of MODEL contributes its own id
  # - any other AbstractModel raises RuntimeError
  # - an all-digit value is taken as an id and coerced to Integer, so that
  #   "12" and 12 collapse to a single entry
  # - anything else goes to the subclass's `lookup_method`
  def evaluate_values_as_ids
    @vals.map do |val|
      case val
      when @model
        val.id
      when AbstractModel
        raise("Passed a #{val.class} to LookupIDs for #{@model}.")
      else
        if numeric_id?(val)
          val.to_i
        else
          lookup_method(val).map(&:id) # each lookup returns an array
        end
      end
    end.flatten.uniq.compact
  end

  # Same dispatch as `evaluate_values_as_ids`, but yields instances. Numeric
  # values are loaded with MODEL.find.
  def evaluate_values_as_instances
    @vals.map do |val|
      case val
      when @model
        val
      when AbstractModel
        raise("Passed a #{val.class} to LookupIDs for #{@model}.")
      else
        numeric_id?(val) ? @model.find(val.to_i) : lookup_method(val)
      end
    end.flatten.uniq.compact
  end

  # True when the value's string form is made up only of digits.
  def numeric_id?(val)
    ID_PATTERN.match?(val.to_s)
  end
end
14 changes: 14 additions & 0 deletions app/classes/lookup/external_sites.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

# Lookup for ExternalSite records. Non-numeric, non-instance values are
# resolved against the `name` column.
class Lookup::ExternalSites < Lookup
  MODEL = ExternalSite
  TITLE_COLUMN = :name

  # Returns the ExternalSite relation whose `name` matches the given value.
  def lookup_method(name)
    ExternalSite.where(name: name)
  end
end
14 changes: 14 additions & 0 deletions app/classes/lookup/herbaria.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

# Lookup for Herbarium records. Non-numeric, non-instance values are
# resolved against the `name` column.
class Lookup::Herbaria < Lookup
  MODEL = Herbarium
  TITLE_COLUMN = :name

  # Returns the Herbarium relation whose `name` matches the given value.
  def lookup_method(name)
    Herbarium.where(name: name)
  end
end
14 changes: 14 additions & 0 deletions app/classes/lookup/herbarium_records.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

# Lookup for HerbariumRecord records. The title column is the id itself.
class Lookup::HerbariumRecords < Lookup
  MODEL = HerbariumRecord
  TITLE_COLUMN = :id

  # Returns the HerbariumRecord relation whose `id` matches the given value.
  # NOTE(review): this is only reached for values that are not all digits,
  # yet it still queries by id -- confirm whether another column was meant.
  def lookup_method(name)
    HerbariumRecord.where(id: name)
  end
end
17 changes: 17 additions & 0 deletions app/classes/lookup/locations.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# frozen_string_literal: true

# Lookup for Location records. Non-numeric, non-instance values are matched
# against location names via `Location.name_contains`.
class Lookup::Locations < Lookup
  MODEL = Location
  TITLE_COLUMN = :name

  # Returns the Locations whose name contains the cleaned-up search pattern.
  def lookup_method(name)
    # Downcases and removes all punctuation, so it's a multi-string search
    # e.g. "sonoma co california usa"
    pattern = Location.clean_name(name.to_s).clean_pattern
    Location.name_contains(pattern)
  end
end
214 changes: 214 additions & 0 deletions app/classes/lookup/names.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
# frozen_string_literal: true

# Lookup for Name records. Besides resolving the given vals to names, it can
# expand the result set according to these params:
#
# include_synonyms::           add all names sharing a synonym_id
# include_subtaxa::            add all lower taxa of the matched names
# include_immediate_subtaxa::  add only the next level of lower taxa
# exclude_original_names::     drop the originally matched names (and their
#                              other spellings) from the result
class Lookup::Names < Lookup
  MODEL = Name
  TITLE_COLUMN = :search_name

  # Sets @params up front so that `prepare_vals`, which is invoked from
  # Lookup#initialize, can validate the flags no matter in which order the
  # parent assigns its instance variables.
  def initialize(vals, params = {})
    @params = params
    super
  end

  # Normalizes vals to a flat array. Blank vals give [], after checking that
  # no flags were passed (flags are meaningless without names).
  def prepare_vals(vals)
    if vals.blank?
      complain_about_unused_flags!
      return []
    end

    [vals].flatten
  end

  # Returns the ids of the matched names after applying the synonym/subtaxa
  # expansions requested in @params.
  def lookup_ids
    return [] if @vals.blank?

    names = add_synonyms_if_necessary(original_names)
    names_plus_subtaxa = add_subtaxa_if_necessary(names)
    names = add_synonyms_again(names, names_plus_subtaxa)
    names -= original_names if @params[:exclude_original_names]
    names.map(&:id)
  end

  # Re-lookup all instances from the matched ids. Too complicated to try to grab
  # instances if they were in the given vals, because of `add_other_spellings`.
  def lookup_instances
    return [] if @vals.blank?

    ids.map { |id| Name.find(id) }
  end

  # "Original" names could turn out to be quite a few more than the given vals.
  # Memoized to avoid recalculating, or passing the value around.
  def original_names
    @original_names ||= if @params[:exclude_original_names]
                          add_other_spellings(original_matches)
                        else
                          original_matches
                        end
  end

  # Matches for the given vals, from the db. Every element of the result is a
  # Name record (never a bare id), because the callers read columns such as
  # :correct_spelling_id and :synonym_id from each element.
  def original_matches
    @original_matches ||= @vals.map do |val|
      if val.is_a?(@model)
        val
      elsif val.is_a?(AbstractModel)
        raise("Passed a #{val.class} to LookupIDs for #{@model}.")
      elsif /\A\d+\z/.match?(val.to_s) # from an id
        Name.where(id: val).select(*minimal_name_columns)
      else # from a string
        find_matching_names(val)
      end
    end.flatten.uniq.compact
  end

  # Finds names matching a string. If the parsed string includes an author,
  # an exact `search_name` match is tried first; otherwise, or if that finds
  # nothing, falls back to matching `text_name`.
  #
  # NOTE: Name.parse_name returns a ParsedName instance, not an Name instance.
  # A ParsedName is a hash of segments and formatted strings of the name.
  def find_matching_names(val)
    parse = Name.parse_name(val)
    srch_str = if parse
                 parse.search_name
               else
                 Name.clean_incoming_string(val)
               end
    if parse&.author.present?
      matches = Name.where(search_name: srch_str).select(*minimal_name_columns)
      # Checked inside the branch: without an author there is no `matches`
      # to test, and we go straight to the text_name query.
      return matches unless matches.empty?
    end

    Name.where(text_name: srch_str).select(*minimal_name_columns)
  end

  # Adds synonyms when requested; otherwise adds other spellings, unless the
  # original names are to be excluded (they already include other spellings).
  def add_synonyms_if_necessary(names)
    if @params[:include_synonyms]
      add_synonyms(names)
    elsif !@params[:exclude_original_names]
      add_other_spellings(names)
    else
      names
    end
  end

  # Adds all subtaxa, or only immediate subtaxa, depending on the flags.
  def add_subtaxa_if_necessary(names)
    if @params[:include_subtaxa]
      add_subtaxa(names)
    elsif @params[:include_immediate_subtaxa]
      add_immediate_subtaxa(names)
    else
      names
    end
  end

  # If the subtaxa step enlarged the set, expand synonyms (or other
  # spellings) once more over the larger set; otherwise keep `names`.
  def add_synonyms_again(names, names_plus_subtaxa)
    if names.length >= names_plus_subtaxa.length
      names
    elsif @params[:include_synonyms]
      add_synonyms(names_plus_subtaxa)
    else
      add_other_spellings(names_plus_subtaxa)
    end
  end

  # Returns every name whose correct spelling (or own id, when it has no
  # correct_spelling_id) is among those of the given names.
  def add_other_spellings(names)
    ids = names.map { |name| name[:correct_spelling_id] || name[:id] }
    return [] if ids.empty?

    Name.where(Name[:correct_spelling_id].coalesce(Name[:id]).
               in(limited_id_set(ids))).select(*minimal_name_columns)
  end

  # Returns the names that have no synonym_id, plus every name sharing a
  # synonym_id with any of the given names.
  def add_synonyms(names)
    ids = names.pluck(:synonym_id).compact
    return names if ids.empty?

    names.reject { |name| name[:synonym_id] } +
      Name.where(synonym_id: limited_id_set(ids)).
      select(*minimal_name_columns)
  end

  # Returns the given names plus all of their lower taxa. Uses @name_query as
  # scratch space shared with `add_lower_names` / `add_higher_names`.
  def add_subtaxa(names)
    higher_names = genera_and_up(names)
    lower_names = genera_and_down(names)
    @name_query = Name.where(id: names.map(&:id))
    @name_query = add_lower_names(lower_names)
    @name_query = add_higher_names(higher_names) unless higher_names.empty?
    @name_query.distinct.select(*minimal_name_columns)
  end

  # ORs in names whose text_name starts with one of `names` plus a space.
  # NOTE(review): the names are interpolated into the pattern unescaped --
  # confirm text_name can never contain regex metacharacters.
  def add_lower_names(names)
    @name_query.or(Name.where(Name[:text_name] =~ /^(#{names.join("|")}) /))
  end

  # ORs in names whose classification mentions one of `names`.
  def add_higher_names(names)
    @name_query.or(
      Name.where(Name[:classification] =~ /: _(#{names.join("|")})_/)
    )
  end

  # Like `add_subtaxa`, but restricted to the next level down.
  def add_immediate_subtaxa(names)
    higher_names = genera_and_up(names)
    lower_names = genera_and_down(names)

    @name_query = Name.where(id: names.map(&:id))
    @name_query = add_immediate_lower_names(lower_names)
    unless higher_names.empty?
      @name_query = add_immediate_higher_names(higher_names)
    end
    @name_query.distinct.select(*minimal_name_columns)
  end

  # ORs in names whose text_name is one of `lower_names` followed by one or
  # two more whitespace-free words.
  def add_immediate_lower_names(lower_names)
    @name_query.or(Name.
      where(Name[:text_name] =~
        /^(#{lower_names.join("|")}) [^[:blank:]]+( [^[:blank:]]+)?$/))
  end

  # ORs in single-word names whose classification ends with one of
  # `higher_names`.
  def add_immediate_higher_names(higher_names)
    @name_query.or(Name.
      where(Name[:classification] =~ /: _(#{higher_names.join("|")})_$/).
      where.not(Name[:text_name].matches("% %")))
  end

  # Text names consisting of a single word (no space).
  def genera_and_up(names)
    names.pluck(:text_name).
      reject { |name| name.include?(" ") }
  end

  # Unique text names, minus multi-word names whose first word is also
  # present as a single-word name.
  def genera_and_down(names)
    genera = {}
    text_names = names.pluck(:text_name)
    # Make hash of all genera present.
    text_names.each do |text_name|
      genera[text_name] = true unless text_name.include?(" ")
    end
    # Remove species if genus also present.
    text_names.reject do |text_name|
      text_name.include?(" ") && genera[text_name.split.first]
    end.uniq
  end

  # Selecting "minimal_name_columns" is a way to avoid having Rails instantiate
  # all the names getting passed around (which can get quite huge if we've got
  # all the children of Kingdom Fungi!) It allows us to use quicker AR selects,
  # optimized to restrict the dataflow back and forth to the database to just
  # the few columns we actually need.
  def minimal_name_columns
    [:id, :correct_spelling_id, :synonym_id, :text_name]
  end

  # array of max of MO.query_max_array unique ids for use with Arel "in"
  # where(<x>.in(limited_id_set(ids)))
  def limited_id_set(ids)
    ids.map(&:to_i).uniq[0, MO.query_max_array]
  end

  # Raises if any flag in @params is non-nil; called when no names are given.
  def complain_about_unused_flags!
    return if @params.blank?

    @params.each_key { |param| complain_about_unused_flag!(param) }
  end

  # Raises RuntimeError for a single flag unless its value is nil.
  def complain_about_unused_flag!(param)
    return if @params[param].nil?

    raise("Flag \"#{param}\" is invalid without \"names\" parameter.")
  end
end
Loading

0 comments on commit 796de68

Please sign in to comment.