Skip to content

Instantly share code, notes, and snippets.

@marcosgz
Last active March 24, 2021 22:50
Show Gist options
  • Save marcosgz/6da411c78b026f31ab61577784811212 to your computer and use it in GitHub Desktop.
Save marcosgz/6da411c78b026f31ab61577784811212 to your computer and use it in GitHub Desktop.
RGB clustering using k-Means algorithm
#!/usr/bin/env ruby
# It's ODM (Object Document Mapper) to the RGB data
class RGB
COLORS = {
r: ->(val) { "\e[31m#{val}\e[0m" },
g: ->(val) { "\e[32m#{val}\e[0m" },
b: ->(val) { "\e[34m#{val}\e[0m" },
}.freeze
# @overload initialize(r, g, b)
# @param r [Integer] Value for R
# @param g [Integer] Value for G
# @param b [Integer] Value for B
def initialize(*args)
@r, @g, @b = args
end
# @param value [Array<R Integer, G Integer, B Integer>, RGB] An array with R, G, B values
# @raise <ArgumentError> when value cannot be converted to RGB
def self.coerce(value)
case value
when Array
RGB.new(*value)
when RGB
value
else
raise ArgumentError, format('can not convert %<value>p into a RGB')
end
end
# Calculate euclidian distance between two instances of RGB
# @param a [RBG] An instance of RGB
# @param b [RBG] An instance of RGB
# @return [Float]
def self.distance(a, b)
power = %i[r g b].map { |attribute| (a[attribute] - b[attribute]) ** 2 }.reduce(&:+)
Math.sqrt(power)
end
# @param variable_name [Symbol, String] The name of the instance variable
# @raise [ArgumentError] when instance variable is not defined
def [](variable_name)
var_name = :"@#{variable_name}"
raise ArgumentError unless instance_variable_defined?(var_name)
instance_variable_get(var_name)
end
# Calculate euclidian distance to the given RGB object
# @param rgb[RGB] An instance of RGB
# @raise [ArgumentError] When the given rgb is not an instance of RGB
def distance(rgb)
raise ArgumentError unless rgb.is_a?(RGB)
RGB.distance(self, rgb)
end
# @return [Array<Integer, Integer, Integer>] Returns an array with [R, G, B] values
def to_a
[@r, @g, @b]
end
# @option color [Boolean] colored class name
def inspect(color: true)
if color
attrs = COLORS.map { |k, v| v.(self[k]) }.compact.join(' ')
"#<#{COLORS.map { |k, v| format('%s:%s', v.(k.to_s.upcase), v.(self[k])) }.compact.join(' ')}>"
else
attrs = COLORS.keys.map { |k| self[k].to_s.rjust(3) }.compact.join(' ')
"#<RGB #{attrs}>"
end
end
alias to_s inspect
end
# Used to group similar RBG objects using K-Means algoritm.
class Clusterer
MIN_VALUE = 0
MAX_VALUE = 255
attr_reader :dataset, :interactions
# Initialize a new Clusterer
# @param dataset [Array<RBG>] A collection of RGB instances
def initialize(dataset)
@dataset = dataset
end
# The results differ for each run
#
# @param opts [Hash] Options to create K-Means clusters
# @option opts [Integer] :centroids (2) The number of centroids to group dataset
# @option opts [Integer] :max_interactions (200) The maximum number of times the algorith should execute
# @option opts [Boolean] :debug (true) Enable/Disable verbose logging
# @option opts [Integer] :min_movement (0.05) The minimum moviment that the centroid should move between interactions
# @return [Hash<RGB, Array>] Return group of RGB
def cluster(centroids: 3, max_interactions: 200, debug: false, min_movement: 0.05)
centroids = centroids.times.map { RGB.coerce(generate_random_rgb_values) }
reload!
cluster = nil
until (movement = centroids_movement(centroids)) < min_movement.abs || (max_interactions < @interactions)
cluster = centroids.each_with_object({}) { |v, mem| mem[v] = [] }
dataset.each do |rgb|
centroid = centroids.map { |centroid| [centroid, centroid.distance(rgb)] }.sort_by {|_cid, dist| dist }.dig(0, 0)
cluster[centroid] << rgb
end
@interactions += 1
if debug
puts format('Interaction #%<loop>d | Movement of: %<movement>.2f',
loop: @interactions,
movement: movement,
)
cluster_tree(cluster)
end
centroids = cluster.map do |centroid, values|
if values.any?
RGB.new(*values.map(&:to_a).transpose.map {|v| v.reduce(&:+) / v.size })
else
centroid
end
end
end
cluster
end
def cluster_tree(cluster)
cluster.each do |key, values|
puts("─> #{key}")
values.each do |value|
puts(" #{value == values.last ? '└' : '├' }──>#{value.inspect}")
end
end
end
private
def centroids_movement(centroids)
unless @prev_centroids # First execution this variable will be nil
@prev_centroids = centroids
return MAX_VALUE + 1
end
result = [centroids, @prev_centroids].transpose.map { |a, b| a.distance(b) }.sum
@prev_centroids = centroids
result.abs
end
def reload!
@prev_centroids = nil
@interactions = 0
end
# Generate random values within the permitted range
def generate_random_rgb_values
3.times.map { (MIN_VALUE..MAX_VALUE).to_a.sample }
end
end
# Initialize a collection of RGBs
samples = [
[1, 10, 200],
[2, 20, 230],
[6, 25, 150],
[7, 45, 100],
[10, 50, 125],
[3, 24, 111],
[100, 4, 10],
[250, 7, 50],
[243, 5, 68],
[210, 2, 90],
[200, 1, 95],
[215, 0, 68],
[56, 200, 1],
[79, 234, 3],
[80, 210, 8],
[95, 200, 10],
[80, 210, 4],
[49, 207, 1],
].map do |sample|
RGB.new(*sample)
end
clusterer = Clusterer.new(samples)
cluster = clusterer.cluster(debug: false)
puts "Result after #{clusterer.interactions} interactions:"
clusterer.cluster_tree(cluster)
$ ruby kmeans.rb [ruby-2.6.3p62]
Result after 4 interactions:
─> #<R:73 G:210 B:4>
├──>#<R:56 G:200 B:1>
├──>#<R:79 G:234 B:3>
├──>#<R:80 G:210 B:8>
├──>#<R:95 G:200 B:10>
├──>#<R:80 G:210 B:4>
└──>#<R:49 G:207 B:1>
─> #<R:4 G:29 B:152>
├──>#<R:1 G:10 B:200>
├──>#<R:2 G:20 B:230>
├──>#<R:6 G:25 B:150>
├──>#<R:7 G:45 B:100>
├──>#<R:10 G:50 B:125>
└──>#<R:3 G:24 B:111>
─> #<R:203 G:3 B:63>
├──>#<R:100 G:4 B:10>
├──>#<R:250 G:7 B:50>
├──>#<R:243 G:5 B:68>
├──>#<R:210 G:2 B:90>
├──>#<R:200 G:1 B:95>
└──>#<R:215 G:0 B:68>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment