2 * Copyright (C) 2004 John Ellis
3 * Copyright (C) 2008 - 2016 The Geeqie Team
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 * These functions are intended to find images with similar color content. For
36 * example when an image was saved at different compression levels or dimensions
37 * (scaled down/up) the contents are similar, but these files do not match by file
38 * size, dimensions, or checksum.
40 * These functions create a 32 x 32 array for each color channel (red, green, blue).
41 * The array represents the average color of each corresponding part of the
42 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
43 * Each grid is then processed for the average color value, this is what
44 * is stored in the array)
46 * To compare two images, generate a ImageSimilarityData for each image, then
47 * pass them to the compare function. The return value is the percent match
48 * of the two images. (for this, simple comparisons are used, basically the return
49 * is an average of the corresponding array differences)
51 * for image_sim_compare(), the return is 0.0 to 1.0: \n
52 * 1.0 for exact matches (an image is compared to itself) \n
53 * 0.0 for exact opposite images (compare an all black to an all white image) \n
54 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
55 * find images that have been re-saved to other formats, dimensions, or compression.
61 using ImageSimilarityCheckAbort = std::function<bool(gdouble)>;
63 void image_sim_channel_equal(guint8 *pix, gsize len)
71 std::vector<IndexedPix> buf;
74 for (gsize i = 0; i < len; i++)
76 buf.push_back({i, pix[i]});
79 std::sort(buf.begin(), buf.end(), [](const IndexedPix &a, const IndexedPix &b){ return a.pix < b.pix; });
81 for (gsize i = 0; i < len; i++)
83 gint n = buf[i].index;
85 pix[n] = static_cast<guint8>(255 * i / len);
90 * 4 rotations (0, 90, 180, 270) combined with two mirrors (0, H)
91 * generate all possible isometric transformations
93 * = change dir of x, change dir of y, exchange x and y = 2^3 = 8
95 gdouble image_sim_data_compare_transfo(const ImageSimilarityData *a, const ImageSimilarityData *b, gchar transfo, const ImageSimilarityCheckAbort &check_abort)
97 if (!a || !b || !a->filled || !b->filled) return 0.0;
105 if (transfo & 1) { i = &j2; j = &i2; } else { i = &i2; j = &j2; }
106 for (gint j1 = 0; j1 < 32; j1++)
108 if (transfo & 2) *j = 31-j1; else *j = j1;
109 for (gint i1 = 0; i1 < 32; i1++)
111 if (transfo & 4) *i = 31-i1; else *i = i1;
112 sim += abs(a->avg_r[i1*32+j1] - b->avg_r[i2*32+j2]);
113 sim += abs(a->avg_g[i1*32+j1] - b->avg_g[i2*32+j2]);
114 sim += abs(a->avg_b[i1*32+j1] - b->avg_b[i2*32+j2]);
115 /* check for abort, if so return 0.0 */
116 if (check_abort(sim)) return 0.0;
120 return 1.0 - (static_cast<gdouble>(sim) / (255.0 * 1024.0 * 3.0));
123 gdouble image_sim_data_compare(const ImageSimilarityData *a, const ImageSimilarityData *b, const ImageSimilarityCheckAbort &check_abort)
125 gchar max_t = (options->rot_invariant_sim ? 8 : 1);
126 gdouble max_score = 0;
128 for (gchar t = 0; t < max_t; t++)
130 max_score = std::max(image_sim_data_compare_transfo(a, b, t, check_abort), max_score);
138 ImageSimilarityData *image_sim_new()
140 auto sd = g_new0(ImageSimilarityData, 1);
145 void image_sim_free(ImageSimilarityData *sd)
150 static void image_sim_channel_norm(guint8 *pix, gint len)
160 for (i = 0; i < len; i++)
162 if (pix[i] < l) l = pix[i];
163 if (pix[i] > h) h = pix[i];
167 scale = (delta != 0) ? 255.0 / static_cast<gdouble>(delta) : 1.0;
169 for (i = 0; i < len; i++)
171 pix[i] = static_cast<guint8>(static_cast<gdouble>(pix[i] - l) * scale);
176 * The Alternate algorithm is only for testing of new techniques to
177 * improve the result, and hopes to reduce false positives.
179 void image_sim_alternate_processing(ImageSimilarityData *sd)
183 if (!options->alternate_similarity_algorithm.enabled)
188 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
189 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
190 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
192 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
193 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
194 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
196 if (options->alternate_similarity_algorithm.grayscale)
198 for (i = 0; i < (gint)sizeof(sd->avg_r); i++)
202 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
203 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
208 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
228 gboolean x_small = FALSE; /* if less than 32 w or h, set TRUE */
229 gboolean y_small = FALSE;
230 if (!sd || !pixbuf) return;
232 w = gdk_pixbuf_get_width(pixbuf);
233 h = gdk_pixbuf_get_height(pixbuf);
234 rs = gdk_pixbuf_get_rowstride(pixbuf);
235 pix = gdk_pixbuf_get_pixels(pixbuf);
236 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
238 p_step = has_alpha ? 4 : 3;
258 for (ys = 0; ys < 32; ys++)
260 if (y_small) j = static_cast<gdouble>(h) / 32 * ys;
261 else y_inc = std::lround(static_cast<gdouble>(h_left)/(32-ys));
265 for (xs = 0; xs < 32; xs++)
275 if (x_small) i = static_cast<gdouble>(w) / 32 * xs;
276 else x_inc = std::lround(static_cast<gdouble>(w_left)/(32-xs));
277 xy_inc = x_inc * y_inc;
279 xpos = pix + (i * p_step);
281 for (y = j; y < j + y_inc; y++)
284 for (x = i; x < i + x_inc; x++)
313 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
315 ImageSimilarityData *sd;
317 sd = image_sim_new();
318 image_sim_fill_data(sd, pixbuf);
323 static gdouble alternate_image_sim_compare_fast(const ImageSimilarityData *a, const ImageSimilarityData *b, gdouble min)
330 if (!a || !b || !a->filled || !b->filled) return 0.0;
335 for (j = 0; j < 1024; j += 32)
337 for (i = j; i < j + 32; i++)
344 cr = abs(a->avg_r[i] - b->avg_r[i]);
345 cg = abs(a->avg_g[i] - b->avg_g[i]);
346 cb = abs(a->avg_b[i] - b->avg_b[i]);
349 sim += cd + abs(cd - ld);
352 /* check for abort, if so return 0.0 */
353 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
356 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
359 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
361 return image_sim_data_compare(a, b, [](gdouble){ return false; });
364 /* this uses a cutoff point so that it can abort early when it gets to
365 * a point that can simply no longer make the cut-off point.
367 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
371 if (options->alternate_similarity_algorithm.enabled)
373 return alternate_image_sim_compare_fast(a, b, min);
376 return image_sim_data_compare(a, b, [min](gdouble sim){ return (sim / (255.0 * 1024.0 * 3.0)) > min; });
378 /* vim: set shiftwidth=8 softtabstop=0 cindent cinoptions={1s: */