4 * Copyright (C) 2008 - 2012 The Geeqie Team
8 * This software is released under the GNU General Public License (GNU GPL).
9 * Please read the included file COPYING for more information.
10 * This software comes with no warranty of any kind, use at your own risk!
18 * These functions are intended to find images with similar color content. For
19 * example when an image was saved at different compression levels or dimensions
20 * (scaled down/up) the contents are similar, but these files do not match by file
21 * size, dimensions, or checksum.
23 * These functions create a 32 x 32 array for each color channel (red, green, blue).
24 * The array represents the average color of each corresponding part of the
25 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
26 * Each grid is then processed for the average color value, this is what
27 * is stored in the array)
29 * To compare two images, generate a ImageSimilarityData for each image, then
30 * pass them to the compare function. The return value is the percent match
31 * of the two images. (for this, simple comparisons are used, basically the return
32 * is an average of the corresponding array differences)
34 * for image_sim_compare(), the return is 0.0 to 1.0:
35 * 1.0 for exact matches (an image is compared to itself)
36 * 0.0 for exact opposite images (compare an all black to an all white image)
37 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
38 * find images that have been re-saved to other formats, dimensions, or compression.
43 * The experimental (alternate) algorithm is only for testing of new techniques to
44 * improve the result, and hopes to reduce false positives.
47 static gboolean alternate_enabled = FALSE;
49 void image_sim_alternate_set(gboolean enable)
51 alternate_enabled = enable;
54 gboolean image_sim_alternate_enabled(void)
56 return alternate_enabled;
59 ImageSimilarityData *image_sim_new(void)
61 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1);
66 void image_sim_free(ImageSimilarityData *sd)
71 static gint image_sim_channel_eq_sort_cb(gconstpointer a, gconstpointer b)
73 gint *pa = (gpointer)a;
74 gint *pb = (gpointer)b;
75 if (pa[1] < pb[1]) return -1;
76 if (pa[1] > pb[1]) return 1;
80 static void image_sim_channel_equal(guint8 *pix, gint len)
86 buf = g_new0(gint, len * 2);
89 for (i = 0; i < len; i++)
93 buf[p] = (gint)pix[i];
97 qsort(buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb);
100 for (i = 0; i < len; i++)
106 pix[n] = (guint8)(255 * i / len);
112 static void image_sim_channel_norm(guint8 *pix, gint len)
120 for (i = 0; i < len; i++)
122 if (pix[i] < l) l = pix[i];
123 if (pix[i] > h) h = pix[i];
127 scale = (delta != 0) ? 255.0 / (gdouble)(delta) : 1.0;
129 for (i = 0; i < len; i++)
131 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale);
136 * define these to enable various components of the experimental compare functions
138 * Convert the thumbprint to greyscale (ignore all color information when comparing)
139 * #define ALTERNATE_USES_GREYSCALE 1
141 * Take into account the difference in change from one pixel to the next
142 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1
145 void image_sim_alternate_processing(ImageSimilarityData *sd)
147 #ifdef ALTERNATE_USES_GREYSCALE
151 if (!alternate_enabled) return;
153 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
154 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
155 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
157 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
158 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
159 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
161 #ifdef ALTERNATE_USES_GREYSCALE
162 for (i = 0; i < sizeof(sd->avg_r); i++)
166 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
167 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
172 gint mround(gdouble x)
175 gdouble fpart = x-ipart;
176 return (fpart < 0.5 ? ipart : ipart+1);
179 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
190 gint x_inc, y_inc, xy_inc;
194 gboolean x_small = FALSE; /* if less than 32 w or h, set TRUE */
195 gboolean y_small = FALSE;
196 if (!sd || !pixbuf) return;
198 w = gdk_pixbuf_get_width(pixbuf);
199 h = gdk_pixbuf_get_height(pixbuf);
200 rs = gdk_pixbuf_get_rowstride(pixbuf);
201 pix = gdk_pixbuf_get_pixels(pixbuf);
202 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
204 p_step = has_alpha ? 4 : 3;
224 for (ys = 0; ys < 32; ys++)
226 if (y_small) j = (gdouble)h / 32 * ys;
227 else y_inc = mround((gdouble)h_left/(32-ys));
231 for (xs = 0; xs < 32; xs++)
238 if (x_small) i = (gdouble)w / 32 * xs;
239 else x_inc = mround((gdouble)w_left/(32-xs));
240 xy_inc = x_inc * y_inc;
242 xpos = pix + (i * p_step);
244 for (y = j; y < j + y_inc; y++)
247 for (x = i; x < i + x_inc; x++)
276 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
278 ImageSimilarityData *sd;
280 sd = image_sim_new();
281 image_sim_fill_data(sd, pixbuf);
286 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
287 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
294 if (!a || !b || !a->filled || !b->filled) return 0.0;
300 for (j = 0; j < 1024; j += 32)
302 for (i = j; i < j + 32; i++)
307 cr = abs(a->avg_r[i] - b->avg_r[i]);
308 cg = abs(a->avg_g[i] - b->avg_g[i]);
309 cb = abs(a->avg_b[i] - b->avg_b[i]);
312 sim += cd + abs(cd - ld);
315 /* check for abort, if so return 0.0 */
316 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
319 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
323 gdouble image_sim_compare_transfo(ImageSimilarityData *a, ImageSimilarityData *b, gchar transfo)
329 if (!a || !b || !a->filled || !b->filled) return 0.0;
333 if (transfo & 1) { i = &j2; j = &i2; } else { i = &i2; j = &j2; }
334 for (j1 = 0; j1 < 32; j1++)
336 if (transfo & 2) *j = 31-j1; else *j = j1;
337 for (i1 = 0; i1 < 32; i1++)
339 if (transfo & 4) *i = 31-i1; else *i = i1;
340 sim += abs(a->avg_r[i1*32+j1] - b->avg_r[i2*32+j2]);
341 sim += abs(a->avg_g[i1*32+j1] - b->avg_g[i2*32+j2]);
342 sim += abs(a->avg_b[i1*32+j1] - b->avg_b[i2*32+j2]);
346 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0));
349 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
351 gboolean test_transformations = TRUE; /* could be a function parameter */
352 gint max_t = (test_transformations ? 8 : 1);
355 gdouble score, max_score = 0;
357 for(t = 0; t < max_t; t++)
359 score = image_sim_compare_transfo(a, b, t);
360 if (score > max_score) max_score = score;
367 4 rotations (0, 90, 180, 270) combined with two mirrors (0, H)
368 generate all possible isometric transformations
370 = change dir of x, change dir of y, exchange x and y = 2^3 = 8
372 gdouble image_sim_compare_fast_transfo(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min, gchar transfo)
378 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
379 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min);
382 if (!a || !b || !a->filled || !b->filled) return 0.0;
387 if (transfo & 1) { i = &j2; j = &i2; } else { i = &i2; j = &j2; }
388 for (j1 = 0; j1 < 32; j1++)
390 if (transfo & 2) *j = 31-j1; else *j = j1;
391 for (i1 = 0; i1 < 32; i1++)
393 if (transfo & 4) *i = 31-i1; else *i = i1;
394 sim += abs(a->avg_r[i1*32+j1] - b->avg_r[i2*32+j2]);
395 sim += abs(a->avg_g[i1*32+j1] - b->avg_g[i2*32+j2]);
396 sim += abs(a->avg_b[i1*32+j1] - b->avg_b[i2*32+j2]);
398 /* check for abort, if so return 0.0 */
399 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0;
402 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) );
405 /* this uses a cutoff point so that it can abort early when it gets to
406 * a point that can simply no longer make the cut-off point.
408 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
410 gboolean test_transformations = TRUE; /* could be a function parameter */
411 gint max_t = (test_transformations ? 8 : 1);
414 gdouble score, max_score = 0;
416 for(t = 0; t < max_t; t++)
418 score = image_sim_compare_fast_transfo(a, b, min, t);
419 if (score > max_score) max_score = score;
423 /* vim: set shiftwidth=8 softtabstop=0 cindent cinoptions={1s: */