7 * This software is released under the GNU General Public License (GNU GPL).
8 * Please read the included file COPYING for more information.
9 * This software comes with no warranty of any kind, use at your own risk!
17 * These functions are intended to find images with similar color content. For
18 * example when an image was saved at different compression levels or dimensions
19 * (scaled down/up) the contents are similar, but these files do not match by file
20 * size, dimensions, or checksum.
22 * These functions create a 32 x 32 array for each color channel (red, green, blue).
23 * The array represents the average color of each corresponding part of the
24 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
25 * Each grid is then processed for the average color value, this is what
26 * is stored in the array)
28 * To compare two images, generate a ImageSimilarityData for each image, then
29 * pass them to the compare function. The return value is the percent match
30 * of the two images. (for this, simple comparisons are used, basically the return
31 * is an average of the corresponding array differences)
33 * for image_sim_compare(), the return is 0.0 to 1.0:
34 * 1.0 for exact matches (an image is compared to itself)
35 * 0.0 for exact opposite images (compare an all black to an all white image)
36 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
37 * find images that have been re-saved to other formats, dimensions, or compression.
42 * The experimental (alternate) algorithm is only for testing of new techniques to
43 * improve the result, and hopes to reduce false positives.
46 static gint alternate_enabled = FALSE;
48 void image_sim_alternate_set(gint enable)
50 alternate_enabled = enable;
53 gint image_sim_alternate_enabled(void)
55 return alternate_enabled;
58 ImageSimilarityData *image_sim_new(void)
60 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1);
65 void image_sim_free(ImageSimilarityData *sd)
71 static void image_sim_channel_expand(guint8 *pix, gint len)
77 /* set the start values */
81 for (i = 0; i < len; i++)
83 if (pix[i] < l) l = pix[i];
84 if (pix[i] > h) h = pix[i];
87 /* calc scale from range */
90 scale = 255.0 / (gdouble)(h - l);
97 for (i = 0; i < len; i++)
99 pix[i] = (guint8)((gdouble)pix[i] - l * scale);
104 static int image_sim_channel_eq_sort_cb(const void *a, const void *b)
106 gint *pa = (void *)a;
107 gint *pb = (void *)b;
108 if (pa[1] < pb[1]) return -1;
109 if (pa[1] > pb[1]) return 1;
113 static void image_sim_channel_equal(guint8 *pix, gint len)
119 buf = g_new0(gint, len * 2);
122 for (i = 0; i < len; i++)
126 buf[p] = (gint)pix[i];
130 qsort (buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb);
133 for (i = 0; i < len; i++)
139 pix[n] = (guint8)(255 * i / len);
145 static void image_sim_channel_norm(guint8 *pix, gint len)
153 for (i = 0; i < len; i++)
155 if (pix[i] < l) l = pix[i];
156 if (pix[i] > h) h = pix[i];
159 scale = (h-l !=0) ? 255.0 / (gdouble)(h - l) : 1.0;
161 for (i = 0; i < len; i++)
163 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale);
168 * define these to enable various components of the experimental compare functions
170 * Convert the thumbprint to greyscale (ignore all color information when comparing)
171 * #define ALTERNATE_USES_GREYSCALE 1
173 * Take into account the difference in change from one pixel to the next
174 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1
177 void image_sim_alternate_processing(ImageSimilarityData *sd)
179 #ifdef ALTERNATE_USES_GREYSCALE
183 if (!alternate_enabled) return;
185 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
186 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
187 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
189 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
190 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
191 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
193 #ifdef ALTERNATE_USES_GREYSCALE
194 for (i = 0; i < sizeof(sd->avg_r); i++)
198 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
199 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
204 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
218 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */
219 gint y_small = FALSE;
221 if (!sd || !pixbuf) return;
223 w = gdk_pixbuf_get_width(pixbuf);
224 h = gdk_pixbuf_get_height(pixbuf);
225 rs = gdk_pixbuf_get_rowstride(pixbuf);
226 pix = gdk_pixbuf_get_pixels(pixbuf);
227 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
229 p_step = has_alpha ? 4 : 3;
246 for (ys = 0; ys < 32; ys++)
248 if (y_small) j = (gdouble)h / 32 * ys;
252 for (xs = 0; xs < 32; xs++)
258 if (x_small) i = (gdouble)w / 32 * xs;
262 for (y = j; y < j + y_inc; y++)
264 p = pix + (y * rs) + (i * p_step);
265 for (x = i; x < i + x_inc; x++)
293 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
295 ImageSimilarityData *sd;
297 sd = image_sim_new();
298 image_sim_fill_data(sd, pixbuf);
303 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
304 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
311 if (!a || !b || !a->filled || !b->filled) return 0.0;
318 for (j = 0; j < 1024; j+= 32)
320 for (i = j; i < j + 32; i++)
325 cr = abs(a->avg_r[i] - b->avg_r[i]);
326 cg = abs(a->avg_g[i] - b->avg_g[i]);
327 cb = abs(a->avg_b[i] - b->avg_b[i]);
330 sim += cd + abs(cd - ld);
333 /* check for abort, if so return 0.0 */
334 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
337 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
341 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
346 if (!a || !b || !a->filled || !b->filled) return 0.0;
350 for (i = 0; i < 1024; i++)
352 sim += abs(a->avg_r[i] - b->avg_r[i]);
353 sim += abs(a->avg_g[i] - b->avg_g[i]);
354 sim += abs(a->avg_b[i] - b->avg_b[i]);
357 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0));
360 /* this uses a cutoff point so that it can abort early when it gets to
361 * a point that can simply no longer make the cut-off point.
363 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
369 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
370 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min);
373 if (!a || !b || !a->filled || !b->filled) return 0.0;
378 for (j = 0; j < 1024; j+= 32)
380 for (i = j; i < j + 32; i++)
382 sim += abs(a->avg_r[i] - b->avg_r[i]);
383 sim += abs(a->avg_g[i] - b->avg_g[i]);
384 sim += abs(a->avg_b[i] - b->avg_b[i]);
386 /* check for abort, if so return 0.0 */
387 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0;
390 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) );