From b3e8e4fdc974996b7897c62ddfd303507c7f8831 Mon Sep 17 00:00:00 2001 From: Colin Clark Date: Wed, 18 Oct 2023 12:36:56 +0100 Subject: [PATCH] Document similarity algorithms and enable alternate The alternate algorithm can be enabled on Preferences/Advanced. --- doc/docbook/GuideOptionsAdvanced.xml | 4 ++ doc/docbook/GuideReference.xml | 1 + .../GuideReferenceSimilarityAlgorithms.xml | 40 +++++++++++++ src/main.cc | 15 +---- src/options.h | 6 ++ src/preferences.cc | 33 ++++++++--- src/rcfile.cc | 9 +++ src/similar.cc | 59 ++++++------------- 8 files changed, 105 insertions(+), 62 deletions(-) create mode 100644 doc/docbook/GuideReferenceSimilarityAlgorithms.xml diff --git a/doc/docbook/GuideOptionsAdvanced.xml b/doc/docbook/GuideOptionsAdvanced.xml index aacbb751..0c052421 100644 --- a/doc/docbook/GuideOptionsAdvanced.xml +++ b/doc/docbook/GuideOptionsAdvanced.xml @@ -92,4 +92,8 @@ Thread Pools This option will limit the number of threads (cores) that are used when performing a duplicate image search. A value of 0 means use all available threads. This will give the fastest processing time, but will slow other processes including user input response time. +
+ Alternate Algorithm + Alternate Similarity Algorithm +
diff --git a/doc/docbook/GuideReference.xml b/doc/docbook/GuideReference.xml index fb149869..f25a9501 100644 --- a/doc/docbook/GuideReference.xml +++ b/doc/docbook/GuideReference.xml @@ -12,6 +12,7 @@ + diff --git a/doc/docbook/GuideReferenceSimilarityAlgorithms.xml b/doc/docbook/GuideReferenceSimilarityAlgorithms.xml new file mode 100644 index 00000000..efdcdb47 --- /dev/null +++ b/doc/docbook/GuideReferenceSimilarityAlgorithms.xml @@ -0,0 +1,40 @@ + +
+ Similarity Algorithms + + This function is intended to find images with similar color content. For example when an image was saved at different compression levels or dimensions (scaled down/up) the contents are similar, but these files do not match by file size, dimensions, or checksum. + + A 32 x 32 array is created for each image. Imagine the image cut into 1024 rectangles, 32 across and 32 down. + + For each array element, the average value of all the red and the green and the blue pixels is computed and stored in the array. Therefore the array represents the average color of each corresponding part of the image. + + This data is stored in a file with the same name as the image and with the extension .sim. It is stored in the same location as thumbnails. If many images are to be compared, run-time is reduced by having these .sim files already created. This can be done via Edit/Cache Maintenance or by the command line instruction: + geeqie --cache-maintenance <path> +
+ Standard Algorithm + + To compare two images, each array element of each image is compared in turn. The computed value is the percent match of all elements of the two images. For this, simple comparisons are used - basically the value is an average of the corresponding array differences. + + The value computed is in the range 0% to 100%. + + 100% for exact matches (an image is compared to itself) + 0% for exact opposite images (compare an all black to an all white image) + + Generally only a match of >85% is significant at all, and >95% is useful to find images that have been re-saved to other formats, dimensions, or compression. + + If the Ignore Orientation checkbox on the Duplicates window is selected, images are also checked for 90°, 180° and 270° rotations, and for mirror and flip. This will increase run-time. +
+
+ Alternate Algorithm + + The alternate algorithm can be enabled on the Advanced tab of Preferences. + + It does not check for rotations, mirror or flip. + + After comparing two array elements of two images, the difference from the preceding element comparison is included in the computation. + + There is an additional option to reduce the fingerprint to grayscale before comparisons are made. + +
+
diff --git a/src/main.cc b/src/main.cc index de12173e..7e0857e0 100644 --- a/src/main.cc +++ b/src/main.cc @@ -503,12 +503,6 @@ static void parse_command_line(gint argc, gchar *argv[]) printf_term(FALSE, "%s %s GTK%d\n", GQ_APPNAME, VERSION, gtk_major_version); exit(0); } - else if (strcmp(cmd_line, "--alternate") == 0) - { - /* enable faster experimental algorithm */ - log_printf("Alternate similarity algorithm enabled\n"); - image_sim_alternate_set(TRUE); - } else if (strcmp(cmd_line, "-h") == 0 || strcmp(cmd_line, "--help") == 0) { @@ -523,7 +517,7 @@ static void parse_command_line(gint argc, gchar *argv[]) print_term(FALSE, _(" -h, --help show this message\n")); print_term(FALSE, _(" -l, --list [files] [collections] open collection window for command line\n")); print_term(FALSE, _(" -n, --new-instance open a new instance of Geeqie\n")); - print_term(FALSE, _(" -o:, --log-file: save log data to file\n")); + print_term(FALSE, _(" -o:, --log-file: save log data to file\n")); print_term(FALSE, _(" -r, --remote send following commands to open window\n")); print_term(FALSE, _(" -rh, --remote-help print remote command list\n")); print_term(FALSE, _(" -s, --slideshow start in slideshow mode\n")); @@ -533,14 +527,9 @@ static void parse_command_line(gint argc, gchar *argv[]) print_term(FALSE, _(" +w, --show-log-window show log window\n")); #ifdef DEBUG print_term(FALSE, _(" --debug[=level] turn on debug output\n")); - print_term(FALSE, _(" -g:, --grep: filter debug output\n")); + print_term(FALSE, _(" -g:, --grep: filter debug output\n")); #endif -#if 0 - /* these options are not officially supported! 
- * only for testing new features, no need to translate them */ - print_term(FALSE, " --alternate use alternate similarity algorithm\n"); -#endif print_term(FALSE, "\n"); remote_help(); diff --git a/src/options.h b/src/options.h index 4d1a9404..b38feac1 100644 --- a/src/options.h +++ b/src/options.h @@ -392,6 +392,12 @@ struct ConfOptions gboolean status_bar; } selectable_bars; + /* Alternate similarity algorithm */ + struct { + gboolean enabled; + gboolean grayscale; /**< convert fingerprint to greyscale */ + } alternate_similarity_algorithm; + gchar *mouse_button_8; /**< user-definable mouse buttons */ gchar *mouse_button_9; /**< user-definable mouse buttons */ diff --git a/src/preferences.cc b/src/preferences.cc index 808849d9..6b000da8 100644 --- a/src/preferences.cc +++ b/src/preferences.cc @@ -465,6 +465,9 @@ static void config_window_apply() options->threads.duplicates = c_options->threads.duplicates > 0 ? c_options->threads.duplicates : -1; + options->alternate_similarity_algorithm.enabled = c_options->alternate_similarity_algorithm.enabled; + options->alternate_similarity_algorithm.grayscale = c_options->alternate_similarity_algorithm.grayscale; + #ifdef DEBUG set_debug_level(debug_c); #endif @@ -3838,18 +3841,20 @@ static gint extension_sort_cb(gconstpointer a, gconstpointer b) static void config_tab_advanced(GtkWidget *notebook) { - GtkWidget *vbox; - GtkWidget *group; - GSList *formats_list; - GList *extensions_list = nullptr; gchar **extensions; - GtkWidget *tabcomp; GdkPixbufFormat *fm; gint i; + GList *extensions_list = nullptr; + GSList *formats_list; GString *types_string = g_string_new(nullptr); - GtkWidget *types_string_label; - GtkWidget *threads_string_label; + GtkWidget *alternate_checkbox; GtkWidget *dupes_threads_spin; + GtkWidget *group; + GtkWidget *subgroup; + GtkWidget *tabcomp; + GtkWidget *threads_string_label; + GtkWidget *types_string_label; + GtkWidget *vbox; vbox = scrolled_notebook_page(notebook, _("Advanced")); group = 
pref_group_new(vbox, FALSE, _("External preview extraction"), GTK_ORIENTATION_VERTICAL); @@ -3929,6 +3934,20 @@ static void config_tab_advanced(GtkWidget *notebook) dupes_threads_spin = pref_spin_new_int(vbox, _("Duplicate check:"), _("max. threads"), 0, get_cpu_cores(), 1, options->threads.duplicates, &c_options->threads.duplicates); gtk_widget_set_tooltip_markup(dupes_threads_spin, _("Set to 0 for unlimited")); + + pref_spacer(group, PREF_PAD_GROUP); + + pref_line(vbox, PREF_PAD_SPACE); + + group = pref_group_new(vbox, FALSE, _("Alternate similarity alogorithm"), GTK_ORIENTATION_VERTICAL); + + alternate_checkbox = pref_checkbox_new_int(group, _("Enable alternate similarity algorithm"), options->alternate_similarity_algorithm.enabled, &c_options->alternate_similarity_algorithm.enabled); + + subgroup = pref_box_new(group, FALSE, GTK_ORIENTATION_VERTICAL, PREF_PAD_GAP); + pref_checkbox_link_sensitivity(alternate_checkbox, subgroup); + + alternate_checkbox = pref_checkbox_new_int(subgroup, _("Use grayscale"), options->alternate_similarity_algorithm.grayscale, &c_options->alternate_similarity_algorithm.grayscale); + gtk_widget_set_tooltip_text(alternate_checkbox, _("Reduce fingerprint to grayscale")); } /* stereo tab */ diff --git a/src/rcfile.cc b/src/rcfile.cc index f1cf6d1e..2073a0eb 100644 --- a/src/rcfile.cc +++ b/src/rcfile.cc @@ -559,6 +559,11 @@ static void write_global_attributes(GString *outstr, gint indent) /* GPU - see main.cc */ WRITE_NL(); WRITE_BOOL(*options, override_disable_gpu); WRITE_SEPARATOR(); + + /* Alternate similarity algorithm */ + WRITE_NL(); WRITE_BOOL(*options, alternate_similarity_algorithm.enabled); + WRITE_NL(); WRITE_BOOL(*options, alternate_similarity_algorithm.grayscale); + WRITE_SEPARATOR(); } static void write_color_profile(GString *outstr, gint indent) @@ -1050,6 +1055,10 @@ static gboolean load_global_params(const gchar **attribute_names, const gchar ** /* GPU - see main.cc */ if (READ_BOOL(*options, override_disable_gpu)) 
continue; + /* Alternative similarity algorithm */ + if (READ_BOOL(*options, alternate_similarity_algorithm.enabled)) continue; + if (READ_BOOL(*options, alternate_similarity_algorithm.grayscale)) continue; + /* Dummy options */ if (READ_DUMMY(*options, image.dither_quality, "deprecated since 2012-08-13")) continue; diff --git a/src/similar.cc b/src/similar.cc index f6cb826a..bd090caa 100644 --- a/src/similar.cc +++ b/src/similar.cc @@ -49,26 +49,6 @@ * find images that have been re-saved to other formats, dimensions, or compression. */ -/* - * The experimental (alternate) algorithm is only for testing of new techniques to - * improve the result, and hopes to reduce false positives. - */ - -static gboolean alternate_enabled = FALSE; - -void image_sim_alternate_set(gboolean enable) -{ - alternate_enabled = enable; -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -gboolean image_sim_alternate_enabled_unused(void) -{ - return alternate_enabled; -} -#pragma GCC diagnostic pop - ImageSimilarityData *image_sim_new() { auto sd = g_new0(ImageSimilarityData, 1); @@ -146,22 +126,17 @@ static void image_sim_channel_norm(guint8 *pix, gint len) } /* - * define these to enable various components of the experimental compare functions - * - * Convert the thumbprint to greyscale (ignore all color information when comparing) - * #define ALTERNATE_USES_GREYSCALE 1 - * - * Take into account the difference in change from one pixel to the next - * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1 + * The Alternate algorithm is only for testing of new techniques to + * improve the result, and hopes to reduce false positives. 
*/ - void image_sim_alternate_processing(ImageSimilarityData *sd) { -#ifdef ALTERNATE_USES_GREYSCALE gint i; -#endif - if (!alternate_enabled) return; + if (!options->alternate_similarity_algorithm.enabled) + { + return; + } image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r)); image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g)); @@ -171,15 +146,16 @@ void image_sim_alternate_processing(ImageSimilarityData *sd) image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g)); image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b)); -#ifdef ALTERNATE_USES_GREYSCALE - for (i = 0; i < sizeof(sd->avg_r); i++) + if (options->alternate_similarity_algorithm.grayscale) { - guint8 n; + for (i = 0; i < (gint)sizeof(sd->avg_r); i++) + { + guint8 n; - n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3); - sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n; + n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3); + sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n; + } } -#endif } gint mround(gdouble x) @@ -296,7 +272,6 @@ ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf) return sd; } -#ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) { gint sim; @@ -331,7 +306,6 @@ static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSim return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) ); } -#endif gdouble image_sim_compare_transfo(ImageSimilarityData *a, ImageSimilarityData *b, gchar transfo) { @@ -387,9 +361,10 @@ gdouble image_sim_compare_fast_transfo(ImageSimilarityData *a, ImageSimilarityDa gint i1, i2, *i; gint j1, j2, *j; -#ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE - if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min); -#endif + if (options->alternate_similarity_algorithm.enabled) + { + return alternate_image_sim_compare_fast(a, b, min); + } if (!a || !b || !a->filled || !b->filled) return 0.0; -- 2.20.1