Fix #477: similarity duplicate search
[geeqie.git] / src / dupe.c
index 292c175..932606e 100644 (file)
@@ -1,16 +1,24 @@
 /*
- * Geeqie
- * (C) 2005 John Ellis
- * Copyright (C) 2008 - 2012 The Geeqie Team
+ * Copyright (C) 2005 John Ellis
+ * Copyright (C) 2008 - 2016 The Geeqie Team
  *
  * Author: John Ellis
  *
- * This software is released under the GNU General Public License (GNU GPL).
- * Please read the included file COPYING for more information.
- * This software comes with no warranty of any kind, use at your own risk!
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-
 #include "main.h"
 #include "dupe.h"
 
@@ -404,10 +412,6 @@ static void dupe_item_read_cache(DupeItem *di)
                        di->width = cd->width;
                        di->height = cd->height;
                        }
-               if (di->checksum == 0 && cd->have_checksum)
-                       {
-                       di->checksum = cd->checksum;
-                       }
                if (!di->md5sum && cd->have_md5sum)
                        {
                        di->md5sum = md5_digest_to_text(cd->md5sum);
@@ -432,7 +436,6 @@ static void dupe_item_write_cache(DupeItem *di)
                cd->path = cache_get_location(CACHE_TYPE_SIM, di->fd->path, TRUE, NULL);
 
                if (di->width != 0) cache_sim_data_set_dimensions(cd, di->width, di->height);
-               if (di->checksum != 0) cache_sim_data_set_checksum(cd, di->checksum);
                if (di->md5sum)
                        {
                        guchar digest[16];
@@ -571,6 +574,8 @@ static void dupe_listview_add(DupeWindow *dw, DupeItem *parent, DupeItem *child)
        g_free(text[DUPE_COLUMN_DIMENSIONS]);
 }
 
+static void dupe_listview_select_dupes(DupeWindow *dw, DupeSelectType parents);
+
 static void dupe_listview_populate(DupeWindow *dw)
 {
        GtkListStore *store;
@@ -604,6 +609,16 @@ static void dupe_listview_populate(DupeWindow *dw)
                }
 
        gtk_tree_view_columns_autosize(GTK_TREE_VIEW(dw->listview));
+
+       if (options->duplicates_select_type == DUPE_SELECT_GROUP1)
+               {
+               dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP1);
+               }
+       else if (options->duplicates_select_type == DUPE_SELECT_GROUP2)
+               {
+               dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP2);
+               }
+
 }
 
 static void dupe_listview_remove(DupeWindow *dw, DupeItem *di)
@@ -709,7 +724,7 @@ static gboolean dupe_listview_item_is_selected(DupeWindow *dw, DupeItem *di, Gtk
        return found;
 }
 
-static void dupe_listview_select_dupes(DupeWindow *dw, gint parents)
+static void dupe_listview_select_dupes(DupeWindow *dw, DupeSelectType parents)
 {
        GtkTreeModel *store;
        GtkTreeSelection *selection;
@@ -726,7 +741,7 @@ static void dupe_listview_select_dupes(DupeWindow *dw, gint parents)
                DupeItem *di;
 
                gtk_tree_model_get(store, &iter, DUPE_COLUMN_POINTER, &di, -1);
-               if ( (dupe_match_find_parent(dw, di) == di) == (parents) )
+               if ((dupe_match_find_parent(dw, di) == di) == (parents == DUPE_SELECT_GROUP1))
                        {
                        gtk_tree_selection_select_iter(selection, &iter);
                        }
@@ -1051,6 +1066,20 @@ static void dupe_match_sort_groups(GList *list)
                }
 }
 
+/* GCompareFunc: the item with the longer ->group list sorts first, ties by ascending group_rank */
+static gint dupe_match_totals_sort_cb(gconstpointer a, gconstpointer b)
+{
+       const DupeItem *da = a;
+       const DupeItem *db = b;
+       const guint len_a = g_list_length(da->group);   /* walks the list, so do it once */
+       const guint len_b = g_list_length(db->group);
+
+       if (len_a != len_b) return (len_a > len_b) ? -1 : 1;
+       if (da->group_rank < db->group_rank) return -1;
+       if (da->group_rank > db->group_rank) return 1;
+       return 0;
+}
+
 static gint dupe_match_rank_sort_cb(gconstpointer a, gconstpointer b)
 {
        DupeItem *da = (DupeItem *)a;
@@ -1084,6 +1113,15 @@ static GList *dupe_match_rank_sort(GList *source_list)
        return g_list_sort(list, dupe_match_rank_sort_cb);
 }
 
+/* sorts source_list in place by group totals and returns its new head (nothing is allocated) */
+static GList *dupe_match_totals_sort(GList *source_list)
+{
+       /* g_list_sort() already returns the start of the sorted list */
+       source_list = g_list_sort(source_list, dupe_match_totals_sort_cb);
+       /* the result is handed back in the reverse of the comparator's order */
+       return g_list_reverse(source_list);
+}
+
 static void dupe_match_rank(DupeWindow *dw)
 {
        GList *list;
@@ -1101,6 +1139,11 @@ static void dupe_match_rank(DupeWindow *dw)
        if (required_debug_level(2)) dupe_match_print_list(list);
 
        list = dupe_match_rank_sort(list);
+       if (options->sort_totals)
+               {
+               list = dupe_match_totals_sort(list);
+               }
+       if (required_debug_level(2)) dupe_match_print_list(list);
 
        g_list_free(dw->dupes);
        dw->dupes = list;
@@ -1753,6 +1796,40 @@ static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectIn
 
        if (!di) return;
 
+       /* Ensure images in the lists have unique FileDatas */
+       GList *work;
+       DupeItem *di_list;
+       work = g_list_first(dw->list);
+       while (work)
+               {
+               di_list = work->data;
+               if (di_list->fd == di->fd)
+                       {
+                       return;
+                       }
+               else
+                       {
+                       work = work->next;
+                       }
+               }
+
+       if (dw->second_list)
+               {
+               work = g_list_first(dw->second_list);
+               while (work)
+                       {
+                       di_list = work->data;
+                       if (di_list->fd == di->fd)
+                               {
+                               return;
+                               }
+                       else
+                               {
+                               work = work->next;
+                               }
+                       }
+               }
+
        if (dw->second_drop)
                {
                dupe_second_add(dw, di);
@@ -2092,6 +2169,7 @@ static void dupe_menu_select_all_cb(GtkWidget *widget, gpointer data)
        DupeWindow *dw = data;
        GtkTreeSelection *selection;
 
+       options->duplicates_select_type = DUPE_SELECT_NONE;
        selection = gtk_tree_view_get_selection(GTK_TREE_VIEW(dw->listview));
        gtk_tree_selection_select_all(selection);
 }
@@ -2101,6 +2179,7 @@ static void dupe_menu_select_none_cb(GtkWidget *widget, gpointer data)
        DupeWindow *dw = data;
        GtkTreeSelection *selection;
 
+       options->duplicates_select_type = DUPE_SELECT_NONE;
        selection = gtk_tree_view_get_selection(GTK_TREE_VIEW(dw->listview));
        gtk_tree_selection_unselect_all(selection);
 }
@@ -2109,14 +2188,16 @@ static void dupe_menu_select_dupes_set1_cb(GtkWidget *widget, gpointer data)
 {
        DupeWindow *dw = data;
 
-       dupe_listview_select_dupes(dw, TRUE);
+       options->duplicates_select_type = DUPE_SELECT_GROUP1;
+       dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP1);
 }
 
 static void dupe_menu_select_dupes_set2_cb(GtkWidget *widget, gpointer data)
 {
        DupeWindow *dw = data;
 
-       dupe_listview_select_dupes(dw, FALSE);
+       options->duplicates_select_type = DUPE_SELECT_GROUP2;
+       dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP2);
 }
 
 static void dupe_menu_edit_cb(GtkWidget *widget, gpointer data)
@@ -2210,7 +2291,7 @@ static void dupe_menu_popup_destroy_cb(GtkWidget *widget, gpointer data)
        GList *editmenu_fd_list = data;
 
        filelist_free(editmenu_fd_list);
-}      
+}
 
 static GList *dupe_window_get_fd_list(DupeWindow *dw)
 {
@@ -2253,7 +2334,7 @@ static GtkWidget *dupe_menu_popup_main(DupeWindow *dw, DupeItem *di)
        menu_item_add_sensitive(menu, _("Select group _2 duplicates"), (dw->dupes != NULL),
                                G_CALLBACK(dupe_menu_select_dupes_set2_cb), dw);
        menu_item_add_divider(menu);
-       
+
        editmenu_fd_list = dupe_window_get_fd_list(dw);
        g_signal_connect(G_OBJECT(menu), "destroy",
                         G_CALLBACK(dupe_menu_popup_destroy_cb), editmenu_fd_list);
@@ -2595,6 +2676,15 @@ static void dupe_second_set_toggle_cb(GtkWidget *widget, gpointer data)
        dupe_window_recompare(dw);
 }
 
+/* "toggled" handler: copy the button state into options->sort_totals, then recompare */
+static void dupe_sort_totals_toggle_cb(GtkWidget *widget, gpointer data)
+{
+       GtkToggleButton *toggle = GTK_TOGGLE_BUTTON(widget);
+       DupeWindow *window = data;
+       options->sort_totals = gtk_toggle_button_get_active(toggle);
+       dupe_window_recompare(window);
+}
+
 /*
  *-------------------------------------------------------------------
  * match type menu
@@ -2616,6 +2706,8 @@ static void dupe_menu_type_cb(GtkWidget *combo, gpointer data)
        if (!gtk_combo_box_get_active_iter(GTK_COMBO_BOX(combo), &iter)) return;
        gtk_tree_model_get(store, &iter, DUPE_MENU_COLUMN_MASK, &dw->match_mask, -1);
 
+       options->duplicates_match = dw->match_mask;
+
        dupe_window_recompare(dw);
 }
 
@@ -2790,6 +2882,7 @@ static void dupe_window_show_thumb_cb(GtkWidget *widget, gpointer data)
        DupeWindow *dw = data;
 
        dw->show_thumbs = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widget));
+       options->duplicates_thumbnails = dw->show_thumbs;
 
        if (dw->show_thumbs)
                {
@@ -2818,6 +2911,41 @@ static void dupe_window_show_thumb_cb(GtkWidget *widget, gpointer data)
        dupe_listview_set_height(dw->listview, dw->show_thumbs);
 }
 
+/* "toggled" handler: copy the button state into options->rot_invariant_sim, then recompare */
+static void dupe_window_rotation_invariant_cb(GtkWidget *widget, gpointer data)
+{
+       gboolean ignore_rotation = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widget));
+       options->rot_invariant_sim = ignore_rotation;
+       dupe_window_recompare((DupeWindow *)data);
+}
+
+/* Spin button "value_changed" handler: store the threshold, select the custom match row, recompare */
+static void dupe_window_custom_threshold_cb(GtkWidget *widget, gpointer data)
+{
+       DupeWindow *dw = data;
+       DupeMatchType match_type;
+       GtkTreeModel *store;
+       gboolean valid;
+       GtkTreeIter iter;
+
+       options->duplicates_similarity_threshold = gtk_spin_button_get_value_as_int(GTK_SPIN_BUTTON(widget));
+       dw->match_mask = DUPE_MATCH_SIM_CUSTOM;
+
+       /* look for the combo row whose mask column holds DUPE_MATCH_SIM_CUSTOM */
+       store = gtk_combo_box_get_model(GTK_COMBO_BOX(dw->combo));
+       valid = gtk_tree_model_get_iter_first(store, &iter);
+       while (valid)
+               {
+               gtk_tree_model_get(store, &iter, DUPE_MENU_COLUMN_MASK, &match_type, -1);
+               if (match_type == DUPE_MATCH_SIM_CUSTOM) break;
+               valid = gtk_tree_model_iter_next(store, &iter);
+               }
+
+       /* iter is only usable when the loop stopped on a match; skip the selection otherwise */
+       if (valid) gtk_combo_box_set_active_iter(GTK_COMBO_BOX(dw->combo), &iter);
+       dupe_window_recompare(dw);
+}
+
 static void dupe_popup_menu_pos_cb(GtkMenu *menu, gint *x, gint *y, gboolean *push_in, gpointer data)
 {
        GtkWidget *view = data;
@@ -2986,10 +3114,12 @@ static gboolean dupe_window_keypress_cb(GtkWidget *widget, GdkEventKey *event, g
                                        }
                                break;
                        case '1':
-                               dupe_listview_select_dupes(dw, TRUE);
+                               options->duplicates_select_type = DUPE_SELECT_GROUP1;
+                               dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP1);
                                break;
                        case '2':
-                               dupe_listview_select_dupes(dw, FALSE);
+                               options->duplicates_select_type = DUPE_SELECT_GROUP2;
+                               dupe_listview_select_dupes(dw, DUPE_SELECT_GROUP2);
                                break;
                        case GDK_KEY_Menu:
                        case GDK_KEY_F10:
@@ -3010,6 +3140,9 @@ static gboolean dupe_window_keypress_cb(GtkWidget *widget, GdkEventKey *event, g
                                                       dupe_popup_menu_pos_cb, listview, 0, GDK_CURRENT_TIME);
                                        }
                                break;
+                       case GDK_KEY_F1:
+                               help_window_show("GuideReferenceKeyboardShortcuts.html#DuplicatesKeyboardShortcuts");
+                               break;
                        default:
                                stop_signal = FALSE;
                                break;
@@ -3068,7 +3201,7 @@ static gint dupe_window_delete(GtkWidget *widget, GdkEvent *event, gpointer data
 }
 
 /* collection and files can be NULL */
-DupeWindow *dupe_window_new(DupeMatchType match_mask)
+DupeWindow *dupe_window_new()
 {
        DupeWindow *dw;
        GtkWidget *vbox;
@@ -3083,7 +3216,18 @@ DupeWindow *dupe_window_new(DupeMatchType match_mask)
 
        dw = g_new0(DupeWindow, 1);
 
-       dw->match_mask = match_mask;
+       dw->match_mask = DUPE_MATCH_NAME;
+       if (options->duplicates_match == DUPE_MATCH_NAME) dw->match_mask = DUPE_MATCH_NAME;
+       if (options->duplicates_match == DUPE_MATCH_SIZE) dw->match_mask = DUPE_MATCH_SIZE;
+       if (options->duplicates_match == DUPE_MATCH_DATE) dw->match_mask = DUPE_MATCH_DATE;
+       if (options->duplicates_match == DUPE_MATCH_DIM) dw->match_mask = DUPE_MATCH_DIM;
+       if (options->duplicates_match == DUPE_MATCH_SUM) dw->match_mask = DUPE_MATCH_SUM;
+       if (options->duplicates_match == DUPE_MATCH_PATH) dw->match_mask = DUPE_MATCH_PATH;
+       if (options->duplicates_match == DUPE_MATCH_SIM_HIGH) dw->match_mask = DUPE_MATCH_SIM_HIGH;
+       if (options->duplicates_match == DUPE_MATCH_SIM_MED) dw->match_mask = DUPE_MATCH_SIM_MED;
+       if (options->duplicates_match == DUPE_MATCH_SIM_LOW) dw->match_mask = DUPE_MATCH_SIM_LOW;
+       if (options->duplicates_match == DUPE_MATCH_SIM_CUSTOM) dw->match_mask = DUPE_MATCH_SIM_CUSTOM;
+       if (options->duplicates_match == DUPE_MATCH_NAME_CI) dw->match_mask = DUPE_MATCH_NAME_CI;
 
        dw->window = window_new(GTK_WINDOW_TOPLEVEL, "dupe", NULL, NULL, _("Find duplicates"));
 
@@ -3189,12 +3333,21 @@ DupeWindow *dupe_window_new(DupeMatchType match_mask)
        gtk_widget_show(dw->combo);
 
        dw->button_thumbs = gtk_check_button_new_with_label(_("Thumbnails"));
+       dw->show_thumbs = options->duplicates_thumbnails;
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(dw->button_thumbs), dw->show_thumbs);
        g_signal_connect(G_OBJECT(dw->button_thumbs), "toggled",
                         G_CALLBACK(dupe_window_show_thumb_cb), dw);
        gtk_box_pack_start(GTK_BOX(status_box), dw->button_thumbs, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(dw->button_thumbs);
 
+       dw->button_rotation_invariant = gtk_check_button_new_with_label(_("Ignore Rotation"));
+       gtk_widget_set_tooltip_text(GTK_WIDGET(dw->button_rotation_invariant), "Ignore image orientation");
+       gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(dw->button_rotation_invariant), options->rot_invariant_sim);
+       g_signal_connect(G_OBJECT(dw->button_rotation_invariant), "toggled",
+                        G_CALLBACK(dupe_window_rotation_invariant_cb), dw);
+       gtk_box_pack_start(GTK_BOX(status_box), dw->button_rotation_invariant, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_widget_show(dw->button_rotation_invariant);
+
        button = gtk_check_button_new_with_label(_("Compare two file sets"));
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(button), dw->second_set);
        g_signal_connect(G_OBJECT(button), "toggled",
@@ -3215,8 +3368,31 @@ DupeWindow *dupe_window_new(DupeMatchType match_mask)
        gtk_container_add(GTK_CONTAINER(frame), dw->status_label);
        gtk_widget_show(dw->status_label);
 
+       button = gtk_check_button_new_with_label(_("Sort"));
+       gtk_widget_set_tooltip_text(GTK_WIDGET(button), "Sort by group totals");
+       gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(button), options->sort_totals);
+       g_signal_connect(G_OBJECT(button), "toggled",
+                        G_CALLBACK(dupe_sort_totals_toggle_cb), dw);
+       gtk_box_pack_start(GTK_BOX(status_box), button, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_widget_show(button);
+
+       label = gtk_label_new(_("Custom Threshold"));
+       gtk_box_pack_start(GTK_BOX(status_box), label, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_widget_show(label);
+       dw->custom_threshold = gtk_spin_button_new_with_range(1, 100, 1);
+       gtk_widget_set_tooltip_text(GTK_WIDGET(dw->custom_threshold), "Custom similarity threshold");
+       gtk_spin_button_set_value(GTK_SPIN_BUTTON(dw->custom_threshold), options->duplicates_similarity_threshold);
+       g_signal_connect(G_OBJECT(dw->custom_threshold), "value_changed",
+                                                                                                       G_CALLBACK(dupe_window_custom_threshold_cb), dw);
+       gtk_box_pack_start(GTK_BOX(status_box), dw->custom_threshold, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_widget_show(dw->custom_threshold);
+
        dw->extra_label = gtk_progress_bar_new();
        gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(dw->extra_label), 0.0);
+#if GTK_CHECK_VERSION(3,0,0)
+       gtk_progress_bar_set_text(GTK_PROGRESS_BAR(dw->extra_label), "");
+       gtk_progress_bar_set_show_text(GTK_PROGRESS_BAR(dw->extra_label), TRUE);
+#endif
        gtk_box_pack_end(GTK_BOX(status_box), dw->extra_label, FALSE, FALSE, 0);
        gtk_widget_show(dw->extra_label);
 
@@ -3355,8 +3531,6 @@ static void dupe_dnd_data_set(GtkWidget *widget, GdkDragContext *context,
                              guint time, gpointer data)
 {
        DupeWindow *dw = data;
-       gchar *uri_text;
-       gint length;
        GList *list;
 
        switch (info)
@@ -3369,7 +3543,6 @@ static void dupe_dnd_data_set(GtkWidget *widget, GdkDragContext *context,
                        filelist_free(list);
                        break;
                default:
-                       uri_text = NULL;
                        break;
                }
 }
@@ -3529,7 +3702,7 @@ static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data)
        if (!(type & NOTIFY_CHANGE) || !fd->change) return;
 
        DEBUG_1("Notify dupe: %s %04x", fd->path, type);
-       
+
        switch (fd->change->type)
                {
                case FILEDATA_CHANGE_MOVE: