Merging several old merge requests
[geeqie.git] / src / dupe.c
index 94d3263..f9c624e 100644 (file)
 
 #define DUPE_DEF_WIDTH 800
 #define DUPE_DEF_HEIGHT 400
+#define DUPE_PROGRESS_PULSE_STEP 0.0001
 
-/* column assignment order (simply change them here) */
+/** column assignment order (simply change them here)
+ */
 enum {
        DUPE_COLUMN_POINTER = 0,
        DUPE_COLUMN_RANK,
@@ -71,11 +73,40 @@ enum {
        DUPE_COLUMN_PATH,
        DUPE_COLUMN_COLOR,
        DUPE_COLUMN_SET,
-       DUPE_COLUMN_COUNT       /* total columns */
+       DUPE_COLUMN_COUNT       /**< total columns */
 };
 
+/** Result of comparing two #DupeItem-s with dupe_match_check().
+ * Not used for similarity checks.
+ */
+typedef enum {
+       DUPE_MATCH = 0, /**< the two items match */
+       DUPE_NO_MATCH, /**< the two items do not match */
+       DUPE_NAME_MATCH /**< "name != content" searches only: name and content are both equal, so not a match, but keep searching */
+} DUPE_CHECK_RESULT;
 
-static GList *dupe_window_list = NULL; /* list of open DupeWindow *s */
+typedef struct _DupeQueueItem DupeQueueItem;
+/** Used for similarity checks. One for each item pushed
+ * onto the thread pool.
+ */
+struct _DupeQueueItem
+{
+       DupeItem *needle; /**< item that every entry reached from \a work is compared against */
+       DupeWindow *dw; /**< window running the search; its \a match_mask is used for the comparison */
+       GList *work; /**< pointer into \a dw->list or \a dw->second_list (#DupeItem) */
+       gint index; /**< The order items pushed onto thread pool. Used to sort returned matches */
+};
+
+typedef struct _DupeSearchMatch DupeSearchMatch;
+/** Used for similarity checks thread. One for each pair match found.
+ */
+struct _DupeSearchMatch
+{
+       DupeItem *a; /**< \a a / \a b matched pair found */
+       DupeItem *b; /**< \a a / \a b matched pair found */
+       gdouble rank; /**< rank reported by dupe_match() for this pair */
+       gint index; /**< The order items pushed onto thread pool. Used to sort returned matches */
+};
+
+/** Copy of \a dw->match_mask set by dupe_array_check() and read by
+ * dupe_match_binary_search_cb(), which has no user-data argument.
+ */
+static DupeMatchType param_match_mask;
+static GList *dupe_window_list = NULL; /**< list of open DupeWindow *s */
 
 /*
  * Well, after adding the 'compare two sets' option things got a little sloppy in here
@@ -97,16 +128,95 @@ static GtkWidget *dupe_menu_popup_second(DupeWindow *dw, DupeItem *di);
 static void dupe_dnd_init(DupeWindow *dw);
 
 static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data);
+static void delete_finished_cb(gboolean success, const gchar *dest_path, gpointer data);
 
 static GtkWidget *submenu_add_export(GtkWidget *menu, GtkWidget **menu_item, GCallback func, gpointer data);
 static void dupe_pop_menu_export_cb(GtkWidget *widget, gpointer data);
+
+static void dupe_init_list_cache(DupeWindow *dw);
+static void dupe_destroy_list_cache(DupeWindow *dw);
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd);
+
+static void dupe_match_link(DupeItem *a, DupeItem *b, gdouble rank);
+static gint dupe_match_link_exists(DupeItem *child, DupeItem *parent);
+
+/**
+ * @brief Thread pool worker that performs the similarity comparisons
+ * @param d1 #DupeQueueItem describing the job; freed before returning
+ * @param d2 #DupeWindow running the search
+ *
+ * Used only for similarity checks.\n
+ * Walks the list starting at \a dqi->work and compares every entry with
+ * \a dqi->needle. Each hit is stored in a new #DupeSearchMatch and the
+ * hits are appended to \a dw->search_matches.\n
+ * If \a dw->abort is already set on entry no comparison is made.
+ * \a dw->thread_count is incremented in every case.
+ */
+static void dupe_comparison_func(gpointer d1, gpointer d2)
+{
+       DupeQueueItem *dqi = d1;
+       DupeWindow *dw = d2;
+       GList *found = NULL;
+       gdouble rank = 0;
+
+       if (!dw->abort)
+               {
+               GList *cursor = dqi->work;
+
+               while (cursor)
+                       {
+                       DupeItem *candidate = cursor->data;
+
+                       /* a second set is walked forwards, a single set backwards */
+                       cursor = dw->second_set ? cursor->next : cursor->prev;
+
+                       if (dupe_match(candidate, dqi->needle, dqi->dw->match_mask, &rank, TRUE))
+                               {
+                               DupeSearchMatch *hit = g_new0(DupeSearchMatch, 1);
+
+                               hit->a = candidate;
+                               hit->b = dqi->needle;
+                               hit->rank = rank;
+                               hit->index = dqi->index;
+                               found = g_list_prepend(found, hit);
+                               }
+
+                       /* stop early once the search has been cancelled */
+                       if (dw->abort)
+                               {
+                               break;
+                               }
+                       }
+
+               /* hits were prepended: put them back in discovery order */
+               found = g_list_reverse(found);
+
+               g_mutex_lock(&dw->search_matches_mutex);
+               dw->search_matches = g_list_concat(dw->search_matches, found);
+               g_mutex_unlock(&dw->search_matches_mutex);
+               }
+
+       /* report this job as finished, whether or not it did any work */
+       g_mutex_lock(&dw->thread_count_mutex);
+       dw->thread_count++;
+       g_mutex_unlock(&dw->thread_count_mutex);
+
+       g_free(dqi);
+}
+
 /*
  * ------------------------------------------------------------------
  * Window updates
  * ------------------------------------------------------------------
  */
 
-
+/**
+ * @brief Update display of status label
+ * @param dw 
+ * @param count_only 
+ * 
+ * 
+ */
 static void dupe_window_update_count(DupeWindow *dw, gboolean count_only)
 {
        gchar *text;
@@ -135,6 +245,12 @@ static void dupe_window_update_count(DupeWindow *dw, gboolean count_only)
        g_free(text);
 }
 
+/**
+ * @brief Returns time in µsec since Epoch
+ * @returns 
+ * 
+ * 
+ */
 static guint64 msec_time(void)
 {
        struct timeval tv;
@@ -149,6 +265,16 @@ static gint dupe_iterations(gint n)
        return (n * ((n + 1) / 2));
 }
 
+/**
+ * @brief 
+ * @param dw 
+ * @param status 
+ * @param value 
+ * @param force 
+ * 
+ * If \a status is blank, clear status bar text and set progress to zero. \n
+ * If \a force is not set, after 2 secs have elapsed, update time-to-go every 250 ms.
+ */
 static void dupe_window_update_progress(DupeWindow *dw, const gchar *status, gdouble value, gboolean force)
 {
        const gchar *status_text;
@@ -419,6 +545,7 @@ static void dupe_item_read_cache(DupeItem *di)
                        {
                        di->width = cd->width;
                        di->height = cd->height;
+                       di->dimensions = (di->width << 16) + di->height;
                        }
                if (!di->md5sum && cd->have_md5sum)
                        {
@@ -700,7 +827,7 @@ static GList *dupe_listview_get_selection(DupeWindow *dw, GtkWidget *listview)
                        }
                work = work->next;
                }
-       g_list_foreach(slist, (GFunc)gtk_tree_path_free, NULL);
+       g_list_foreach(slist, (GFunc)tree_path_free_wrapper, NULL);
        g_list_free(slist);
 
        return g_list_reverse(list);
@@ -728,7 +855,7 @@ static gboolean dupe_listview_item_is_selected(DupeWindow *dw, DupeItem *di, Gtk
                if (di_n == di) found = TRUE;
                work = work->next;
                }
-       g_list_foreach(slist, (GFunc)gtk_tree_path_free, NULL);
+       g_list_foreach(slist, (GFunc)tree_path_free_wrapper, NULL);
        g_list_free(slist);
 
        return found;
@@ -778,6 +905,13 @@ static void dupe_listview_select_dupes(DupeWindow *dw, DupeSelectType parents)
  * ------------------------------------------------------------------
  */
 
+/**
+ * @brief Search \a parent->group for \a child (#DupeItem)
+ * @param child 
+ * @param parent 
+ * @returns 
+ * 
+ */
 static DupeMatch *dupe_match_find_match(DupeItem *child, DupeItem *parent)
 {
        GList *work;
@@ -792,6 +926,13 @@ static DupeMatch *dupe_match_find_match(DupeItem *child, DupeItem *parent)
        return NULL;
 }
 
+/**
+ * @brief Create #DupeMatch structure for \a child, and insert into \a parent->group list.
+ * @param child 
+ * @param parent 
+ * @param rank 
+ * 
+ */
 static void dupe_match_link_child(DupeItem *child, DupeItem *parent, gdouble rank)
 {
        DupeMatch *dm;
@@ -802,12 +943,26 @@ static void dupe_match_link_child(DupeItem *child, DupeItem *parent, gdouble ran
        parent->group = g_list_append(parent->group, dm);
 }
 
+/**
+ * @brief Link \a a & \a b as both parent and child
+ * @param a 
+ * @param b 
+ * @param rank 
+ * 
+ * Link \a a as child of \a b, and \a b as child of \a a
+ */
 static void dupe_match_link(DupeItem *a, DupeItem *b, gdouble rank)
 {
        dupe_match_link_child(a, b, rank);
        dupe_match_link_child(b, a, rank);
 }
 
+/**
+ * @brief Remove \a child #DupeMatch from \a parent->group list.
+ * @param child 
+ * @param parent 
+ * 
+ */
 static void dupe_match_unlink_child(DupeItem *child, DupeItem *parent)
 {
        DupeMatch *dm;
@@ -820,12 +975,27 @@ static void dupe_match_unlink_child(DupeItem *child, DupeItem *parent)
                }
 }
 
+/**
+ * @brief  Unlink \a a from \a b, and \a b from \a a
+ * @param a 
+ * @param b 
+ *
+ * Free the relevant #DupeMatch items from the #DupeItem group lists
+ */
 static void dupe_match_unlink(DupeItem *a, DupeItem *b)
 {
        dupe_match_unlink_child(a, b);
        dupe_match_unlink_child(b, a);
 }
 
+/**
+ * @brief 
+ * @param parent 
+ * @param unlink_children 
+ * 
+ * If \a unlink_children is set, unlink all entries in \a parent->group list. \n
+ * Free the \a parent->group list and set group_rank to zero;
+ */
 static void dupe_match_link_clear(DupeItem *parent, gboolean unlink_children)
 {
        GList *work;
@@ -846,11 +1016,25 @@ static void dupe_match_link_clear(DupeItem *parent, gboolean unlink_children)
        parent->group_rank = 0.0;
 }
 
+/**
+ * @brief Search \a parent->group list for \a child
+ * @param child 
+ * @param parent 
+ * @returns boolean TRUE/FALSE found/not found
+ * 
+ */
 static gint dupe_match_link_exists(DupeItem *child, DupeItem *parent)
 {
        return (dupe_match_find_match(child, parent) != NULL);
 }
 
+/**
+ * @brief  Search \a parent->group for \a child, and return \a child->rank
+ * @param child 
+ * @param parent 
+ * @returns \a dm->di->rank
+ *
+ */
 static gdouble dupe_match_link_rank(DupeItem *child, DupeItem *parent)
 {
        DupeMatch *dm;
@@ -861,6 +1045,15 @@ static gdouble dupe_match_link_rank(DupeItem *child, DupeItem *parent)
        return 0.0;
 }
 
+/**
+ * @brief Find highest rank in \a child->group
+ * @param child 
+ * @returns 
+ * 
+ * Search the #DupeMatch entries in the \a child->group list.
+ * Return the #DupeItem with the highest rank. If more than one have
+ * the same rank, the first encountered is used.
+ */
 static DupeItem *dupe_match_highest_rank(DupeItem *child)
 {
        DupeMatch *dr;
@@ -871,13 +1064,22 @@ static DupeItem *dupe_match_highest_rank(DupeItem *child)
        while (work)
                {
                DupeMatch *dm = work->data;
-               if (!dr || dm->rank > dr->rank) dr = dm;
+               if (!dr || dm->rank > dr->rank)
+                       {
+                       dr = dm;
+                       }
                work = work->next;
                }
 
        return (dr) ? dr->di : NULL;
 }
 
+/** 
+ * @brief Compute and store \a parent->group_rank
+ * @param parent 
+ * 
+ * Group_rank = (sum of all child ranks) / n
+ */
 static void dupe_match_rank_update(DupeItem *parent)
 {
        GList *work;
@@ -920,6 +1122,13 @@ static DupeItem *dupe_match_find_parent(DupeWindow *dw, DupeItem *child)
        return NULL;
 }
 
+/**
+ * @brief 
+ * @param work (#DupeItem) dw->list or dw->second_list
+ * 
+ * Unlink all #DupeItem-s in \a work.
+ * Do not unlink children.
+ */
 static void dupe_match_reset_list(GList *work)
 {
        while (work)
@@ -986,11 +1195,25 @@ static void dupe_match_print_list(GList *list)
 }
 
 /* level 3, unlinking and orphan handling */
+/**
+ * @brief 
+ * @param child 
+ * @param parent \a di from \a child->group
+ * @param[inout] list \a dw->list sorted by rank (#DupeItem)
+ * @param dw 
+ * @returns modified \a list
+ *
+ * Called for each entry in \a child->group (#DupeMatch) with \a parent set to \a dm->di. \n
+ * Find the highest rank #DupeItem of the \a parent's children. \n
+ * If that is == \a child OR
+ * highest rank #DupeItem of \a child == \a parent then FIXME:
+ * 
+ */
 static GList *dupe_match_unlink_by_rank(DupeItem *child, DupeItem *parent, GList *list, DupeWindow *dw)
 {
-       DupeItem *best;
+       DupeItem *best = NULL;
 
-       best = dupe_match_highest_rank(parent);
+       best = dupe_match_highest_rank(parent); // highest rank in parent->group
        if (best == child || dupe_match_highest_rank(child) == parent)
                {
                GList *work;
@@ -1018,7 +1241,7 @@ static GList *dupe_match_unlink_by_rank(DupeItem *child, DupeItem *parent, GList
                                }
                        }
 
-               rank = dupe_match_link_rank(child, parent);
+               rank = dupe_match_link_rank(child, parent); // child->rank
                dupe_match_link_clear(parent, TRUE);
                dupe_match_link(child, parent, rank);
                list = g_list_remove(list, parent);
@@ -1034,6 +1257,16 @@ static GList *dupe_match_unlink_by_rank(DupeItem *child, DupeItem *parent, GList
 }
 
 /* level 2 */
+/**
+ * @brief 
+ * @param[inout] list \a dw->list sorted by rank (#DupeItem)
+ * @param di 
+ * @param dw 
+ * @returns modified \a list
+ * 
+ * Called for each entry in \a list.
+ * Call unlink for each child in \a di->group
+ */
 static GList *dupe_match_group_filter(GList *list, DupeItem *di, DupeWindow *dw)
 {
        GList *work;
@@ -1050,6 +1283,15 @@ static GList *dupe_match_group_filter(GList *list, DupeItem *di, DupeWindow *dw)
 }
 
 /* level 1 (top) */
+/**
+ * @brief 
+ * @param[inout] list \a dw->list sorted by rank (#DupeItem)
+ * @param dw 
+ * @returns Filtered \a list
+ * 
+ * Called once.
+ * Call group filter for each \a di in \a list
+ */
 static GList *dupe_match_group_trim(GList *list, DupeWindow *dw)
 {
        GList *work;
@@ -1076,6 +1318,12 @@ static gint dupe_match_sort_groups_cb(gconstpointer a, gconstpointer b)
        return 0;
 }
 
+/**
+ * @brief Sorts the children of each #DupeItem in \a list
+ * @param list #DupeItem
+ * 
+ * Sorts the #DupeItem->group children on rank
+ */
 static void dupe_match_sort_groups(GList *list)
 {
        GList *work;
@@ -1103,6 +1351,14 @@ static gint dupe_match_totals_sort_cb(gconstpointer a, gconstpointer b)
        return 0;
 }
 
+/**
+ * @brief Callback for group_rank sort
+ * @param a 
+ * @param b 
+ * @returns 
+ * 
+ * 
+ */
 static gint dupe_match_rank_sort_cb(gconstpointer a, gconstpointer b)
 {
        DupeItem *da = (DupeItem *)a;
@@ -1113,7 +1369,15 @@ static gint dupe_match_rank_sort_cb(gconstpointer a, gconstpointer b)
        return 0;
 }
 
-/* returns allocated GList of dupes sorted by rank */
+/**
+ * @brief Sorts \a source_list by group-rank
+ * @param source_list #DupeItem
+ * @returns 
+ *
+ * Computes group_rank for each #DupeItem. \n
+ * Items with no group list are ignored.
+ * Returns allocated GList of #DupeItem-s sorted by group_rank
+ */
 static GList *dupe_match_rank_sort(GList *source_list)
 {
        GList *list = NULL;
@@ -1126,7 +1390,7 @@ static GList *dupe_match_rank_sort(GList *source_list)
 
                if (di->group)
                        {
-                       dupe_match_rank_update(di);
+                       dupe_match_rank_update(di); // Compute and store group_rank for di
                        list = g_list_prepend(list, di);
                        }
 
@@ -1136,7 +1400,13 @@ static GList *dupe_match_rank_sort(GList *source_list)
        return g_list_sort(list, dupe_match_rank_sort_cb);
 }
 
-/* returns allocated GList of dupes sorted by totals */
+/**
+ * @brief Returns allocated GList of dupes sorted by totals
+ * @param source_list 
+ * @returns 
+ * 
+ * 
+ */
 static GList *dupe_match_totals_sort(GList *source_list)
 {
        source_list = g_list_sort(source_list, dupe_match_totals_sort_cb);
@@ -1145,11 +1415,17 @@ static GList *dupe_match_totals_sort(GList *source_list)
        return g_list_reverse(source_list);
 }
 
+/**
+ * @brief 
+ * @param dw 
+ * 
+ * Called once.
+ */
 static void dupe_match_rank(DupeWindow *dw)
 {
        GList *list;
 
-       list = dupe_match_rank_sort(dw->list);
+       list = dupe_match_rank_sort(dw->list); // sorted by group_rank, no-matches filtered out
 
        if (required_debug_level(2)) dupe_match_print_list(list);
 
@@ -1178,6 +1454,18 @@ static void dupe_match_rank(DupeWindow *dw)
  * ------------------------------------------------------------------
  */
 
+/**
+ * @brief 
+ * @param[in] a 
+ * @param[in] b 
+ * @param[in] mask 
+ * @param[out] rank 
+ * @param[in] fast 
+ * @returns 
+ * 
+ * For similarity checks, compute rank - (similarity factor between a and b). \n
+ * If rank < user-set sim value, returns FALSE.
+ */
 static gboolean dupe_match(DupeItem *a, DupeItem *b, DupeMatchType mask, gdouble *rank, gint fast)
 {
        *rank = 0.0;
@@ -1294,13 +1582,447 @@ static gboolean dupe_match(DupeItem *a, DupeItem *b, DupeMatchType mask, gdouble
 
                DEBUG_3("similar: %32s %32s = %f", a->fd->name, b->fd->name, f);
                }
-
-       return TRUE;
+
+       return TRUE;
+}
+
+/**
+ * @brief  Determine if there is a match
+ * @param di1 first item
+ * @param di2 second item
+ * @param data #DupeWindow; its \a match_mask selects the criteria
+ * @returns DUPE_MATCH/DUPE_NO_MATCH/DUPE_NAME_MATCH
+ *                     DUPE_NAME_MATCH is used for name != contents searches:
+ *                                                     the name and content match i.e.
+ *                                                     no match, but keep searching
+ *
+ * Called when stepping down the array looking for adjacent matches,
+ * and from the 2nd set search.
+ *
+ * Every criterion set in the mask is tested in turn; the first one that
+ * fails yields DUPE_NO_MATCH. All string comparisons use g_strcmp0() so
+ * that a NULL string is handled the same way in every branch.
+ *
+ * Is not used for similarity checks.
+ */
+static DUPE_CHECK_RESULT dupe_match_check(DupeItem *di1, DupeItem *di2, gpointer data)
+{
+       DupeWindow *dw = data;
+       DupeMatchType mask = dw->match_mask;
+
+       if (mask & DUPE_MATCH_ALL)
+               {
+               return DUPE_MATCH;
+               }
+       if (mask & DUPE_MATCH_PATH)
+               {
+               if (utf8_compare(di1->fd->path, di2->fd->path, TRUE) != 0)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_NAME)
+               {
+               if (g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name) != 0)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_NAME_CI)
+               {
+               if (g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase) != 0)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_NAME_CONTENT)
+               {
+               if (g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name) == 0)
+                       {
+                       /* same name and same checksum: not a hit, but the caller keeps scanning */
+                       if (g_strcmp0(di1->md5sum, di2->md5sum) == 0)
+                               {
+                               return DUPE_NAME_MATCH;
+                               }
+                       }
+               else
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_NAME_CI_CONTENT)
+               {
+               /* was strcmp(): use g_strcmp0() like the DUPE_MATCH_NAME_CI branch above */
+               if (g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase) == 0)
+                       {
+                       if (g_strcmp0(di1->md5sum, di2->md5sum) == 0)
+                               {
+                               return DUPE_NAME_MATCH;
+                               }
+                       }
+               else
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_SIZE)
+               {
+               if (di1->fd->size != di2->fd->size)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_DATE)
+               {
+               if (di1->fd->date != di2->fd->date)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_SUM)
+               {
+               if (g_strcmp0(di1->md5sum, di2->md5sum) != 0)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+       if (mask & DUPE_MATCH_DIM)
+               {
+               if (di1->dimensions != di2->dimensions)
+                       {
+                       return DUPE_NO_MATCH;
+                       }
+               }
+
+       return DUPE_MATCH;
+}
+
+/**
+ * @brief The callback for the binary search
+ * @param a pointer to an array element, i.e. a (DupeItem **)
+ * @param b the #DupeItem being searched for
+ * @returns negative/0/positive
+ *
+ * Is not used for similarity checks.
+ *
+ * Used only when two file sets are used.
+ * The match mask is read from the file-scope variable param_match_mask
+ * because there is no g_array_binary_search_with_data() function in glib.
+ *
+ * Only the first criterion found in the mask, in the order tested below,
+ * decides the result. Numeric fields are compared with a three-way
+ * comparison rather than a subtraction, so a difference that does not fit
+ * in a gint cannot be truncated or change sign.
+ */
+static gint dupe_match_binary_search_cb(gconstpointer a, gconstpointer b)
+{
+       const DupeItem *di1 = *((DupeItem **) a);
+       const DupeItem *di2 = b;
+       DupeMatchType mask = param_match_mask;
+
+       if (mask & DUPE_MATCH_ALL)
+               {
+               return 0;
+               }
+       if (mask & DUPE_MATCH_PATH)
+               {
+               return utf8_compare(di1->fd->path, di2->fd->path, TRUE);
+               }
+       if (mask & DUPE_MATCH_NAME)
+               {
+               return g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name);
+               }
+       if (mask & DUPE_MATCH_NAME_CI)
+               {
+               return g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase);
+               }
+       if (mask & DUPE_MATCH_NAME_CONTENT)
+               {
+               return g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name);
+               }
+       if (mask & DUPE_MATCH_NAME_CI_CONTENT)
+               {
+               return g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase);
+               }
+       if (mask & DUPE_MATCH_SIZE)
+               {
+               /* NOTE(review): size is presumably wider than gint - confirm against FileData */
+               return (di1->fd->size > di2->fd->size) - (di1->fd->size < di2->fd->size);
+               }
+       if (mask & DUPE_MATCH_DATE)
+               {
+               return (di1->fd->date > di2->fd->date) - (di1->fd->date < di2->fd->date);
+               }
+       if (mask & DUPE_MATCH_SUM)
+               {
+               return g_strcmp0(di1->md5sum, di2->md5sum);
+               }
+       if (mask & DUPE_MATCH_DIM)
+               {
+               return (di1->dimensions > di2->dimensions) - (di1->dimensions < di2->dimensions);
+               }
+
+       return 0;
+}
+
+/**
+ * @brief The callback for the array sort
+ * @param a pointer to an array element, i.e. a (DupeItem **)
+ * @param b pointer to an array element, i.e. a (DupeItem **)
+ * @param data #DupeWindow; its \a match_mask selects the sort key
+ * @returns negative/0/positive
+ *
+ * Is not used for similarity checks.
+ *
+ * Only the first criterion found in the mask, in the order tested below,
+ * decides the ordering. Numeric fields are compared with a three-way
+ * comparison rather than a subtraction, so a difference that does not fit
+ * in a gint cannot be truncated or change sign.
+ */
+static gint dupe_match_sort_cb(gconstpointer a, gconstpointer b, gpointer data)
+{
+       const DupeItem *di1 = *((DupeItem **) a);
+       const DupeItem *di2 = *((DupeItem **) b);
+       DupeWindow *dw = data;
+       DupeMatchType mask = dw->match_mask;
+
+       if (mask & DUPE_MATCH_ALL)
+               {
+               return 0;
+               }
+       if (mask & DUPE_MATCH_PATH)
+               {
+               return utf8_compare(di1->fd->path, di2->fd->path, TRUE);
+               }
+       if (mask & DUPE_MATCH_NAME)
+               {
+               return g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name);
+               }
+       if (mask & DUPE_MATCH_NAME_CI)
+               {
+               return g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase);
+               }
+       if (mask & DUPE_MATCH_NAME_CONTENT)
+               {
+               return g_strcmp0(di1->fd->collate_key_name, di2->fd->collate_key_name);
+               }
+       if (mask & DUPE_MATCH_NAME_CI_CONTENT)
+               {
+               return g_strcmp0(di1->fd->collate_key_name_nocase, di2->fd->collate_key_name_nocase);
+               }
+       if (mask & DUPE_MATCH_SIZE)
+               {
+               /* NOTE(review): size is presumably wider than gint - confirm against FileData */
+               return (di1->fd->size > di2->fd->size) - (di1->fd->size < di2->fd->size);
+               }
+       if (mask & DUPE_MATCH_DATE)
+               {
+               return (di1->fd->date > di2->fd->date) - (di1->fd->date < di2->fd->date);
+               }
+       if (mask & DUPE_MATCH_SUM)
+               {
+               /* A missing checksum (NULL or empty) is never dereferenced or passed to strcmp().
+                * NOTE(review): returning -1 for both argument orders is not a consistent
+                * ordering; kept as in the original - confirm whether that is intended.
+                */
+               if (!di1->md5sum || !di2->md5sum || di1->md5sum[0] == '\0' || di2->md5sum[0] == '\0')
+                       {
+                       return -1;
+                       }
+               return strcmp(di1->md5sum, di2->md5sum);
+               }
+       if (mask & DUPE_MATCH_DIM)
+               {
+               if (!di1 || !di2 || !di1->width || !di1->height || !di2->width || !di2->height)
+                       {
+                       return -1;
+                       }
+               return (di1->dimensions > di2->dimensions) - (di1->dimensions < di2->dimensions);
+               }
+
+       return 0; // should not execute
+}
+
+/**
+ * @brief Check for duplicate matches
+ * @param dw window whose \a list (and, with two sets, \a second_list) is searched
+ *
+ * Is not used for similarity checks.
+ *
+ * Loads the file sets into arrays and sorts them with dupe_match_sort_cb().
+ *
+ * If one file set, steps down the array looking for runs of adjacent
+ * matching items and links the matching members to the first item of the run.
+ *
+ * If two file sets, steps down the first set and for each value
+ * does a binary search for matches in the second set.
+ */
+static void dupe_array_check(DupeWindow *dw)
+{
+       GArray *array_set1;
+       GArray *array_set2;
+       GList *work;
+       gint i_set1;
+       gint i_set2;
+       DUPE_CHECK_RESULT check_result;
+       guint out_match_index = 0;
+       gboolean match_found = FALSE;
+
+       /* dupe_match_binary_search_cb() cannot be given user data, so pass the mask through the file-scope variable */
+       param_match_mask = dw->match_mask;
+
+       if (!dw->list) return;
+
+       array_set1 = g_array_new(TRUE, TRUE, sizeof(gpointer));
+       array_set2 = g_array_new(TRUE, TRUE, sizeof(gpointer));
+       dupe_match_reset_list(dw->list);
+
+       work = dw->list;
+       while (work)
+               {
+               DupeItem *di = work->data;
+               g_array_append_val(array_set1, di);
+               work = work->next;
+               }
+
+       g_array_sort_with_data(array_set1, dupe_match_sort_cb, dw);
+
+       if (dw->second_set)
+               {
+               /* Two sets - nothing can be done until a second set is loaded */
+               if (dw->second_list)
+                       {
+                       work = dw->second_list;
+                       while (work)
+                               {
+                               g_array_append_val(array_set2, (work->data));
+                               work = work->next;
+                               }
+                       g_array_sort_with_data(array_set2, dupe_match_sort_cb, dw);
+
+                       for (i_set1 = 0; i_set1 <= (gint)(array_set1->len) - 1; i_set1++)
+                               {
+                               DupeItem *di1 = g_array_index(array_set1, gpointer, i_set1);
+                               DupeItem *di2 = NULL;
+                               /* If multiple identical entries in set 1, use the last one.
+                                * The bound is len - 1 so that the second-to-last entry is
+                                * also compared with the last one.
+                                */
+                               if (i_set1 < (gint)(array_set1->len) - 1)
+                                       {
+                                       di2 = g_array_index(array_set1, gpointer, i_set1 + 1);
+                                       check_result = dupe_match_check(di1, di2, dw);
+                                       if (check_result == DUPE_MATCH || check_result == DUPE_NAME_MATCH)
+                                               {
+                                               continue;
+                                               }
+                                       }
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION >= 62))
+                               /* NOTE(review): the index returned for a run of equal elements may not be
+                                * the first of the run, and only later elements are scanned below - confirm.
+                                */
+                               match_found = g_array_binary_search(array_set2, di1, dupe_match_binary_search_cb, &out_match_index);
+#else
+                               guint i;
+
+                               match_found = FALSE;
+                               for (i = 0; i < array_set2->len; i++)
+                                       {
+                                       di2 = g_array_index(array_set2, gpointer, i);
+                                       check_result = dupe_match_check(di1, di2, dw);
+                                       if (check_result == DUPE_MATCH)
+                                               {
+                                               match_found = TRUE;
+                                               out_match_index = i;
+                                               break;
+                                               }
+                                       }
+#endif
+
+                               if (match_found)
+                                       {
+                                       di2 = g_array_index(array_set2, gpointer, out_match_index);
+
+                                       check_result = dupe_match_check(di1, di2, dw);
+                                       if (check_result == DUPE_MATCH || check_result == DUPE_NAME_MATCH)
+                                               {
+                                               if (check_result == DUPE_MATCH)
+                                                       {
+                                                       dupe_match_link(di2, di1, 0.0);
+                                                       }
+                                               i_set2 = out_match_index + 1;
+
+                                               /* Nothing left in set 2 for this item: go on to the next
+                                                * set 1 item instead of abandoning the whole scan.
+                                                */
+                                               if (i_set2 > (gint)(array_set2->len) - 1)
+                                                       {
+                                                       continue;
+                                                       }
+                                               /* Look for multiple matches in set 2 for item di1 */
+                                               di2 = g_array_index(array_set2, gpointer, i_set2);
+                                               check_result = dupe_match_check(di1, di2, dw);
+                                               while (check_result == DUPE_MATCH || check_result == DUPE_NAME_MATCH)
+                                                       {
+                                                       if (check_result == DUPE_MATCH)
+                                                               {
+                                                               dupe_match_link(di2, di1, 0.0);
+                                                               }
+                                                       i_set2++;
+                                                       if (i_set2 > (gint)(array_set2->len) - 1)
+                                                               {
+                                                               break;
+                                                               }
+                                                       di2 = g_array_index(array_set2, gpointer, i_set2);
+                                                       check_result = dupe_match_check(di1, di2, dw);
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       else
+               {
+               /* File set 1 only */
+               g_list_free(dw->dupes);
+               dw->dupes = NULL;
+
+               if ((gint)(array_set1->len) > 1)
+                       {
+                       for (i_set1 = 0; i_set1 <= (gint)(array_set1->len) - 2; i_set1++)
+                               {
+                               DupeItem *di1 = g_array_index(array_set1, gpointer, i_set1);
+                               DupeItem *di2 = g_array_index(array_set1, gpointer, i_set1 + 1);
+
+                               check_result = dupe_match_check(di1, di2, dw);
+                               if (check_result == DUPE_MATCH || check_result == DUPE_NAME_MATCH)
+                                       {
+                                       if (check_result == DUPE_MATCH)
+                                               {
+                                               dupe_match_link(di2, di1, 0.0);
+                                               }
+                                       i_set1++;
+
+                                       /* the run reached the end of the array: nothing left to compare */
+                                       if (i_set1 + 1 > (gint)(array_set1->len) - 1)
+                                               {
+                                               break;
+                                               }
+                                       /* Look for multiple matches for item di1 */
+                                       di2 = g_array_index(array_set1, gpointer, i_set1 + 1);
+                                       check_result = dupe_match_check(di1, di2, dw);
+                                       while (check_result == DUPE_MATCH || check_result == DUPE_NAME_MATCH)
+                                               {
+                                               if (check_result == DUPE_MATCH)
+                                                       {
+                                                       dupe_match_link(di2, di1, 0.0);
+                                                       }
+                                               i_set1++;
+
+                                               if (i_set1 + 1 > (gint)(array_set1->len) - 1)
+                                                       {
+                                                       break;
+                                                       }
+                                               di2 = g_array_index(array_set1, gpointer, i_set1 + 1);
+                                               check_result = dupe_match_check(di1, di2, dw);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       g_array_free(array_set1, TRUE);
+       g_array_free(array_set2, TRUE);
 }
 
+/**
+ * @brief Look for similarity match
+ * @param dw 
+ * @param needle 
+ * @param start 
+ * 
+ * Only used for similarity checks.\n
+ * Called from dupe_check_cb.
+ * Called for each entry in the list.
+ * Steps through the list looking for matches against needle.
+ * Pushes a #DupeQueueItem onto thread pool queue.
+ */
 static void dupe_list_check_match(DupeWindow *dw, DupeItem *needle, GList *start)
 {
        GList *work;
+       DupeQueueItem *dqi;
 
        if (dw->second_set)
                {
@@ -1315,26 +2037,12 @@ static void dupe_list_check_match(DupeWindow *dw, DupeItem *needle, GList *start
                work = g_list_last(dw->list);
                }
 
-       while (work)
-               {
-               DupeItem *di = work->data;
-
-               /* speed opt: forward for second set, back for simple compare */
-               if (dw->second_set)
-                       work = work->next;
-               else
-                       work = work->prev;
-
-               if (!dupe_match_link_exists(needle, di))
-                       {
-                       gdouble rank;
-
-                       if (dupe_match(di, needle, dw->match_mask, &rank, TRUE))
-                               {
-                               dupe_match_link(di, needle, rank);
-                               }
-                       }
-               }
+       dqi = g_new0(DupeQueueItem, 1);
+       dqi->needle = needle;
+       dqi->dw = dw;
+       dqi->work = work;
+       dqi->index = dw->queue_count;
+       g_thread_pool_push(dw->dupe_comparison_thread_pool, dqi, NULL);
 }
 
 /*
@@ -1461,10 +2169,45 @@ static void dupe_thumb_step(DupeWindow *dw)
 
 static void dupe_check_stop(DupeWindow *dw)
 {
-       if (dw->idle_id || dw->img_loader || dw->thumb_loader)
+       if (dw->idle_id > 0)
                {
                g_source_remove(dw->idle_id);
                dw->idle_id = 0;
+               }
+
+       dw->abort = TRUE;
+
+       while (dw->thread_count < dw->queue_count) // Wait for the queue to empty
+               {
+               dupe_window_update_progress(dw, NULL, 0.0, FALSE);
+               widget_set_cursor(dw->listview, -1);
+               }
+
+       g_list_free(dw->search_matches);
+       dw->search_matches = NULL;
+
+       if (dw->idle_id || dw->img_loader || dw->thumb_loader)
+               {
+               if (dw->idle_id > 0)
+                       {
+                       g_source_remove(dw->idle_id);
+                       dw->idle_id = 0;
+                       }
+               dupe_window_update_progress(dw, NULL, 0.0, FALSE);
+               widget_set_cursor(dw->listview, -1);
+               }
+
+       if (dw->add_files_queue_id)
+               {
+               g_source_remove(dw->add_files_queue_id);
+               dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
+               gtk_widget_set_sensitive(dw->controls_box, TRUE);
+               if (g_list_length(dw->add_files_queue) > 0)
+                       {
+                       filelist_free(dw->add_files_queue);
+                       }
+               dw->add_files_queue = NULL;
                dupe_window_update_progress(dw, NULL, 0.0, FALSE);
                widget_set_cursor(dw->listview, -1);
                }
@@ -1541,18 +2284,22 @@ static GList *dupe_setup_point_step(DupeWindow *dw, GList *p)
        return NULL;
 }
 
-static gboolean dupe_check_cb(gpointer data)
+/**
+ * @brief Generates the checksum or dimensions
+ * @param list Set1 or set2
+ * @returns TRUE/FALSE = not completed/completed
+ * 
+ * Ensures that the DIs contain the MD5SUM or dimensions for all items in
+ * the list. One item at a time. Re-enters if not completed.
+ */
+static gboolean create_checksums_dimensions(DupeWindow *dw, GList *list)
 {
-       DupeWindow *dw = data;
-
-       if (!dw->idle_id) return FALSE;
-
-       if (!dw->setup_done)
-               {
-               if ((dw->match_mask & DUPE_MATCH_SUM) &&
-                   !(dw->setup_mask & DUPE_MATCH_SUM) )
+               if ((dw->match_mask & DUPE_MATCH_SUM) ||
+                       (dw->match_mask & DUPE_MATCH_NAME_CONTENT) ||
+                       (dw->match_mask & DUPE_MATCH_NAME_CI_CONTENT))
                        {
-                       if (!dw->setup_point) dw->setup_point = dw->list;
+                       /* MD5SUM only */
+                       if (!dw->setup_point) dw->setup_point = list; // setup_point clear on 1st entry
 
                        while (dw->setup_point)
                                {
@@ -1569,7 +2316,10 @@ static gboolean dupe_check_cb(gpointer data)
                                        if (options->thumbnails.enable_caching)
                                                {
                                                dupe_item_read_cache(di);
-                                               if (di->md5sum) return TRUE;
+                                               if (di->md5sum)
+                                                       {
+                                                       return TRUE;
+                                                       }
                                                }
 
                                        di->md5sum = md5_text_from_file_utf8(di->fd->path, "");
@@ -1580,13 +2330,13 @@ static gboolean dupe_check_cb(gpointer data)
                                        return TRUE;
                                        }
                                }
-                       dw->setup_mask |= DUPE_MATCH_SUM;
                        dupe_setup_reset(dw);
                        }
-               if ((dw->match_mask & DUPE_MATCH_DIM) &&
-                   !(dw->setup_mask & DUPE_MATCH_DIM) )
+
+               if ((dw->match_mask & DUPE_MATCH_DIM)  )
                        {
-                       if (!dw->setup_point) dw->setup_point = dw->list;
+                       /* Dimensions only */
+                       if (!dw->setup_point) dw->setup_point = list;
 
                        while (dw->setup_point)
                                {
@@ -1602,10 +2352,14 @@ static gboolean dupe_check_cb(gpointer data)
                                        if (options->thumbnails.enable_caching)
                                                {
                                                dupe_item_read_cache(di);
-                                               if (di->width != 0 || di->height != 0) return TRUE;
+                                               if (di->width != 0 || di->height != 0)
+                                                       {
+                                                       return TRUE;
+                                                       }
                                                }
 
                                        image_load_dimensions(di->fd, &di->width, &di->height);
+                                       di->dimensions = (di->width << 16) + di->height;
                                        if (options->thumbnails.enable_caching)
                                                {
                                                dupe_item_write_cache(di);
@@ -1613,15 +2367,69 @@ static gboolean dupe_check_cb(gpointer data)
                                        return TRUE;
                                        }
                                }
-                       dw->setup_mask |= DUPE_MATCH_DIM;
                        dupe_setup_reset(dw);
                        }
+
+       return FALSE;
+}
+
+/**
+ * @brief Orders search matches by the sequence in which they were queued (similarity checks only)
+ * @param a #DupeSearchMatch
+ * @param b #DupeSearchMatch
+ * @returns negative, zero or positive as \a a was queued before, together with or after \a b
+ */
+static gint sort_func(gconstpointer a, gconstpointer b)
+{
+       const DupeSearchMatch *first = a;
+       const DupeSearchMatch *second = b;
+
+       return first->index - second->index;
+}
+
+/**
+ * @brief Check set 1 (and set 2) for matches
+ * @param data DupeWindow
+ * @returns TRUE/FALSE = not completed/completed
+ * 
+ * Initiated from start, loader done and item remove
+ *
+ * On first entry generates di->MD5SUM, di->dimensions and sim data,
+ * and updates the cache.
+ */
+static gboolean dupe_check_cb(gpointer data)
+{
+       DupeWindow *dw = data;
+       DupeSearchMatch *search_match_list_item;
+
+       if (!dw->idle_id)
+               {
+               return FALSE;
+               }
+
+       if (!dw->setup_done) /* Clear on 1st entry */
+               {
+               if (dw->list)
+                       {
+                       if (create_checksums_dimensions(dw, dw->list))
+                               {
+                               return TRUE;
+                               }
+                       }
+               if (dw->second_list)
+                       {
+                       if (create_checksums_dimensions(dw, dw->second_list))
+                               {
+                               return TRUE;
+                               }
+                       }
                if ((dw->match_mask & DUPE_MATCH_SIM_HIGH ||
                     dw->match_mask & DUPE_MATCH_SIM_MED ||
                     dw->match_mask & DUPE_MATCH_SIM_LOW ||
                     dw->match_mask & DUPE_MATCH_SIM_CUSTOM) &&
                    !(dw->setup_mask & DUPE_MATCH_SIM_MED) )
                        {
+                       /* Similarity only */
                        if (!dw->setup_point) dw->setup_point = dw->list;
 
                        while (dw->setup_point)
@@ -1666,20 +2474,73 @@ static gboolean dupe_check_cb(gpointer data)
                        dw->setup_mask |= DUPE_MATCH_SIM_MED;
                        dupe_setup_reset(dw);
                        }
+
+               /* End of setup not done */
                dupe_window_update_progress(dw, _("Comparing..."), 0.0, FALSE);
                dw->setup_done = TRUE;
                dupe_setup_reset(dw);
                dw->setup_count = g_list_length(dw->list);
                }
 
+       /* Setup done - dw->working set to NULL below
+        * Set before 1st entry: dw->working = g_list_last(dw->list)
+        * Set before 1st entry: dw->setup_count = g_list_length(dw->list)
+        */
        if (!dw->working)
                {
-               if (dw->setup_count > 0)
+               /* Similarity check threads may still be running */
+               if (dw->setup_count > 0 && (dw->match_mask == DUPE_MATCH_SIM_HIGH ||
+                       dw->match_mask == DUPE_MATCH_SIM_MED ||
+                       dw->match_mask == DUPE_MATCH_SIM_LOW ||
+                       dw->match_mask == DUPE_MATCH_SIM_CUSTOM))
                        {
+                       if( dw->thread_count < dw->queue_count)
+                               {
+                               dupe_window_update_progress(dw, _("Comparing..."), 0.0, FALSE);
+
+                               return TRUE;
+                               }
+
+                       if (dw->search_matches_sorted == NULL)
+                               {
+                               dw->search_matches_sorted = g_list_sort(dw->search_matches, sort_func);
+                               dupe_setup_reset(dw);
+                               }
+
+                       while (dw->search_matches_sorted)
+                               {
+                               dw->setup_n++;
+                               dupe_window_update_progress(dw, _("Sorting..."), 0.0, FALSE);
+                               search_match_list_item = dw->search_matches_sorted->data;
+
+                               if (!dupe_match_link_exists(search_match_list_item->a, search_match_list_item->b))
+                                       {
+                                       dupe_match_link(search_match_list_item->a, search_match_list_item->b, search_match_list_item->rank);
+                                       }
+
+                               dw->search_matches_sorted = dw->search_matches_sorted->next;
+
+                               if (dw->search_matches_sorted != NULL)
+                                       {
+                                       return TRUE;
+                                       }
+                               }
+                       g_list_free(dw->search_matches);
+                       dw->search_matches = NULL;
+                       g_list_free(dw->search_matches_sorted);
+                       dw->search_matches_sorted = NULL;
                        dw->setup_count = 0;
-                       dupe_window_update_progress(dw, _("Sorting..."), 1.0, TRUE);
-                       return TRUE;
                        }
+               else
+                       {
+                       if (dw->setup_count > 0)
+                               {
+                               dw->setup_count = 0;
+                               dupe_window_update_progress(dw, _("Sorting..."), 1.0, TRUE);
+                               return TRUE;
+                               }
+                       }
+
                dw->idle_id = 0;
                dupe_window_update_progress(dw, NULL, 0.0, FALSE);
 
@@ -1694,13 +2555,32 @@ static gboolean dupe_check_cb(gpointer data)
                widget_set_cursor(dw->listview, -1);
 
                return FALSE;
+               /* The end */
                }
 
-       dupe_list_check_match(dw, (DupeItem *)dw->working->data, dw->working);
-       dupe_window_update_progress(dw, _("Comparing..."), dw->setup_count == 0 ? 0.0 : (gdouble) dw->setup_n / dw->setup_count, FALSE);
-       dw->setup_n++;
+       /* Setup done - working */
+       if (dw->match_mask == DUPE_MATCH_SIM_HIGH ||
+               dw->match_mask == DUPE_MATCH_SIM_MED ||
+               dw->match_mask == DUPE_MATCH_SIM_LOW ||
+               dw->match_mask == DUPE_MATCH_SIM_CUSTOM)
+               {
+               /* This is the similarity comparison */
+               dupe_list_check_match(dw, (DupeItem *)dw->working->data, dw->working);
+               dupe_window_update_progress(dw, _("Queuing..."), dw->setup_count == 0 ? 0.0 : (gdouble) dw->setup_n / dw->setup_count, FALSE);
+               dw->setup_n++;
+               dw->queue_count++;
 
-       dw->working = dw->working->prev;
+               dw->working = dw->working->prev; /* Is NULL when complete */
+               }
+       else
+               {
+               /* This is the comparison for all other parameters.
+                * dupe_array_check() processes the entire list in one go
+               */
+               dw->working = NULL;
+               dupe_window_update_progress(dw, _("Comparing..."), 0.0, FALSE);
+               dupe_array_check(dw);
+               }
 
        return TRUE;
 }
@@ -1719,12 +2599,25 @@ static void dupe_check_start(DupeWindow *dw)
 
        dupe_window_update_count(dw, TRUE);
        widget_set_cursor(dw->listview, GDK_WATCH);
+       dw->queue_count = 0;
+       dw->thread_count = 0;
+       dw->search_matches_sorted = NULL;
+       dw->abort = FALSE;
 
        if (dw->idle_id) return;
 
        dw->idle_id = g_idle_add(dupe_check_cb, dw);
 }
 
+/** @brief One-shot idle callback: starts the duplicate check for the #DupeWindow passed in \a data */
+static gboolean dupe_check_start_cb(gpointer data)
+{
+       DupeWindow *window = data;
+
+       dupe_check_start(window);
+       return FALSE; /* FALSE removes the idle source after this single run */
+}
+
 /*
  * ------------------------------------------------------------------
  * Item addition, removal
@@ -1829,6 +2722,92 @@ static gboolean dupe_item_remove_by_path(DupeWindow *dw, const gchar *path)
        return TRUE;
 }
 
+/** @brief Ends queue processing: drops the list cache, schedules dupe_check_start(), re-enables the controls */
+static gboolean dupe_files_add_queue_done(DupeWindow *dw)
+{
+       dw->add_files_queue_id = 0;
+       dupe_destroy_list_cache(dw);
+       g_idle_add(dupe_check_start_cb, dw);
+       gtk_widget_set_sensitive(dw->controls_box, TRUE);
+       return FALSE;
+}
+
+/**
+ * @brief Idle callback: consumes one entry of dw->add_files_queue per call
+ * @param data #DupeWindow
+ * @returns TRUE while the callback must run again, FALSE once the queue is drained
+ *
+ * Files become #DupeItem entries; a directory is replaced by its filtered contents; anything else is dropped.
+ */
+static gboolean dupe_files_add_queue_cb(gpointer data)
+{
+       DupeWindow *dw = data;
+       DupeItem *di = NULL;
+       FileData *fd;
+
+       gtk_progress_bar_pulse(GTK_PROGRESS_BAR(dw->extra_label));
+
+       if (dw->add_files_queue == NULL)
+               {
+               return dupe_files_add_queue_done(dw);
+               }
+
+       /* Pop the head up front so that no entry, not even a NULL one, can stall the queue */
+       fd = dw->add_files_queue->data;
+       dw->add_files_queue = g_list_delete_link(dw->add_files_queue, dw->add_files_queue);
+
+       if (fd && isfile(fd->path))
+               {
+               di = dupe_item_new(fd);
+               }
+       else if (fd && isdir(fd->path))
+               {
+               GList *f = NULL;
+               GList *d = NULL;
+
+               if (filelist_read(fd, &f, &d))
+                       {
+                       f = filelist_filter(f, FALSE);
+                       d = filelist_filter(d, TRUE);
+
+                       /* Resulting order: d, then f, then the rest of the queue */
+                       dw->add_files_queue = g_list_concat(f, dw->add_files_queue);
+                       dw->add_files_queue = g_list_concat(d, dw->add_files_queue);
+                       }
+               }
+
+       if (!di)
+               {
+               /* Directory expanded or entry skipped: carry on at the next call */
+               return TRUE;
+               }
+
+       dupe_item_read_cache(di);
+
+       /* Ensure images in the lists have unique FileDatas */
+       if (!dupe_insert_in_list_cache(dw, di->fd))
+               {
+               dupe_item_free(di);
+               return TRUE;
+               }
+
+       if (dw->second_drop)
+               {
+               dupe_second_add(dw, di);
+               }
+       else
+               {
+               dw->list = g_list_prepend(dw->list, di);
+               }
+
+       if (dw->add_files_queue != NULL)
+               {
+               return TRUE;
+               }
+
+       return dupe_files_add_queue_done(dw);
+}
+
 static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectInfo *info,
                           FileData *fd, gboolean recurse)
 {
@@ -1840,7 +2819,7 @@ static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectIn
                }
        else if (fd)
                {
-               if (isfile(fd->path))
+               if (isfile(fd->path) && !g_file_test(fd->path, G_FILE_TEST_IS_SYMLINK))
                        {
                        di = dupe_item_new(fd);
                        }
@@ -1920,6 +2899,51 @@ static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectIn
                }
 }
 
+/** @brief Build the FileData lookup tables for set 1 and set 2 from the items already listed */
+static void dupe_init_list_cache(DupeWindow *dw)
+{
+       GList *work;
+
+       dw->list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+       for (work = dw->list; work; work = work->next)
+               {
+               DupeItem *item = work->data;
+               g_hash_table_add(dw->list_cache, item->fd);
+               }
+
+       dw->second_list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+       for (work = dw->second_list; work; work = work->next)
+               {
+               DupeItem *item = work->data;
+               g_hash_table_add(dw->second_list_cache, item->fd);
+               }
+}
+
+static void dupe_destroy_list_cache(DupeWindow *dw) /* Frees both FileData lookup tables */
+{
+       g_clear_pointer(&dw->list_cache, g_hash_table_destroy); /* destroys the table and NULLs the field */
+       g_clear_pointer(&dw->second_list_cache, g_hash_table_destroy); /* no dangling pointer is left behind */
+}
+
+/**
+ * @brief Record \a fd in the cache of the set currently receiving files
+ * @param dw #DupeWindow; dw->second_drop selects second_list_cache over list_cache
+ * @param fd FileData used as the hash key
+ * @returns TRUE if \a fd was newly added, FALSE if it was already cached
+ */
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd)
+{
+       GHashTable *cache;
+
+       if (dw->second_drop)
+               cache = dw->second_list_cache;
+       else
+               cache = dw->list_cache;
+       /* Look up before adding so that an existing entry is never replaced */
+       if (g_hash_table_lookup(cache, fd)) return FALSE;
+       return g_hash_table_add(cache, fd);
+}
+
 void dupe_window_add_collection(DupeWindow *dw, CollectionData *collection)
 {
        CollectInfo *info;
@@ -1943,11 +2967,40 @@ void dupe_window_add_files(DupeWindow *dw, GList *list, gboolean recurse)
                {
                FileData *fd = work->data;
                work = work->next;
+               if (isdir(fd->path) && !recurse)
+                       {
+                       GList *f, *d;
+
+                       if (filelist_read(fd, &f, &d))
+                               {
+                               GList *work_file;
+                               work_file = f;
 
-               dupe_files_add(dw, NULL, NULL, fd, recurse);
+                               while (work_file)
+                                       {
+                                       /* Add only the files, ignore the dirs when no recurse */
+                                       dw->add_files_queue = g_list_prepend(dw->add_files_queue, work_file->data);
+                                       work_file = work_file->next;
+                                       }
+                               g_list_free(f);
+                               g_list_free(d);
+                               }
+                       }
+               else
+                       {
+                       dw->add_files_queue = g_list_prepend(dw->add_files_queue, fd);
+                       }
                }
+       if (dw->add_files_queue_id == 0)
+               {
+               gtk_progress_bar_pulse(GTK_PROGRESS_BAR(dw->extra_label));
+               gtk_progress_bar_set_pulse_step(GTK_PROGRESS_BAR(dw->extra_label), DUPE_PROGRESS_PULSE_STEP);
+               gtk_progress_bar_set_text(GTK_PROGRESS_BAR(dw->extra_label), _("Loading file list"));
 
-       dupe_check_start(dw);
+               dupe_init_list_cache(dw);
+               dw->add_files_queue_id = g_idle_add(dupe_files_add_queue_cb, dw);
+               gtk_widget_set_sensitive(dw->controls_box, FALSE);
+               }
 }
 
 static void dupe_item_update(DupeWindow *dw, DupeItem *di)
@@ -2178,7 +3231,7 @@ static void dupe_window_remove_selection(DupeWindow *dw, GtkWidget *listview)
                if (di) list = g_list_prepend(list, di);
                work = work->next;
                }
-       g_list_foreach(slist, (GFunc)gtk_tree_path_free, NULL);
+       g_list_foreach(slist, (GFunc)tree_path_free_wrapper, NULL);
        g_list_free(slist);
 
        dw->color_frozen = TRUE;
@@ -2330,7 +3383,7 @@ static void dupe_menu_delete_cb(GtkWidget *widget, gpointer data)
        DupeWindow *dw = data;
 
        options->file_ops.safe_delete_enable = FALSE;
-       file_util_delete(NULL, dupe_listview_get_selection(dw, dw->listview), dw->window);
+       file_util_delete_notify_done(NULL, dupe_listview_get_selection(dw, dw->listview), dw->window, delete_finished_cb, dw);
 }
 
 static void dupe_menu_move_to_trash_cb(GtkWidget *widget, gpointer data)
@@ -2338,7 +3391,7 @@ static void dupe_menu_move_to_trash_cb(GtkWidget *widget, gpointer data)
        DupeWindow *dw = data;
 
        options->file_ops.safe_delete_enable = TRUE;
-       file_util_delete(NULL, dupe_listview_get_selection(dw, dw->listview), dw->window);
+       file_util_delete_notify_done(NULL, dupe_listview_get_selection(dw, dw->listview), dw->window, delete_finished_cb, dw);
 }
 
 static void dupe_menu_copy_path_cb(GtkWidget *widget, gpointer data)
@@ -2880,9 +3933,9 @@ static void dupe_menu_setup(DupeWindow *dw)
        dupe_menu_add_item(store, _("Dimensions"), DUPE_MATCH_DIM, dw);
        dupe_menu_add_item(store, _("Checksum"), DUPE_MATCH_SUM, dw);
        dupe_menu_add_item(store, _("Path"), DUPE_MATCH_PATH, dw);
-       dupe_menu_add_item(store, _("Similarity (high)"), DUPE_MATCH_SIM_HIGH, dw);
-       dupe_menu_add_item(store, _("Similarity"), DUPE_MATCH_SIM_MED, dw);
-       dupe_menu_add_item(store, _("Similarity (low)"), DUPE_MATCH_SIM_LOW, dw);
+       dupe_menu_add_item(store, _("Similarity (high - 95)"), DUPE_MATCH_SIM_HIGH, dw);
+       dupe_menu_add_item(store, _("Similarity (med. - 90)"), DUPE_MATCH_SIM_MED, dw);
+       dupe_menu_add_item(store, _("Similarity (low - 85)"), DUPE_MATCH_SIM_LOW, dw);
        dupe_menu_add_item(store, _("Similarity (custom)"), DUPE_MATCH_SIM_CUSTOM, dw);
        dupe_menu_add_item(store, _("Name ≠ content"), DUPE_MATCH_NAME_CONTENT, dw);
        dupe_menu_add_item(store, _("Name case-insensitive ≠ content"), DUPE_MATCH_NAME_CI_CONTENT, dw);
@@ -3161,7 +4214,7 @@ static gboolean dupe_window_keypress_cb(GtkWidget *widget, GdkEventKey *event, g
                gtk_tree_model_get_iter(store, &iter, tpath);
                gtk_tree_model_get(store, &iter, DUPE_COLUMN_POINTER, &di, -1);
                }
-       g_list_foreach(slist, (GFunc)gtk_tree_path_free, NULL);
+       g_list_foreach(slist, (GFunc)tree_path_free_wrapper, NULL);
        g_list_free(slist);
 
        if (event->state & GDK_CONTROL_MASK)
@@ -3365,6 +4418,8 @@ void dupe_window_close(DupeWindow *dw)
 
        file_data_unregister_notify_func(dupe_notify_cb, dw);
 
+       g_thread_pool_free(dw->dupe_comparison_thread_pool, TRUE, TRUE);
+
        g_free(dw);
 }
 
@@ -3501,6 +4556,7 @@ DupeWindow *dupe_window_new()
        GtkWidget *scrolled;
        GtkWidget *frame;
        GtkWidget *status_box;
+       GtkWidget *controls_box;
        GtkWidget *button_box;
        GtkWidget *label;
        GtkWidget *button;
@@ -3512,6 +4568,8 @@ DupeWindow *dupe_window_new()
        layout_valid(&lw);
 
        dw = g_new0(DupeWindow, 1);
+       dw->add_files_queue = NULL;
+       dw->add_files_queue_id = 0;
 
        dw->match_mask = DUPE_MATCH_NAME;
        if (options->duplicates_match == DUPE_MATCH_NAME) dw->match_mask = DUPE_MATCH_NAME;
@@ -3671,54 +4729,55 @@ DupeWindow *dupe_window_new()
        gtk_box_pack_start(GTK_BOX(status_box), dw->extra_label, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(dw->extra_label);
 
-       status_box = pref_box_new(vbox, FALSE, GTK_ORIENTATION_HORIZONTAL, 0);
+       controls_box = pref_box_new(vbox, FALSE, GTK_ORIENTATION_HORIZONTAL, 0);
+       dw->controls_box = controls_box;
 
        dw->button_thumbs = gtk_check_button_new_with_label(_("Thumbnails"));
        dw->show_thumbs = options->duplicates_thumbnails;
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(dw->button_thumbs), dw->show_thumbs);
        g_signal_connect(G_OBJECT(dw->button_thumbs), "toggled",
                         G_CALLBACK(dupe_window_show_thumb_cb), dw);
-       gtk_box_pack_start(GTK_BOX(status_box), dw->button_thumbs, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), dw->button_thumbs, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(dw->button_thumbs);
 
        label = gtk_label_new(_("Compare by:"));
-       gtk_box_pack_start(GTK_BOX(status_box), label, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), label, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(label);
 
        dupe_menu_setup(dw);
-       gtk_box_pack_start(GTK_BOX(status_box), dw->combo, FALSE, FALSE, 0);
+       gtk_box_pack_start(GTK_BOX(controls_box), dw->combo, FALSE, FALSE, 0);
        gtk_widget_show(dw->combo);
 
        label = gtk_label_new(_("Custom Threshold"));
-       gtk_box_pack_start(GTK_BOX(status_box), label, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), label, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(label);
        dw->custom_threshold = gtk_spin_button_new_with_range(1, 100, 1);
-       gtk_widget_set_tooltip_text(GTK_WIDGET(dw->custom_threshold), "Custom similarity threshold");
+       gtk_widget_set_tooltip_text(GTK_WIDGET(dw->custom_threshold), "Custom similarity threshold\n(Use tab key to set value)");
        gtk_spin_button_set_value(GTK_SPIN_BUTTON(dw->custom_threshold), options->duplicates_similarity_threshold);
        g_signal_connect(G_OBJECT(dw->custom_threshold), "value_changed", G_CALLBACK(dupe_window_custom_threshold_cb), dw);
-       gtk_box_pack_start(GTK_BOX(status_box), dw->custom_threshold, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), dw->custom_threshold, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(dw->custom_threshold);
 
        button = gtk_check_button_new_with_label(_("Sort"));
        gtk_widget_set_tooltip_text(GTK_WIDGET(button), "Sort by group totals");
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(button), options->sort_totals);
        g_signal_connect(G_OBJECT(button), "toggled", G_CALLBACK(dupe_sort_totals_toggle_cb), dw);
-       gtk_box_pack_start(GTK_BOX(status_box), button, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), button, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(button);
 
-       dw->button_rotation_invariant = gtk_check_button_new_with_label(_("Ignore Rotation"));
+       dw->button_rotation_invariant = gtk_check_button_new_with_label(_("Ignore Orientation"));
        gtk_widget_set_tooltip_text(GTK_WIDGET(dw->button_rotation_invariant), "Ignore image orientation");
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(dw->button_rotation_invariant), options->rot_invariant_sim);
        g_signal_connect(G_OBJECT(dw->button_rotation_invariant), "toggled",
                         G_CALLBACK(dupe_window_rotation_invariant_cb), dw);
-       gtk_box_pack_start(GTK_BOX(status_box), dw->button_rotation_invariant, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), dw->button_rotation_invariant, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(dw->button_rotation_invariant);
 
        button = gtk_check_button_new_with_label(_("Compare two file sets"));
        gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(button), dw->second_set);
        g_signal_connect(G_OBJECT(button), "toggled",
                         G_CALLBACK(dupe_second_set_toggle_cb), dw);
-       gtk_box_pack_start(GTK_BOX(status_box), button, FALSE, FALSE, PREF_PAD_SPACE);
+       gtk_box_pack_start(GTK_BOX(controls_box), button, FALSE, FALSE, PREF_PAD_SPACE);
        gtk_widget_show(button);
 
        button_box = gtk_hbox_new(FALSE, 0);
@@ -3772,6 +4831,10 @@ DupeWindow *dupe_window_new()
 
        file_data_register_notify_func(dupe_notify_cb, dw, NOTIFY_PRIORITY_MEDIUM);
 
+       g_mutex_init(&dw->thread_count_mutex);
+       g_mutex_init(&dw->search_matches_mutex);
+       dw->dupe_comparison_thread_pool = g_thread_pool_new(dupe_comparison_func, dw, -1, FALSE, NULL);
+
        return dw;
 }
 
@@ -3910,6 +4973,13 @@ static void dupe_dnd_data_get(GtkWidget *widget, GdkDragContext *context,
        GList *list = NULL;
        GList *work;
 
+       if (dw->add_files_queue_id > 0)
+               {
+               warning_dialog(_("Find duplicates"), _("Please wait for the current file selection to be loaded."), GTK_STOCK_DIALOG_INFO, dw->window);
+
+               return;
+               }
+
        source = gtk_drag_get_source_widget(context);
        if (source == dw->listview || source == dw->second_listview) return;
 
@@ -4065,7 +5135,7 @@ static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data)
                case FILEDATA_CHANGE_COPY:
                        break;
                case FILEDATA_CHANGE_DELETE:
-                       while (dupe_item_remove_by_path(dw, fd->path));
+                       /* Update the UI only once, after the operation finishes */
                        break;
                case FILEDATA_CHANGE_UNSPECIFIED:
                case FILEDATA_CHANGE_WRITE_METADATA:
@@ -4074,6 +5144,29 @@ static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data)
 
 }
 
+/**
+ * @brief Refresh window after a file delete operation
+ * @param success (ud->phase != UTILITY_PHASE_CANCEL) #file_util_dialog_run
+ * @param dest_path Not used
+ * @param data #DupeWindow
+ *
+ * Refreshing the window after each file of a large set is deleted slows the
+ * UI to an unacceptable level, so the #FileUtilDoneFunc calls this function
+ * once, when the entire delete operation is completed.
+ */
+static void delete_finished_cb(gboolean success, const gchar *dest_path, gpointer data)
+{
+       DupeWindow *dw = data;
+
+       if (!success)
+               {
+               return;
+               }
+
+       /* Single refresh: remove whatever is selected in the main list view */
+       dupe_window_remove_selection(dw, dw->listview);
+}
+
 /*
  *-------------------------------------------------------------------
  * Export duplicates data