Fix #880: Optimize file loading when looking for duplicates
authorFrej Drejhammar <frej.drejhammar@gmail.com>
Sun, 28 Mar 2021 11:05:15 +0000 (12:05 +0100)
committerColin Clark <colin.clark@cclark.uk>
Sun, 28 Mar 2021 11:05:15 +0000 (12:05 +0100)
https://github.com/BestImageViewer/geeqie/pull/880

src/dupe.c
src/dupe.h

index 09dd34b..95db214 100644 (file)
@@ -107,6 +107,11 @@ static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data);
 
 static GtkWidget *submenu_add_export(GtkWidget *menu, GtkWidget **menu_item, GCallback func, gpointer data);
 static void dupe_pop_menu_export_cb(GtkWidget *widget, gpointer data);
+
+static void dupe_init_list_cache(DupeWindow *dw);
+static void dupe_destroy_list_cache(DupeWindow *dw);
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd);
+
 /*
  * ------------------------------------------------------------------
  * Window updates
@@ -1893,6 +1898,7 @@ static void dupe_check_stop(DupeWindow *dw)
                {
                g_source_remove(dw->add_files_queue_id);
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                if (g_list_length(dw->add_files_queue) > 0)
                        {
@@ -2353,9 +2359,10 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
 
        gtk_progress_bar_pulse(GTK_PROGRESS_BAR(dw->extra_label));
 
-       if (g_list_length(queue) == 0)
+       if (queue == NULL)
                {
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                g_idle_add(dupe_check_start_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                return FALSE;
@@ -2375,25 +2382,11 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
 
                        if (filelist_read(fd, &f, &d))
                                {
-                               GList *work;
-
                                f = filelist_filter(f, FALSE);
                                d = filelist_filter(d, TRUE);
 
-                               work = f;
-                               while (work)
-                                       {
-                                       dw->add_files_queue = g_list_prepend(dw->add_files_queue, work->data);
-                                       work = work->next;
-                                       }
-                               g_list_free(f);
-                               work = d;
-                               while (work)
-                                       {
-                                       dw->add_files_queue = g_list_prepend(dw->add_files_queue, work->data);
-                                       work = work->next;
-                                       }
-                               g_list_free(d);
+                               dw->add_files_queue = g_list_concat(f, dw->add_files_queue);
+                               dw->add_files_queue = g_list_concat(d, dw->add_files_queue);
                                }
                        }
                else
@@ -2414,37 +2407,10 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
        dupe_item_read_cache(di);
 
        /* Ensure images in the lists have unique FileDatas */
-       GList *work;
-       DupeItem *di_list;
-       work = g_list_first(dw->list);
-       while (work)
-               {
-               di_list = work->data;
-               if (di_list->fd == di->fd)
-                       {
-                       return TRUE;
-                       }
-               else
-                       {
-                       work = work->next;
-                       }
-               }
-
-       if (dw->second_list)
+       if (!dupe_insert_in_list_cache(dw, di->fd))
                {
-               work = g_list_first(dw->second_list);
-               while (work)
-                       {
-                       di_list = work->data;
-                       if (di_list->fd == di->fd)
-                               {
-                               return TRUE;
-                               }
-                       else
-                               {
-                               work = work->next;
-                               }
-                       }
+               dupe_item_free(di);
+               return TRUE;
                }
 
        if (dw->second_drop)
@@ -2456,13 +2422,14 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
                dw->list = g_list_prepend(dw->list, di);
                }
 
-       if (g_list_length(dw->add_files_queue) > 0)
+       if (dw->add_files_queue != NULL)
                {
                return TRUE;
                }
        else
                {
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                g_idle_add(dupe_check_start_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                return FALSE;
@@ -2560,6 +2527,44 @@ static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectIn
                }
 }
 
+static void dupe_init_list_cache(DupeWindow *dw)
+{
+       dw->list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+       dw->second_list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+
+       for (GList *i = dw->list; i != NULL; i = i->next)
+               {
+                       DupeItem *di = i->data;
+
+                       g_hash_table_add(dw->list_cache, di->fd);
+               }
+
+       for (GList *i = dw->second_list; i != NULL; i = i->next)
+               {
+                       DupeItem *di = i->data;
+
+                       g_hash_table_add(dw->second_list_cache, di->fd);
+               }
+}
+
+static void dupe_destroy_list_cache(DupeWindow *dw)
+{
+       g_hash_table_destroy(dw->list_cache);
+       g_hash_table_destroy(dw->second_list_cache);
+}
+
+/* Return true if the fd was not in the cache */
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd)
+{
+       GHashTable *table =
+               dw->second_drop ? dw->second_list_cache : dw->list_cache;
+       /* We do this as a lookup + add as we don't want to overwrite
+          items as that would leak the old value. */
+       if (g_hash_table_lookup(table, fd) != NULL)
+               return TRUE;
+       return g_hash_table_add(table, fd);
+}
+
 void dupe_window_add_collection(DupeWindow *dw, CollectionData *collection)
 {
        CollectInfo *info;
@@ -2613,6 +2618,7 @@ void dupe_window_add_files(DupeWindow *dw, GList *list, gboolean recurse)
                gtk_progress_bar_set_pulse_step(GTK_PROGRESS_BAR(dw->extra_label), DUPE_PROGRESS_PULSE_STEP);
                gtk_progress_bar_set_text(GTK_PROGRESS_BAR(dw->extra_label), _("Loading file list"));
 
+               dupe_init_list_cache(dw);
                dw->add_files_queue_id = g_idle_add(dupe_files_add_queue_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, FALSE);
                }
index 9744e1b..eaacd67 100644 (file)
@@ -100,6 +100,8 @@ struct _DupeWindow
        GtkWidget *custom_threshold;
        GList *add_files_queue;
        guint add_files_queue_id;
+       GHashTable *list_cache; /* Caches the DupeItems of all items in list */
+       GHashTable *second_list_cache; /* Caches the DupeItems of all items in second_list */
        GtkWidget *controls_box;
 
        gboolean show_thumbs;