dupe: Eliminate O(n^2) code in dupe_files_add_queue_cb()
authorFrej Drejhammar <frej.drejhammar@gmail.com>
Fri, 12 Mar 2021 20:02:16 +0000 (21:02 +0100)
committerFrej Drejhammar <frej.drejhammar@gmail.com>
Sat, 27 Mar 2021 12:52:38 +0000 (13:52 +0100)
Geeqie is, compared to its ancestor Gqview, embarrassingly slow while
loading a set of files to look for duplicates (most noticeable when
looking for duplicate names). Profiling shows that a lot of time is
spent in dupe_files_add_queue_cb(). The culprit turns out to be two
loops, which for each newly loaded file, traverses the list of already
loaded images. As this gives a time complexity of O(n^2) where n is
the number of images, loading times become very large when the number
of files increases.

This patch adds a GHashTable-based cache which is created when a
loading operation starts and destroyed when it completes. The cache is
initialized with the loaded images on creation (an O(n)
operation). Using the cache, checking for duplicates in the lists can
be done in constant time, thus making the loading time linear in the
number of images loaded. With this change Geeqie now matches the
performance of Gqview.

As a side effect, this patch also fixes a memory leak when an attempt
is made to re-add an already added file, as the unmodified version
never freed the DupeItem allocated for the twice added file.

src/dupe.c
src/dupe.h

index e2c5057..95db214 100644 (file)
@@ -107,6 +107,11 @@ static void dupe_notify_cb(FileData *fd, NotifyType type, gpointer data);
 
 static GtkWidget *submenu_add_export(GtkWidget *menu, GtkWidget **menu_item, GCallback func, gpointer data);
 static void dupe_pop_menu_export_cb(GtkWidget *widget, gpointer data);
+
+static void dupe_init_list_cache(DupeWindow *dw);
+static void dupe_destroy_list_cache(DupeWindow *dw);
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd);
+
 /*
  * ------------------------------------------------------------------
  * Window updates
@@ -1893,6 +1898,7 @@ static void dupe_check_stop(DupeWindow *dw)
                {
                g_source_remove(dw->add_files_queue_id);
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                if (g_list_length(dw->add_files_queue) > 0)
                        {
@@ -2356,6 +2362,7 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
        if (queue == NULL)
                {
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                g_idle_add(dupe_check_start_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                return FALSE;
@@ -2400,37 +2407,10 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
        dupe_item_read_cache(di);
 
        /* Ensure images in the lists have unique FileDatas */
-       GList *work;
-       DupeItem *di_list;
-       work = g_list_first(dw->list);
-       while (work)
-               {
-               di_list = work->data;
-               if (di_list->fd == di->fd)
-                       {
-                       return TRUE;
-                       }
-               else
-                       {
-                       work = work->next;
-                       }
-               }
-
-       if (dw->second_list)
+       if (!dupe_insert_in_list_cache(dw, di->fd))
                {
-               work = g_list_first(dw->second_list);
-               while (work)
-                       {
-                       di_list = work->data;
-                       if (di_list->fd == di->fd)
-                               {
-                               return TRUE;
-                               }
-                       else
-                               {
-                               work = work->next;
-                               }
-                       }
+               dupe_item_free(di);
+               return TRUE;
                }
 
        if (dw->second_drop)
@@ -2449,6 +2429,7 @@ static gboolean dupe_files_add_queue_cb(gpointer data)
        else
                {
                dw->add_files_queue_id = 0;
+               dupe_destroy_list_cache(dw);
                g_idle_add(dupe_check_start_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, TRUE);
                return FALSE;
@@ -2546,6 +2527,44 @@ static void dupe_files_add(DupeWindow *dw, CollectionData *collection, CollectIn
                }
 }
 
+static void dupe_init_list_cache(DupeWindow *dw)
+{
+       dw->list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+       dw->second_list_cache = g_hash_table_new(g_direct_hash, g_direct_equal);
+
+       for (GList *i = dw->list; i != NULL; i = i->next)
+               {
+                       DupeItem *di = i->data;
+
+                       g_hash_table_add(dw->list_cache, di->fd);
+               }
+
+       for (GList *i = dw->second_list; i != NULL; i = i->next)
+               {
+                       DupeItem *di = i->data;
+
+                       g_hash_table_add(dw->second_list_cache, di->fd);
+               }
+}
+
+static void dupe_destroy_list_cache(DupeWindow *dw)
+{
+       g_hash_table_destroy(dw->list_cache);
+       g_hash_table_destroy(dw->second_list_cache);
+}
+
+/* Return true if the fd was not in the cache */
+static gboolean dupe_insert_in_list_cache(DupeWindow *dw, FileData *fd)
+{
+       GHashTable *table =
+               dw->second_drop ? dw->second_list_cache : dw->list_cache;
+       /* We do this as a lookup + add as we don't want to overwrite
+          items as that would leak the old value. */
+       if (g_hash_table_lookup(table, fd) != NULL)
+               return TRUE;
+       return g_hash_table_add(table, fd);
+}
+
 void dupe_window_add_collection(DupeWindow *dw, CollectionData *collection)
 {
        CollectInfo *info;
@@ -2599,6 +2618,7 @@ void dupe_window_add_files(DupeWindow *dw, GList *list, gboolean recurse)
                gtk_progress_bar_set_pulse_step(GTK_PROGRESS_BAR(dw->extra_label), DUPE_PROGRESS_PULSE_STEP);
                gtk_progress_bar_set_text(GTK_PROGRESS_BAR(dw->extra_label), _("Loading file list"));
 
+               dupe_init_list_cache(dw);
                dw->add_files_queue_id = g_idle_add(dupe_files_add_queue_cb, dw);
                gtk_widget_set_sensitive(dw->controls_box, FALSE);
                }
index 9744e1b..eaacd67 100644 (file)
@@ -100,6 +100,8 @@ struct _DupeWindow
        GtkWidget *custom_threshold;
        GList *add_files_queue;
        guint add_files_queue_id;
+       GHashTable *list_cache; /* Caches the DupeItems of all items in list */
+       GHashTable *second_list_cache; /* Caches the DupeItems of all items in second_list */
        GtkWidget *controls_box;
 
        gboolean show_thumbs;