git.0d.be Git - empathy.git/commitdiff
Use GRegex instead of custom code and use a new regex to detect URIs
author Xavier Claessens <xclaesse@src.gnome.org>
Mon, 1 Dec 2008 10:21:55 +0000 (10:21 +0000)
committer Xavier Claessens <xclaesse@src.gnome.org>
Mon, 1 Dec 2008 10:21:55 +0000 (10:21 +0000)
svn path=/trunk/; revision=1932

libempathy-gtk/empathy-theme.c
libempathy/empathy-utils.c
libempathy/empathy-utils.h

index e2da4e494223f03ee66f173fb644fca81e3f80b0..d68b72f0233e9df42705c97d164308eba907d1ba 100644 (file)
 /* Number of seconds between timestamps when using normal mode, 5 minutes. */
 #define TIMESTAMP_INTERVAL 300
 
+#define SHEMES "(https?|ftps?|nntp|news|javascript|about|ghelp|apt|telnet|"\
+              "file|webcal|mailto)"
+#define SEPARATOR "([^,;\?><()\\ ])"
+#define BODY "([^\\ ]*(\\\\ )?)+"
+#define URI_REGEX "("SHEMES"://"BODY SEPARATOR")" \
+                 "|((mailto:)?"BODY"@"BODY"."BODY SEPARATOR")"\
+                 "|((www|ftp)."BODY SEPARATOR")"
+static GRegex *uri_regex = NULL;
+
 #define GET_PRIV(obj) EMPATHY_GET_PRIV (obj, EmpathyTheme)
+
 typedef struct {
        EmpathySmileyManager *smiley_manager;
        gboolean show_avatars;
@@ -254,8 +264,11 @@ empathy_theme_append_text (EmpathyTheme        *theme,
        GtkTextIter      start_iter, end_iter;
        GtkTextMark     *mark;
        GtkTextIter      iter;
-       gint             num_matches, i;
-       GArray          *start, *end;
+       GMatchInfo      *match_info;
+       gboolean         match;
+       gint             last = 0;
+       gint             s = 0, e = 0;
+       gchar           *tmp;
 
        priv = GET_PRIV (theme);
        buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (view));
@@ -263,57 +276,17 @@ empathy_theme_append_text (EmpathyTheme        *theme,
        gtk_text_buffer_get_end_iter (buffer, &start_iter);
        mark = gtk_text_buffer_create_mark (buffer, NULL, &start_iter, TRUE);
 
-       start = g_array_new (FALSE, FALSE, sizeof (gint));
-       end = g_array_new (FALSE, FALSE, sizeof (gint));
-
-       num_matches = empathy_regex_match (EMPATHY_REGEX_ALL, body, start, end);
-
-       if (num_matches == 0) {
-               gtk_text_buffer_get_end_iter (buffer, &iter);
-               theme_insert_text_with_emoticons (buffer, &iter, body, priv->smiley_manager);
-       } else {
-               gint   last = 0;
-               gint   s = 0, e = 0;
-               gchar *tmp;
-
-               for (i = 0; i < num_matches; i++) {
-                       s = g_array_index (start, gint, i);
-                       e = g_array_index (end, gint, i);
-
-                       if (s > last) {
-                               tmp = empathy_substring (body, last, s);
-
-                               gtk_text_buffer_get_end_iter (buffer, &iter);
-                               theme_insert_text_with_emoticons (buffer,
-                                                                 &iter,
-                                                                 tmp,
-                                                                 priv->smiley_manager);
-                               g_free (tmp);
-                       }
-
-                       tmp = empathy_substring (body, s, e);
-
-                       gtk_text_buffer_get_end_iter (buffer, &iter);
-                       if (!link_tag) {
-                               gtk_text_buffer_insert (buffer, &iter,
-                                                       tmp, -1);
-                       } {
-                               gtk_text_buffer_insert_with_tags_by_name (buffer,
-                                                                         &iter,
-                                                                         tmp,
-                                                                         -1,
-                                                                         link_tag,
-                                                                         "link",
-                                                                         NULL);
-                       }
-
-                       g_free (tmp);
+       if (!uri_regex) {
+               uri_regex = g_regex_new (URI_REGEX, 0, 0, NULL);
+       }
 
-                       last = e;
-               }
+       for (match = g_regex_match (uri_regex, body, 0, &match_info); match;
+            match = g_match_info_next (match_info, NULL)) {
+               if (!g_match_info_fetch_pos (match_info, 0, &s, &e))
+                       continue;
 
-               if (e < strlen (body)) {
-                       tmp = empathy_substring (body, e, strlen (body));
+               if (s > last) {
+                       tmp = empathy_substring (body, last, s);
 
                        gtk_text_buffer_get_end_iter (buffer, &iter);
                        theme_insert_text_with_emoticons (buffer,
@@ -322,10 +295,35 @@ empathy_theme_append_text (EmpathyTheme        *theme,
                                                          priv->smiley_manager);
                        g_free (tmp);
                }
+
+               tmp = empathy_substring (body, s, e);
+
+               gtk_text_buffer_get_end_iter (buffer, &iter);
+               if (!link_tag) {
+                       gtk_text_buffer_insert (buffer, &iter,
+                                               tmp, -1);
+               } else {
+                       gtk_text_buffer_insert_with_tags_by_name (buffer,
+                                                                 &iter,
+                                                                 tmp,
+                                                                 -1,
+                                                                 link_tag,
+                                                                 "link",
+                                                                 NULL);
+               }
+
+               g_free (tmp);
+               last = e;
        }
+       g_match_info_free (match_info);
 
-       g_array_free (start, TRUE);
-       g_array_free (end, TRUE);
+       if (last < strlen (body)) {
+               gtk_text_buffer_get_end_iter (buffer, &iter);
+               theme_insert_text_with_emoticons (buffer,
+                                                 &iter,
+                                                 body + last,
+                                                 priv->smiley_manager);
+       }
 
        gtk_text_buffer_get_end_iter (buffer, &iter);
        gtk_text_buffer_insert (buffer, &iter, "\n", 1);
index b5bdb9ff71f9bde377d3f1bdfb8cc28628c65674..671595e68b55cbf1be3a200d80bed8c610cf5517 100644 (file)
@@ -28,7 +28,6 @@
 #include <string.h>
 #include <time.h>
 #include <sys/types.h>
-#include <regex.h>
 
 #include <glib/gi18n.h>
 
@@ -44,8 +43,6 @@
 #define DEBUG_FLAG EMPATHY_DEBUG_OTHER
 #include "empathy-debug.h"
 
-static void regex_init (void);
-
 gchar *
 empathy_substring (const gchar *str,
                  gint         start,
@@ -54,129 +51,6 @@ empathy_substring (const gchar *str,
        return g_strndup (str + start, end - start);
 }
 
-/*
- * Regular Expression code to match urls.
- */
-#define APTCHARS  "-A-Za-z0-9,-."
-#define USERCHARS "-A-Za-z0-9"
-#define PASSCHARS "-A-Za-z0-9,?;.:/!%$^*&~\"#'"
-#define HOSTCHARS "-A-Za-z0-9_"
-#define PATHCHARS "-A-Za-z0-9_$.+!*(),;:@&=?/~#%"
-#define SCHEME    "(news:|telnet:|nntp:|file:/|https?:|ftps?:|webcal:)"
-#define USER      "[" USERCHARS "]+(:["PASSCHARS "]+)?"
-#define URLPATH   "/[" PATHCHARS "]*[^]'.}>) \t\r\n,\\\"]"
-
-static regex_t dingus[EMPATHY_REGEX_ALL];
-
-static void
-regex_init (void)
-{
-       static gboolean  inited = FALSE;
-       const gchar     *expression;
-       gint             i;
-
-       if (inited) {
-               return;
-       }
-
-       for (i = 0; i < EMPATHY_REGEX_ALL; i++) {
-               switch (i) {
-               case EMPATHY_REGEX_AS_IS:
-                       expression =
-                               SCHEME "//(" USER "@)?[" HOSTCHARS ".]+"
-                               "(:[0-9]+)?(" URLPATH ")?";
-                       break;
-               case EMPATHY_REGEX_BROWSER:
-                       expression =
-                               "(www|ftp)[" HOSTCHARS "]*\\.[" HOSTCHARS ".]+"
-                               "(:[0-9]+)?(" URLPATH ")?";
-                       break;
-               case EMPATHY_REGEX_APT:
-                       expression =
-                               "apt://[" APTCHARS "]*";
-                       break;
-               case EMPATHY_REGEX_EMAIL:
-                       expression =
-                               "(mailto:)?[a-z0-9][a-z0-9._-]*@[a-z0-9]"
-                               "[a-z0-9-]*(\\.[a-z0-9][a-z0-9-]*)+";
-                       break;
-               case EMPATHY_REGEX_OTHER:
-                       expression =
-                               "news:[-A-Z\\^_a-z{|}~!\"#$%&'()*+,./0-9;:=?`]+"
-                               "@[" HOSTCHARS ".]+(:[0-9]+)?";
-                       break;
-               default:
-                       /* Silence the compiler. */
-                       expression = NULL;
-                       continue;
-               }
-
-               memset (&dingus[i], 0, sizeof (regex_t));
-               regcomp (&dingus[i], expression, REG_EXTENDED | REG_ICASE);
-       }
-
-       inited = TRUE;
-}
-
-gint
-empathy_regex_match (EmpathyRegExType  type,
-                   const gchar     *msg,
-                   GArray          *start,
-                   GArray          *end)
-{
-       regmatch_t matches[1];
-       gint       ret = 0;
-       gint       num_matches = 0;
-       gint       offset = 0;
-       gint       i;
-
-       g_return_val_if_fail (type >= 0 || type <= EMPATHY_REGEX_ALL, 0);
-
-       regex_init ();
-
-       while (!ret && type != EMPATHY_REGEX_ALL) {
-               ret = regexec (&dingus[type], msg + offset, 1, matches, 0);
-               if (ret == 0) {
-                       gint s;
-
-                       num_matches++;
-
-                       s = matches[0].rm_so + offset;
-                       offset = matches[0].rm_eo + offset;
-
-                       g_array_append_val (start, s);
-                       g_array_append_val (end, offset);
-               }
-       }
-
-       if (type != EMPATHY_REGEX_ALL) {
-               DEBUG ("Found %d matches for regex type:%d", num_matches, type);
-               return num_matches;
-       }
-
-       /* If EMPATHY_REGEX_ALL then we run ALL regex's on the string. */
-       for (i = 0; i < EMPATHY_REGEX_ALL; i++, ret = 0) {
-               while (!ret) {
-                       ret = regexec (&dingus[i], msg + offset, 1, matches, 0);
-                       if (ret == 0) {
-                               gint s;
-
-                               num_matches++;
-
-                               s = matches[0].rm_so + offset;
-                               offset = matches[0].rm_eo + offset;
-
-                               g_array_append_val (start, s);
-                               g_array_append_val (end, offset);
-                       }
-               }
-       }
-
-       DEBUG ("Found %d matches for ALL regex types", num_matches);
-
-       return num_matches;
-}
-
 gint
 empathy_strcasecmp (const gchar *s1,
                   const gchar *s2)
index a320c6246325c646bf1dd9ff335a92968b456915..51ddd231f710cf8c762152357c14ed00a1f109e8 100644 (file)
 
 #include "empathy-contact.h"
 
-G_BEGIN_DECLS
-
 #define EMPATHY_GET_PRIV(obj,type) ((type##Priv*) ((type*)obj)->priv)
 #define G_STR_EMPTY(x) ((x) == NULL || (x)[0] == '\0')
 
-typedef enum {
-       EMPATHY_REGEX_AS_IS,
-       EMPATHY_REGEX_BROWSER,
-       EMPATHY_REGEX_APT,
-       EMPATHY_REGEX_EMAIL,
-       EMPATHY_REGEX_OTHER,
-       EMPATHY_REGEX_ALL,
-} EmpathyRegExType;
+G_BEGIN_DECLS
 
-/* Regular expressions */
+/* Strings */
 gchar *      empathy_substring                      (const gchar     *str,
                                                    gint             start,
                                                    gint             end);
-gint         empathy_regex_match                    (EmpathyRegExType  type,
-                                                   const gchar     *msg,
-                                                   GArray          *start,
-                                                   GArray          *end);
-
-/* Strings */
 gint         empathy_strcasecmp                     (const gchar     *s1,
                                                    const gchar     *s2);
 gint         empathy_strncasecmp                    (const gchar     *s1,