Implementation - MIME Info Parser

Ideas for the new Implementation

Requirements

The basic requirement is that the loader can load the relevant data from the MIME-type files, namely:

  • The comment for the MIME-type
  • The sub-class information (if any)

Besides that, the following requirements apply:

  1. The loader should be fast and lightweight
  2. The loader should ignore unknown XML tags

Therefore the loader should be implemented using a SAX-style parser - most probably using the GLib Markup Parser - which can ignore unknown tags and use only known tags (that is <comment> and <sub-class-of> elements).

Old Implementation (from Filer)

This is a sample implementation for a MIME Info Parser, which is used to load the comment for a given MIME-type, as described in the Shared MIME Database specification.

The source code below has already been tested and is known to work properly for all test cases. However, it doesn’t support sub-classing or aliasing, and it reports an error for any element other than <mime-type> and <comment> instead of ignoring it, so it should be considered sample code only.

/*
 * States tracked on the parser's state stack while walking a MIME info file.
 */
typedef enum
{
  PARSER_STATE_START    = 0,  /* bottom of the stack; no element opened yet */
  PARSER_STATE_MIMETYPE = 1,  /* inside a <mime-type> element */
  PARSER_STATE_COMMENT  = 2   /* inside a <comment> element of a <mime-type> */
} ParserState;
 
/* Stack of ParserState values, declared through the XFCE_GENERIC_STACK macro. */
typedef XFCE_GENERIC_STACK(ParserState) ParserStateStack;
 
/* Parsing context that is handed to the markup callbacks as user_data. */
typedef struct
{
  ParserStateStack *stack;          /* nesting of parser states for the open elements */
  guint             comment_match;  /* best locale match level accepted so far */
  gboolean          comment_use;    /* TRUE if the text of the current <comment> is stored */
  GString          *comment;        /* text of the currently selected comment */
  const gchar      *locale;         /* locale string the xml:lang values are matched against */
} Parser;
 
 
 
/* Forward declarations of the callbacks stored in the GMarkupParser table. */
static void         start_element_handler     (GMarkupParseContext  *context,
                                               const gchar          *element_name,
                                               const gchar         **attribute_names,
                                               const gchar         **attribute_values,
                                               gpointer              user_data,
                                               GError              **error);
static void         end_element_handler       (GMarkupParseContext  *context,
                                               const gchar          *element_name,
                                               gpointer              user_data,
                                               GError              **error);
static void         text_handler              (GMarkupParseContext  *context,
                                               const gchar          *text,
                                               gsize                 text_len,
                                               gpointer              user_data,
                                               GError             **error);
 
 
 
/* NOTE(review): not referenced by any code in this sample; presumably a
 * leftover from the GObject-based file it was taken from - confirm before
 * removing. */
static GObjectClass *parent_class;
 
static GMarkupParser markup_parser =
{
  start_element_handler,
  end_element_handler,
  text_handler,
  NULL,
  NULL,
};
 
 
 
/*
 * GMarkupParser start-element callback.
 *
 * Accepts <mime-type> while in the start state and <comment> while inside
 * <mime-type>, pushing the matching state on the parser stack.  Any other
 * element/state combination sets @error to G_MARKUP_ERROR_UNKNOWN_ELEMENT.
 *
 * For a <comment> the xml:lang attribute decides whether its text will be
 * kept: a comment with xml:lang is used only if it matches the parser's
 * locale better than anything accepted so far; a comment without xml:lang
 * is used only while no localized comment has been accepted.
 *
 * @user_data must point to the Parser for this parse run.
 */
static void
start_element_handler (GMarkupParseContext  *context,
                       const gchar          *element_name,
                       const gchar         **attribute_names,
                       const gchar         **attribute_values,
                       gpointer              user_data,
                       GError              **error)
{
  Parser      *parser = (Parser *) user_data;
  ParserState  state = xfce_stack_top (parser->stack);
  guint        level;
  guint        idx;

  if (state == PARSER_STATE_START
      && strcmp (element_name, "mime-type") == 0)
    {
      xfce_stack_push (parser->stack, PARSER_STATE_MIMETYPE);
      return;
    }

  if (state == PARSER_STATE_MIMETYPE
      && strcmp (element_name, "comment") == 0)
    {
      /* locate the xml:lang attribute; idx ends up on the NULL
       * terminator if the element does not carry one */
      idx = 0;
      while (attribute_names[idx] != NULL
             && strcmp (attribute_names[idx], "xml:lang") != 0)
        ++idx;

      if (G_LIKELY (attribute_names[idx] != NULL))
        {
          /* localized comment: take it only if it beats the best match so far */
          level = xfce_locale_match (parser->locale, attribute_values[idx]);
          parser->comment_use = (level > parser->comment_match);
          if (parser->comment_use)
            parser->comment_match = level;
        }
      else
        {
          /* unlocalized comment: take it only while nothing localized matched */
          parser->comment_use = (parser->comment_match <= XFCE_LOCALE_NO_MATCH);
        }

      /* drop the previously collected text before the new one is appended */
      if (parser->comment_use)
        g_string_truncate (parser->comment, 0);

      xfce_stack_push (parser->stack, PARSER_STATE_COMMENT);
      return;
    }

  /* everything else is not understood by this parser */
  g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
               "Unknown element <%s>", element_name);
}
 
 
 
/*
 * GMarkupParser end-element callback.
 *
 * Pops the current state when the closing tag matches it (</mime-type> for
 * the mime-type state, </comment> for the comment state).  A closing tag
 * seen in the start state sets @error to G_MARKUP_ERROR_INVALID_CONTENT;
 * any other mismatch sets it to G_MARKUP_ERROR_UNKNOWN_ELEMENT.
 *
 * @user_data must point to the Parser for this parse run.
 */
static void
end_element_handler (GMarkupParseContext  *context,
                     const gchar          *element_name,
                     gpointer              user_data,
                     GError              **error)
{
  Parser      *parser = (Parser *) user_data;
  ParserState  state = xfce_stack_top (parser->stack);
  const gchar *expected = NULL;

  if (state == PARSER_STATE_START)
    {
      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
                   "End element handler called while in root context");
      return;
    }

  /* name of the only closing tag that is valid in the current state */
  if (state == PARSER_STATE_MIMETYPE)
    expected = "mime-type";
  else if (state == PARSER_STATE_COMMENT)
    expected = "comment";

  if (expected == NULL || strcmp (element_name, expected) != 0)
    {
      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
                   "Unknown closing element <%s>", element_name);
      return;
    }

  xfce_stack_pop (parser->stack);
}
 
 
 
/*
 * GMarkupParser text callback.
 *
 * Appends @text (@text_len bytes) to the collected comment, but only while
 * inside a <comment> element whose text was selected for use.  Text seen in
 * any other state is ignored.
 *
 * @user_data must point to the Parser for this parse run.
 */
static void
text_handler (GMarkupParseContext  *context,
              const gchar          *text,
              gsize                 text_len,
              gpointer              user_data,
              GError             **error)
{
  Parser *parser = (Parser *) user_data;

  if (xfce_stack_top (parser->stack) == PARSER_STATE_COMMENT
      && parser->comment_use)
    {
      g_string_append_len (parser->comment, text, text_len);
    }
}
 
 
 
/*
 * Loads the comment stored in the MIME info file @filename.
 *
 * The file is read into memory and run through the markup parser using the
 * current LC_MESSAGES locale to select the comment.
 *
 * Returns the comment text on success (possibly an empty string); the
 * caller owns it and must release it with g_free().  Returns NULL and
 * sets @error if the file cannot be read or fails to parse.
 */
static gchar*
load_comment_from_file (const gchar *filename,
                        GError     **error)
{
  GMarkupParseContext *context;
  Parser               parser;
  gboolean             parsed;
  gchar               *result;
  gchar               *data;
  gsize                data_len;

  if (!g_file_get_contents (filename, &data, &data_len, error))
    return NULL;

  /* set up the parser state; comment_use is assigned by the start-element
   * handler before the text handler ever reads it */
  parser.locale = setlocale (LC_MESSAGES, NULL);
  parser.comment_match = XFCE_LOCALE_NO_MATCH;
  parser.comment = g_string_new ("");
  parser.stack = xfce_stack_new (ParserStateStack);
  xfce_stack_push (parser.stack, PARSER_STATE_START);

  context = g_markup_parse_context_new (&markup_parser, 0, &parser, NULL);

  /* end_parse is only attempted when feeding the document succeeded */
  parsed = g_markup_parse_context_parse (context, data, data_len, error)
        && g_markup_parse_context_end_parse (context, error);

  g_markup_parse_context_free (context);

  /* on success keep the character data and hand it to the caller,
   * on failure free it together with the GString (yields NULL) */
  result = g_string_free (parser.comment, !parsed);

  xfce_stack_free (parser.stack);
  g_free (data);

  return result;
}
 
  implementation/mime-info-parser.txt · Last modified: 2005/02/19 20:41 by 217.85.191.78 (benny)
 
Recent changes RSS feed Creative Commons License Driven by DokuWiki