Code Search for Developers
 
 
  

extensions.c from Gtk-Gnutella at Krugle


Show extensions.c syntax highlighted

/*
 * $Id: extensions.c 13831 2007-06-16 01:22:39Z cbiere $
 *
 * Copyright (c) 2002-2003, Raphael Manfredi
 *
 *----------------------------------------------------------------------
 * This file is part of gtk-gnutella.
 *
 *  gtk-gnutella is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  gtk-gnutella is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with gtk-gnutella; if not, write to the Free Software
 *  Foundation, Inc.:
 *      59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *----------------------------------------------------------------------
 */

/**
 * @ingroup core
 * @file
 *
 * Gnutella message extension handling.
 *
 * @author Raphael Manfredi
 * @date 2002-2003
 */

#include "common.h"

RCSID("$Id: extensions.c 13831 2007-06-16 01:22:39Z cbiere $")

#include <zlib.h>

#include "extensions.h"
#include "ggep.h"

#include "lib/atoms.h"
#include "lib/misc.h"
#include "lib/walloc.h"
#include "lib/override.h"		/* Must be the last header included */

#include "if/gnet_property_priv.h"

#define HUGE_FS		0x1CU		/**< Field separator (HUGE) */

/*
 * Limits used when inflating deflated GGEP payloads.
 */
#define GGEP_MAXLEN	65535		/**< Maximum decompressed length */
#define GGEP_GROW	512			/**< Minimum chunk growth when resizing */

/**
 * An extension descriptor.
 *
 * The extension block is structured thustly:
 *
 *    - <.................len.......................>
 *    - <..headlen.><..........paylen...............>
 *    - +-----------+-------------------------------+
 *    - |   header  |      extension payload        |
 *    - +-----------+-------------------------------+
 *    - ^           ^
 *    - base        payload
 *
 * The "<headlen>" part is simply "<len>" - "<paylen>" so it is not stored.
 * Likewise, we store only the beginning of the payload, the base can be
 * computed if needed.
 *
 * All those pointers refer DIRECTLY to the message we received, so naturally
 * one MUST NOT alter the data we can read or we would corrupt the messages
 * before forwarding them.
 *
 * There is a slight complication introduced with GGEP extensions, since the
 * data there can be COBS encoded, and even deflated.  Therefore, reading
 * directly data from ext_phys_payload could yield compressed data, not
 * something really usable.
 *
 * Therefore, the extension structure is mostly private, and routines are
 * provided to access the data.  Decompression and decoding of COBS is lazily
 * performed when they wish to access the extension data.
 *
 * The ext_phys_xxx fields refer to the physical information about the
 * extension.  The ext_xxx() routines allow access to the virtual information
 * after decompression and COBS decoding.  Naturally, if the extension is
 * not compressed nor COBS-encoded, the ext_xxx() routine will return the
 * physical data.
 *
 * The structure here refers to the opaque data that is dynamically allocated
 * each time a new extension is found.
 */
typedef struct extdesc {
	const gchar *ext_phys_payload;	/**< Start of payload buffer */
	/** "virtual" payload; NULL until a COBS/deflated GGEP payload is decoded */
	const gchar *ext_payload;
	guint16 ext_phys_len;		/**< Extension length (header + payload) */
	guint16 ext_phys_paylen;	/**< Extension payload length */
	/** "virtual" payload length; only meaningful once ext_payload is set */
	guint16 ext_paylen;
	/**
	 * Length of buffer for "virtual" payload.  The GGEP decoder sets it to
	 * the walloc()'ed buffer size after a COBS-only decoding, and to 0 when
	 * the buffer was obtained through inflation.  The parsers leave it unset.
	 */
	guint16 ext_rpaylen;

	union {
		/* GGEP-specific attributes, filled by the GGEP parser only */
		struct {
			gboolean extu_cobs;			/**< Payload is COBS-encoded */
			gboolean extu_deflate;		/**< Payload is deflated */
			const gchar *extu_id;		/**< Extension ID */
		} extu_ggep;
	} ext_u;

} extdesc_t;

/*
 * Derived physical attributes: the header length is the difference between
 * the total and payload lengths, and the base is found by stepping back
 * that many bytes from the start of the payload.
 */
#define ext_phys_headlen(d)	((d)->ext_phys_len - (d)->ext_phys_paylen)
#define ext_phys_base(d)	((d)->ext_phys_payload - ext_phys_headlen(d))

/*
 * Union access shortcuts.
 */

#define ext_ggep_cobs		ext_u.extu_ggep.extu_cobs
#define ext_ggep_deflate	ext_u.extu_ggep.extu_deflate
#define ext_ggep_id			ext_u.extu_ggep.extu_id

/*
 * Printable names of the extension types; the comment on each entry names
 * the type constant it stands for.
 * NOTE(review): presumably indexed by the extension type value, so the order
 * must follow the declaration of those constants -- confirm in extensions.h.
 */
static const gchar * const extype[] = {
	"UNKNOWN",					/**< EXT_UNKNOWN */
	"XML",						/**< EXT_XML */
	"HUGE",						/**< EXT_HUGE */
	"GGEP",						/**< EXT_GGEP */
	"NONE",						/**< EXT_NONE */
};

/***
 *** Extension name screener.
 ***/

/**
 * Reserved word description: associates the textual form of a keyword with
 * its token value.  Tables of such entries are searched by rw_screen().
 */
struct rwtable {
	const gchar *rw_name;	/**< Representation */
	ext_token_t rw_token;	/**< Token value */
};

/**
 * URN name table (sorted).
 *
 * Searched without regard to case by rw_urn_screen().
 */
static const struct rwtable urntable[] =
{
	{ "bitprint",		EXT_T_URN_BITPRINT },
	{ "sha1",			EXT_T_URN_SHA1 },
};

/**
 * GGEP extension table (sorted).
 *
 * Searched with a case-sensitive comparison by rw_ggep_screen(), hence the
 * entries must be kept in strcmp() order.  rw_is_sorted() can be used to
 * verify that ordering.
 *
 * The GGEP_ID() and GGEP_GTKG_ID() helpers build an entry from the bare
 * extension name; both are local to this table and undefined at its end.
 */
static const struct rwtable ggeptable[] =
{
#define GGEP_ID(x) { #x, EXT_T_GGEP_ ## x }
#define GGEP_GTKG_ID(x) { "GTKG." #x, EXT_T_GGEP_GTKG_ ## x }

	{ "<", EXT_T_GGEP_LIME_XML }, /**< '<' is less than 'A' */
	GGEP_ID(A),			/**< Same as GGEP ALT but used in HEAD Pongs */
	GGEP_ID(ALT),		/**< Alt-locs in qhits */
	GGEP_ID(ALT_TLS),	/**< TLS-capability bitmap for GGEP ALT */
	GGEP_ID(BH),		/**< Browseable host indication */
	GGEP_ID(C),			/**< Result Code in HEAD Pongs */
	GGEP_ID(CT),		/**< Resource creation time */
	GGEP_ID(DU),		/**< Average servent uptime */
	GGEP_ID(F),			/**< Flags in HEAD Pongs */
	GGEP_ID(FW),		/**< Firewalled-to-Firewalled protocol version */
	GGEP_ID(GGEP),		/**< GGEP extension names known, NUL-separated */
	GGEP_GTKG_ID(IPV6),	/**< GTKG IPv6 address */
	GGEP_GTKG_ID(TLS),	/**< GTKG TLS support indication */
	GGEP_ID(GTKGV1),	/**< GTKG complete version number (binary) */
	GGEP_ID(GUE),		/**< GUESS support */
	GGEP_ID(H),			/**< Hashes in binary form */
	GGEP_ID(HNAME),		/**< Hostname */
	GGEP_ID(IP),		/**< Ip:Port in ping and pongs (F2F) */
	GGEP_ID(IPP),		/**< IP:Port in pongs (UHC) */
	GGEP_ID(IPP_TLS),	/**< TLS-capability bitmap for GGEP IPP */
	GGEP_ID(LF),		/**< Large file size in qhits */
	GGEP_ID(LOC),		/**< Locale preferences, for clustering  */
	GGEP_ID(NP),		/**< do Not Proxy (queries; OOB) */
	GGEP_ID(P),			/**< Push alt-locs in HEAD Pongs */
	GGEP_ID(PATH),		/**< Shared file path, in query hits */
	GGEP_ID(PHC),		/**< Packed host caches (UHC) in pongs */
	GGEP_ID(PUSH),		/**< Push proxy info, in qhits */
	GGEP_ID(PUSH_TLS),	/**< TLS-capability bitmap for GGEP PUSH */
	GGEP_ID(Q),			/**< Queue status in HEAD Pongs */
	GGEP_ID(SCP),		/**< Supports cached pongs, in pings (UHC) */
	GGEP_ID(SO),		/**< Secure OOB */
	GGEP_ID(T),			/**< Same as ALT_TLS but for HEAD Pongs */
	GGEP_ID(TLS),		/**< TLS support indication */
	GGEP_ID(UA),		/**< User-Agent string */
	GGEP_ID(UDPHC),		/**< Is an UDP hostcache (UHC) , in pongs */
	GGEP_ID(UP),		/**< Ultrapeer information about free slots */
	GGEP_ID(V),			/**< Vendor code, in HEAD Pongs */
	GGEP_ID(VC),		/**< Vendor code, in pongs */
	GGEP_ID(VMSG),		/**< Array of supported vendor message codes */
	GGEP_ID(u),			/**< HUGE URN in ASCII */

#undef GGEP_GTKG_ID
#undef GGEP_ID
};

/**
 * Perform a dichotomic search for keywords in the reserved-word table.
 * The `case_sensitive' parameter governs whether lookup is done with or
 * without paying attention to case.
 *
 * @param case_sensitive	TRUE to compare with strcmp(), FALSE to compare
 *							with ascii_strcasecmp()
 * @param table				the reserved-word table to search
 * @param size				amount of entries in `table'
 * @param word				the NUL-terminated word to look for
 * @param retkw				where the matching table string is written;
 *							must not be NULL, set to NULL when nothing matches
 *
 * @return the keyword token value upon success, EXT_T_UNKNOWN if not found.
 * If keyword was found, its static shared string is returned in `retkw'.
 */
static ext_token_t
rw_screen(gboolean case_sensitive,
	const struct rwtable *table, size_t size,
	const gchar *word, const gchar **retkw)
{
	g_assert(retkw);

	/*
	 * GET_KEY() and FOUND() are the hooks handed to BINARY_SEARCH():
	 * the former yields the key of the i-th entry, the latter returns
	 * from this function with the matching entry.
	 */

#define GET_KEY(i) (table[(i)].rw_name)
#define FOUND(i) \
	G_STMT_START { \
		*retkw = table[(i)].rw_name; \
	   	return table[(i)].rw_token; \
		/* NOTREACHED */ \
	} G_STMT_END

	if (case_sensitive)
		BINARY_SEARCH(const gchar *, word, size,
				strcmp, GET_KEY, FOUND);
	else
		BINARY_SEARCH(const gchar *, word, size,
				ascii_strcasecmp, GET_KEY, FOUND);

#undef FOUND
#undef GET_KEY

	/* Falling out of the search means FOUND() never fired: no match */

	*retkw = NULL;
	return EXT_T_UNKNOWN;
}

/**
 * Ensure the reserved-word table is lexically sorted.
 */
static void
rw_is_sorted(const gchar *name,
	const struct rwtable *table, size_t size)
{
	size_t i;

	/* Skip the first to have a previous element, tables with a single
	 * element are sorted anyway. */
	for (i = 1; i < size; i++) {
		const struct rwtable *prev = &table[i - 1], *e = &table[i];

		if (
			prev->rw_token >= e->rw_token ||
			strcmp(prev->rw_name, e->rw_name) >= 0
		)
			g_error("reserved word table \"%s\" unsorted (near item \"%s\")",
				name, e->rw_name);

		if (ggeptable == table) {
			const gchar *s;

		   	s = ext_ggep_name(e->rw_token);
			if (0 != strcmp(s, e->rw_name)) {
				g_error("table \"%s\" has wrong GGEP ID (near item \"%s\")",
					name, e->rw_name);
			}
		}
	}
}

/**
 * @return the GGEP token value upon success, EXT_T_UNKNOWN_GGEP if not found.
 * If keyword was found, its static shared string is returned in `retkw'.
 */
static ext_token_t
rw_ggep_screen(gchar *word, const gchar **retkw)
{
	ext_token_t t;

	t = rw_screen(TRUE, ggeptable, G_N_ELEMENTS(ggeptable), word, retkw);

	return (t == EXT_T_UNKNOWN) ? EXT_T_UNKNOWN_GGEP : t;
}

/**
 * Look up a URN name in the URN table, without regard to case.
 *
 * @param word		the NUL-terminated URN name to look for
 * @param retkw		where the matching table string is written, or NULL
 *					when the name is not in the table
 *
 * @return the URN token value upon success, EXT_T_UNKNOWN if not found.
 */
static ext_token_t
rw_urn_screen(const gchar *word, const gchar **retkw)
{
	ext_token_t token;

	token = rw_screen(FALSE, urntable, G_N_ELEMENTS(urntable), word, retkw);

	return token;
}

/***
 *** Extension name atoms.
 ***/

/*
 * Maps a raw extension name to its printable form; entries are added by
 * ext_name_atom().
 * NOTE(review): the table is created and disposed of elsewhere in the file.
 */
static GHashTable *ext_names = NULL;

/**
 * Get the printable form of an extension name, recording it in the
 * `ext_names' table the first time the name is seen.
 *
 * The table is keyed by a private copy of the raw name.  The value is
 * whatever hex_escape() yields for that copy under strict escaping, where
 * non-printable chars are shown as hexadecimal escapes: \xhh.  When no
 * escaping is needed, the value is the key itself (same object).
 *
 * @param name		the raw, NUL-terminated extension name
 *
 * @return an atom string of that printable form.
 */
static gchar *
ext_name_atom(const gchar *name)
{
	gchar *printable;

	printable = g_hash_table_lookup(ext_names, name);

	if (NULL == printable) {
		gchar *raw;

		/* First encounter: remember the name along with its escaped form */

		raw = wcopy(name, 1 + strlen(name));
		printable = hex_escape(raw, TRUE);	/* strict escaping */

		g_hash_table_insert(ext_names, raw, printable);
	}

	return printable;
}

/**
 * Hash table iterator callback releasing one entry of `ext_names'.
 *
 * The value is released only when it reads differently from the key; the
 * key is always released, with the size it was copied with.
 *
 * @return TRUE, always.
 */
static gboolean
ext_names_kv_free(gpointer key, gpointer value, gpointer unused_udata)
{
	const gboolean differs = (strcmp(key, value) != 0);

	(void) unused_udata;

	if (differs) {
		G_FREE_NULL(value);
	}

	wfree(key, strlen(key) + 1);

	return TRUE;
}

/***
 *** Extension parsing.
 ***
 *** All the ext_xxx_parse routines share the same signature and behaviour:
 ***
 *** They extract one extension, as guessed by the leading byte introducing
 *** those extensions and return the amount of entries they added to the
 *** supplied extension vector (this will be typically 1 but for GGEP which
 *** is structured and can therefore grab more than one extension in one call).
 ***
 *** Upon entry, `*retp' points to the start of the extension, and there are
 *** `len' bytes to parse.  There are `exvcnt' slots available in the extension
 *** vector, starting at `exv'.
 ***
 *** On exit, `*retp' is updated to the first byte following the last
 *** successfully parsed byte.  If the returned value is 0, then `*retp' is
 *** not updated.
 ***/

/**
 * Parses a GGEP block (can hold several extensions).
 *
 * Each extension is made of a flags byte, an ID, a 1 to 3 byte payload
 * length and the payload itself.  Parsing stops after the extension flagged
 * as the last one, when the buffer is exhausted or when the extension
 * vector is full.
 *
 * Any malformed extension invalidates the whole block: the descriptors
 * allocated by this call are released and 0 is returned.
 *
 * @param retp		on entry, points to the first flags byte (the GGEP magic
 *					byte was already consumed by the caller); on success,
 *					updated to point after the last parsed extension
 * @param len		amount of bytes available at `*retp'
 * @param exv		first free slot of the extension vector
 * @param exvcnt	amount of free slots in `exv'
 *
 * @return the amount of extensions stored in `exv', 0 if the data cannot
 * be a valid GGEP block (in which case `*retp' is left untouched).
 */
static gint
ext_ggep_parse(const gchar **retp, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *p = *retp;
	const gchar *end = &p[len];
	const gchar *lastp = p;				/* Last parsed point */
	gint count;

	for (count = 0; count < exvcnt && p < end; /* empty */) {
		guchar flags;
		gchar id[GGEP_F_IDLEN + 1];
		guint id_len, data_length, i;
		gboolean length_ended = FALSE;
		const gchar *name;
		extdesc_t *d;

		g_assert(exv->opaque == NULL);

		/*
		 * First byte is GGEP flags.
		 */

		flags = (guchar) *p++;

		if (flags & GGEP_F_MBZ)		/* A byte that Must Be Zero is set */
			goto abort;

		id_len = flags & GGEP_F_IDLEN;
		g_assert(id_len < sizeof id);

		if (id_len == 0)
			goto abort;

		if ((size_t) (end - p) < id_len) /* Not enough bytes to store the ID! */
			goto abort;

		/*
		 * Read ID, and NUL-terminate it.
		 *
		 * As a safety precaution, only allow ASCII IDs, and nothing in
		 * the control space.  It's not really in the GGEP specs, but it's
		 * safer that way, and should protect us if we parse garbage starting
		 * with 0xC3....
		 *		--RAM, 2004-11-12
		 */

		for (i = 0; i < id_len; i++) {
			gint c = *p++;
			if (c == '\0' || !isascii(c) || is_ascii_cntrl(c))
				goto abort;
			id[i] = c;
		}
		id[i] = '\0';

		/*
		 * Read the payload length (maximum of 3 bytes).
		 */

		data_length = 0;
		for (i = 0; i < 3 && p < end; i++) {
			guchar b = *p++;

			/*
			 * Either GGEP_L_CONT or GGEP_L_LAST must be set, thereby
			 * ensuring that the byte cannot be NUL.
			 */

			if (((b & GGEP_L_XFLAGS) == GGEP_L_XFLAGS) || !(b & GGEP_L_XFLAGS))
				goto abort;

			data_length = (data_length << GGEP_L_VSHIFT) | (b & GGEP_L_VALUE);

			if (b & GGEP_L_LAST) {
				length_ended = TRUE;
				break;
			}
		}

		if (!length_ended)
			goto abort;

		/*
		 * Ensure we have enough bytes left for the payload.  If not, it
		 * means the length is garbage.
		 */

		if ((size_t) (end - p) < data_length)
			goto abort;

		/*
		 * The descriptor records the extension length (header + payload)
		 * and the payload length in 16-bit fields, whereas up to 18 bits
		 * of payload length can be read above.  Refuse anything that would
		 * not fit rather than storing truncated lengths.
		 */

		if ((size_t) (p - lastp) + data_length > 0xFFFFU)
			goto abort;

		/*
		 * Some sanity checks:
		 *
		 * A COBS-encoded buffer can be trivially validated.
		 * A deflated payload must be at least 6 bytes with a valid header.
		 */

		if (flags & (GGEP_F_COBS|GGEP_F_DEFLATE)) {
			guint d_len = data_length;

			if (flags & GGEP_F_COBS) {
				if (d_len == 0 || !cobs_is_valid(p, d_len))
					goto abort;
				d_len--;					/* One byte of overhead */
			}

			if (flags & GGEP_F_DEFLATE) {
				guint offset = 0;

				if (d_len < 6)
					goto abort;

				/*
				 * If COBS-ed, since neither the first byte nor the
				 * second byte of the raw deflated payload can be NUL,
				 * the leading COBS code will be at least 3.  Then
				 * the next 2 bytes are the raw deflated header.
				 *
				 * If not COBS-ed, check whether payload holds a valid
				 * deflated header.
				 */

				if (flags & GGEP_F_COBS) {
					if ((guchar) *p < 3)
						goto abort;
					offset = 1;			/* Skip leading byte */
				}

				if (!zlib_is_valid_header(p + offset, d_len))
					goto abort;
			}
		}

		/*
		 * OK, at this point we have validated the GGEP header.
		 *
		 * No "goto abort" may happen past this point for the current
		 * extension: the cleanup code only knows about the descriptors
		 * of the `count' previous ones.
		 */

		d = walloc(sizeof *d);

		d->ext_phys_payload = p;
		d->ext_phys_paylen = data_length;
		d->ext_phys_len = (p - lastp) + data_length;
		d->ext_ggep_cobs = flags & GGEP_F_COBS;
		d->ext_ggep_deflate = flags & GGEP_F_DEFLATE;

		if (0 == (flags & (GGEP_F_COBS|GGEP_F_DEFLATE))) {
			d->ext_payload = d->ext_phys_payload;
			d->ext_paylen = d->ext_phys_paylen;
		} else
			d->ext_payload = NULL;		/* Will lazily compute, if accessed */

		exv->opaque = d;

		g_assert(ext_phys_headlen(d) >= 0);

		/*
		 * Look whether we know about this extension.
		 *
		 * If we do, the name is the ID as well.  Otherwise, for tracing
		 * and debugging purposes, save the name away, once.
		 */

		exv->ext_type = EXT_GGEP;
		exv->ext_token = rw_ggep_screen(id, &name);
		exv->ext_name = name;

		if (name != NULL)
			d->ext_ggep_id = name;
		else
			d->ext_ggep_id = ext_name_atom(id);

		/*
		 * One more entry, prepare next iteration.
		 */

		exv++;
		count++;
		lastp = p + data_length;
		p = lastp;

		/*
		 * Was this the last extension?
		 */

		if (flags & GGEP_F_LAST)
			break;
	}

	*retp = lastp;	/* Points to first byte after what we parsed */

	return count;

abort:
	/*
	 * Cleanup any extension we already parsed.
	 */

	while (count--) {
		exv--;
		wfree(exv->opaque, sizeof(extdesc_t));
		exv->opaque = NULL;
	}

	return 0;		/* Cannot be a GGEP block: leave parsing pointer intact */
}

/**
 * Parses a truncated URN: exactly 3 bytes spelling "urn" (in any case),
 * which get wrapped as one opaque chunk bearing the EXT_T_URN_BAD token.
 *
 * @param retp		on entry, points to the start of the data; updated to
 *					point after the parsed bytes on success
 * @param len		amount of bytes available at `*retp'
 * @param exv		the extension vector slot to fill
 * @param exvcnt	amount of free slots in `exv', must be positive
 *
 * @return 1 if the slot was filled, 0 otherwise (`*retp' left untouched).
 */
static gint
ext_urn_bad_parse(const gchar **retp, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *start = *retp;
	const gchar *after;
	extdesc_t *d;

	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	if (3 != len)
		return 0;

	after = is_strcaseprefix(start, "urn");
	if (NULL == after)
		return 0;

	/*
	 * The whole thing is the payload: there is no header.
	 */

	d = walloc(sizeof *d);

	d->ext_phys_payload = start;
	d->ext_phys_paylen = after - start;
	d->ext_phys_len = d->ext_phys_paylen;
	d->ext_payload = d->ext_phys_payload;
	d->ext_paylen = d->ext_phys_paylen;

	exv->opaque = d;
	exv->ext_type = EXT_NONE;
	exv->ext_name = NULL;
	exv->ext_token = EXT_T_URN_BAD;

	g_assert(after - start == d->ext_phys_len);

	*retp = after;		/* First byte following what was parsed */

	return 1;
}


/**
 * Parses a URN block (one URN only).
 *
 * The expected form is "urn:<name>:<payload>", the "urn:" prefix being
 * matched without regard to case.  A "urn:" that is immediately followed by
 * the end of the data, a NUL byte or the HUGE field separator is an empty
 * URN.  Data shorter than 4 bytes is handed over to ext_urn_bad_parse().
 *
 * The name part is the header of the extension, the payload being the run
 * of ASCII alphanumeric characters following the second ':'.
 *
 * @param retp		on entry, points to the start of the data; updated to
 *					point after the parsed bytes on success
 * @param len		amount of bytes available at `*retp'
 * @param exv		the extension vector slot to fill
 * @param exvcnt	amount of free slots in `exv', must be positive
 *
 * @return 1 if the slot was filled, 0 otherwise (`*retp' left untouched).
 */
static gint
ext_huge_parse(const gchar **retp, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *start = *retp;
	const gchar *end = &start[len];
	const gchar *cursor;
	const gchar *payload = NULL;
	const gchar *name = NULL;
	ext_token_t token;
	gint paylen;
	extdesc_t *d;

	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	/*
	 * Too short to even hold "urn:"?
	 */

	if (len < 4)
		return ext_urn_bad_parse(retp, len, exv, exvcnt);

	cursor = is_strcaseprefix(start, "urn:");
	if (NULL == cursor)
		return 0;

	if (cursor == end || *cursor == '\0' || *cursor == HUGE_FS) {
		/*
		 * Nothing follows "urn:": empty specification.
		 */

		token = EXT_T_URN_EMPTY;
		payload = cursor;
		paylen = 0;
	} else {
		gchar name_buf[16];
		const gchar *colon;
		size_t name_len;

		/*
		 * The name runs up to the next ':'.  It must be present, non-empty
		 * and fit in the buffer along with its trailing NUL.
		 */

		colon = memchr(cursor, ':', end - cursor);
		if (NULL == colon)
			return 0;

		name_len = colon - cursor;
		if (0 == name_len || name_len >= sizeof name_buf)
			return 0;

		memcpy(name_buf, cursor, name_len);
		name_buf[name_len] = '\0';

		token = rw_urn_screen(name_buf, &name);

		/*
		 * The payload is made of alphanum chars and stops at the first
		 * byte which is not (NUL byte, GEM separator...) or at a GGEP
		 * magic byte.
		 */

		payload = &colon[1];
		for (cursor = payload; cursor < end; cursor++) {
			guchar c = *cursor;
			if (c == GGEP_MAGIC || !is_ascii_alnum(c))
				break;
		}
		paylen = cursor - payload;
	}

	g_assert(payload);

	d = walloc(sizeof *d);

	d->ext_phys_payload = payload;
	d->ext_phys_paylen = paylen;
	d->ext_phys_len = (payload - start) + paylen;
	d->ext_payload = d->ext_phys_payload;
	d->ext_paylen = d->ext_phys_paylen;

	exv->opaque = d;
	exv->ext_type = EXT_HUGE;
	exv->ext_name = name;
	exv->ext_token = token;

	g_assert(ext_phys_headlen(d) >= 0);
	g_assert(cursor - start == d->ext_phys_len);

	*retp = cursor;		/* First byte following what was parsed */

	return 1;
}

/**
 * Parses a XML block: everything up to, and excluding, the first NUL byte
 * or HUGE field separator (or up to the end of the data when there is none)
 * is wrapped as one opaque XML extension.  The XML itself is not analyzed.
 *
 * @param retp		on entry, points to the start of the data; updated to
 *					point after the parsed bytes
 * @param len		amount of bytes available at `*retp'
 * @param exv		the extension vector slot to fill
 * @param exvcnt	amount of free slots in `exv', must be positive
 *
 * @return 1, always.
 */
static gint
ext_xml_parse(const gchar **retp, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *start = *retp;
	const gchar *end = &start[len];
	const gchar *p = start;
	extdesc_t *d;

	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	while (p != end) {
		guchar c = *p;

		if ('\0' == c || HUGE_FS == c)
			break;
		p++;
	}

	/*
	 * No header here: the payload is the whole chunk.
	 */

	d = walloc(sizeof *d);

	d->ext_phys_payload = start;
	d->ext_phys_paylen = p - start;
	d->ext_phys_len = d->ext_phys_paylen;
	d->ext_payload = d->ext_phys_payload;
	d->ext_paylen = d->ext_phys_paylen;

	exv->opaque = d;
	exv->ext_type = EXT_XML;
	exv->ext_name = NULL;
	exv->ext_token = EXT_T_XML;

	g_assert(p - start == d->ext_phys_len);

	*retp = p;			/* First byte following what was parsed */

	return 1;
}

/**
 * Parses an unknown block, attempting to resynchronize on a known separator.
 * Everything up to the resync point is wrapped as an "unknown" extension.
 *
 * A resync point is a NUL byte, the HUGE field separator, the GGEP magic
 * byte, the start of "urn:" (in any case) or a '<' followed by an ASCII
 * letter.
 *
 * @param retp		on entry, points to the start of the data; updated to
 *					point at the resync point, or after the data if none
 * @param len		amount of bytes available at `*retp'
 * @param exv		the extension vector slot to fill
 * @param exvcnt	amount of free slots in `exv', must be positive
 * @param skip		if TRUE, we don't resync on the first resync point
 *
 * @return 1, always.
 */
static gint
ext_unknown_parse(const gchar **retp, gint len, extvec_t *exv,
	gint exvcnt, gboolean skip)
{
	const gchar *start = *retp;
	const gchar *p = start;
	extdesc_t *d;

	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	while (len > 0) {
		const guchar c = (guchar) *p;
		gboolean resync;

		if ('\0' == c || HUGE_FS == c || GGEP_MAGIC == c)
			resync = TRUE;
		else if ('u' == c || 'U' == c)
			resync = len >= 4 && is_strcaseprefix(p, "urn:");
		else if ('<' == c)
			resync = len >= 2 && is_ascii_alpha((guchar) p[1]);
		else
			resync = FALSE;

		if (resync) {
			if (!skip)
				break;
			skip = FALSE;		/* Only the first resync point is skipped */
		}

		p++;
		len--;
	}

	/*
	 * No header here: the payload is the whole chunk.
	 */

	d = walloc(sizeof *d);

	d->ext_phys_payload = start;
	d->ext_phys_paylen = p - start;
	d->ext_phys_len = d->ext_phys_paylen;
	d->ext_payload = d->ext_phys_payload;
	d->ext_paylen = d->ext_phys_paylen;

	exv->opaque = d;
	exv->ext_type = EXT_UNKNOWN;
	exv->ext_name = NULL;
	exv->ext_token = EXT_T_UNKNOWN;

	g_assert(p - start == d->ext_phys_len);

	*retp = p;			/* First byte following what was parsed */

	return 1;
}

/**
 * Parses a "no extension" block, made of NUL bytes or HUGE field separators
 * exclusively.  Such bytes carry no information: they are pure overhead.
 *
 * A run of consecutive separators is wrapped as a single "none" extension
 * bearing the EXT_T_OVERHEAD token.
 *
 * @param retp		on entry, points to the start of the data; updated to
 *					point after the run of separators on success
 * @param len		amount of bytes available at `*retp'
 * @param exv		the extension vector slot to fill
 * @param exvcnt	amount of free slots in `exv', must be positive
 *
 * @return 1 if the slot was filled, 0 if the data does not start with a
 * separator (`*retp' left untouched).
 */
static gint
ext_none_parse(const gchar **retp, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *start = *retp;
	const gchar *end = &start[len];
	const gchar *p = start;
	extdesc_t *d;

	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	while (p != end) {
		guchar c = *p;

		if (!('\0' == c || HUGE_FS == c))
			break;
		p++;
	}

	/*
	 * Not having moved means there was no leading separator, hence
	 * no "null" extension.
	 */

	if (start == p)
		return 0;

	/*
	 * No header here: the payload is the whole chunk.
	 */

	d = walloc(sizeof *d);

	d->ext_phys_payload = start;
	d->ext_phys_paylen = p - start;
	d->ext_phys_len = d->ext_phys_paylen;
	d->ext_payload = d->ext_phys_payload;
	d->ext_paylen = d->ext_phys_paylen;

	exv->opaque = d;
	exv->ext_type = EXT_NONE;
	exv->ext_name = NULL;
	exv->ext_token = EXT_T_OVERHEAD;

	g_assert(p - start == d->ext_phys_len);

	*retp = p;			/* First byte following what was parsed */

	return 1;
}

/**
 * Merge two consecutive extensions `exv' and `next' into a single one,
 * held in `exv'.  The resulting extension keeps the type of `exv'.
 *
 * The physical data of `next' must start either right after the end of
 * `exv' or one byte further (single-byte separator).  The descriptor of
 * `next' is released and its `opaque' field reset to NULL.
 *
 * @param exv		the extension which is extended
 * @param next		the extension which is absorbed
 */
static void
ext_merge_adjacent(extvec_t *exv, extvec_t *next)
{
	extdesc_t *head = exv->opaque;
	extdesc_t *tail = next->opaque;
	const gchar *head_end;
	const gchar *tail_base;
	const gchar *tail_end;
	guint16 growth;

	g_assert(exv->opaque != NULL);
	g_assert(next->opaque != NULL);

	head_end = head->ext_phys_payload + head->ext_phys_paylen;
	tail_base = ext_phys_base(tail);
	tail_end = tail->ext_phys_payload + tail->ext_phys_paylen;

	g_assert(tail_base + tail->ext_phys_len == tail_end);
	g_assert(tail_end > head_end);

	/*
	 * At most one separator byte may lie between the two extensions.
	 */

	g_assert(tail_base == head_end || tail_base == (head_end + 1));

	/*
	 * Appending is simply a matter of making the head extension longer
	 * by the size of the tail, separator included.
	 */

	growth = tail_end - head_end;

	head->ext_phys_len += growth;
	head->ext_phys_paylen += growth;

	if (head->ext_payload != NULL) {
		g_assert(head->ext_payload == head->ext_phys_payload);

		head->ext_paylen += growth;
	}

	/*
	 * The tail descriptor is no longer needed.  It must not own any
	 * computed "virtual" payload by now.
	 */

	g_assert(
		tail->ext_payload == NULL ||
		tail->ext_payload == tail->ext_phys_payload);

	wfree(tail, sizeof *tail);
	next->opaque = NULL;
}

/**
 * Parse extension block of `len' bytes starting at `buf' and fill the
 * supplied extension vector `exv', whose size is `exvcnt' entries.
 *
 * The leading byte of each extension selects the parser: GGEP magic byte,
 * 'u' or 'U' for HUGE, '<' for XML, NUL or HUGE separator for overhead,
 * anything else being "unknown".  Parsing stops when the buffer is
 * exhausted or when no free entry is left in the vector.
 *
 * An "unknown" or "none" extension directly following an "unknown" one is
 * merged into it, so it does not consume an entry of its own.
 *
 * @param buf		start of the extension block, must not be NULL
 * @param len		length of the block, must be positive
 * @param exv		the extension vector to fill; the `opaque' field of the
 *					entries is expected to be NULL on entry
 * @param exvcnt	amount of entries in `exv', must be positive
 *
 * @return the number of filled entries.
 */
gint
ext_parse(const gchar *buf, gint len, extvec_t *exv, gint exvcnt)
{
	const gchar *p = buf, *end = &buf[len];
	gint cnt = 0;

	g_assert(buf);
	g_assert(len > 0);
	g_assert(exv);
	g_assert(exvcnt > 0);
	g_assert(exv->opaque == NULL);

	/*
	 * Throughout the loop, `len' is the amount of bytes left at `p'.
	 */

	while (p < end && exvcnt > 0) {
		const gchar *old_p = p;
		gint found = 0;

		g_assert(len > 0);

		/*
		 * From now on, all new Gnutella extensions will be done via GGEP.
		 * However, we have to be backward compatible with legacy extensions
		 * that predate GGEP (HUGE and XML) and were not properly encapsulated.
		 */

		switch ((guchar) *p) {
		case GGEP_MAGIC:
			p++;					/* Parser expects magic to be consumed */
			if (p == end)
				goto out;
			found = ext_ggep_parse(&p, len-1, exv, exvcnt);
			break;
		case 'u':
		case 'U':
			found = ext_huge_parse(&p, len, exv, exvcnt);
			break;
		case '<':
			found = ext_xml_parse(&p, len, exv, exvcnt);
			break;
		case HUGE_FS:
		case '\0':
			p++;					/* First separator is never reported */
			if (p == end)
				goto out;
			found = ext_none_parse(&p, len-1, exv, exvcnt);
			if (!found) {
				len--;
				continue;			/* Single separator, no bloat then */
			}
			break;
		default:
			found = ext_unknown_parse(&p, len, exv, exvcnt, FALSE);
			break;
		}

		/*
		 * If parsing did not advance one bit, grab as much as we can as
		 * an "unknown" extension.
		 */

		g_assert(found == 0 || p != old_p);

		if (found == 0) {
			g_assert((guchar) *old_p == GGEP_MAGIC || p == old_p);

			/*
			 * If we were initially on a GGEP magic byte, and since we did
			 * not find any valid GGEP extension, go back one byte.  We're
			 * about to skip the first synchronization point...
			 */

			if ((guchar) *old_p == GGEP_MAGIC) {
				p--;
				g_assert(p == old_p);
			}

			found = ext_unknown_parse(&p, len, exv, exvcnt, TRUE);
		}

		g_assert(found > 0);
		g_assert(found <= exvcnt);
		g_assert(p != old_p);

		len -= p - old_p;

		/*
		 * If we found an "unknown" or "none" extension, and the previous
		 * extension was "unknown", merge them.  The result will be "unknown".
		 */

		if (
			found == 1 && cnt > 0 &&
			(exv->ext_type == EXT_UNKNOWN || exv->ext_type == EXT_NONE)
		) {
			extvec_t *prev = exv - 1;
			if (prev->ext_type == EXT_UNKNOWN) {
				/* Merging resets exv->opaque, so the entry can be reused */
				ext_merge_adjacent(prev, exv);
				continue;					/* Don't move `exv' */
			}
		}

		exv += found;
		exvcnt -= found;
		cnt += found;
	}

out:
	return cnt;
}

/**
 * Inflate `len' bytes starting at `buf', up to GGEP_MAXLEN bytes.
 * The payload `name' is given only in case there is an error to report.
 *
 * @param buf		the deflated data, must not be NULL
 * @param len		length of the deflated data, must be positive
 * @param retlen	where the inflated length is written on success; left
 *					untouched on error
 * @param name		the payload name, for logging purposes only
 *
 * @returns the allocated inflated buffer, and its inflated length in `retlen'.
 * The buffer comes from g_malloc() / g_realloc().
 * @returns NULL on error.
 */
static gchar *
ext_ggep_inflate(const gchar *buf, gint len, guint16 *retlen, const gchar *name)
{
	gchar *result;					/* Inflated buffer */
	gint rsize;						/* Result's buffer size */
	z_streamp inz;
	gint ret;
	gint inflated;					/* Amount of inflated data so far */
	gboolean failed = FALSE;

	g_assert(buf);
	g_assert(len > 0);
	g_assert(retlen);

	/*
	 * Allocate decompressor.
	 */

	inz = walloc(sizeof(*inz));

	inz->zalloc = zlib_alloc_func;
	inz->zfree = zlib_free_func;
	inz->opaque = NULL;

	ret = inflateInit(inz);

	if (ret != Z_OK) {
		wfree(inz, sizeof(*inz));
		g_warning("unable to setup decompressor for GGEP payload \"%s\": %s",
			name, zlib_strerror(ret));
		return NULL;
	}

	rsize = len * 2;				/* Assume a 50% compression ratio */
	rsize = MIN(rsize, GGEP_MAXLEN);
	result = g_malloc(rsize);

	/*
	 * Prepare call to inflate().
	 */

	inz->next_in = (gpointer) buf;
	inz->avail_in = len;

	inflated = 0;

	for (;;) {
		/*
		 * Resize output buffer if needed.
		 * Never grow the result buffer to more than GGEP_MAXLEN bytes.
		 */

		if (rsize == inflated) {
			rsize += MAX(len, GGEP_GROW);
			rsize = MIN(rsize, GGEP_MAXLEN);

			if (rsize == inflated) {		/* Reached maximum size! */
				g_warning("GGEP payload \"%s\" would be larger than %d bytes",
					name, GGEP_MAXLEN);
				failed = TRUE;
				break;
			}

			g_assert(rsize > inflated);

			result = g_realloc(result, rsize);
		}

		/* Output goes right after what was inflated so far */

		inz->next_out = (guchar *) result + inflated;
		inz->avail_out = rsize - inflated;

		/*
		 * Decompress data.
		 */

		ret = inflate(inz, Z_SYNC_FLUSH);

		/* Account for the bytes produced by this call */
		inflated += rsize - inflated - inz->avail_out;

		g_assert(inflated <= rsize);

		if (ret == Z_STREAM_END)				/* All done! */
			break;

		if (ret != Z_OK) {
			g_warning("decompression of GGEP payload \"%s\" failed: %s",
				name, zlib_strerror(ret));
			failed = TRUE;
			break;
		}
	}

	/*
	 * Dispose of decompressor.
	 */

	ret = inflateEnd(inz);
	if (ret != Z_OK)
		g_warning("while freeing decompressor for GGEP payload \"%s\": %s",
			name, zlib_strerror(ret));

	wfree(inz, sizeof(*inz));

	/*
	 * Return NULL on error, fill `retlen' if OK.
	 */

	if (failed) {
		G_FREE_NULL(result);
		return NULL;
	}

	*retlen = inflated;

	g_assert(*retlen == inflated);	/* Make sure it was not truncated */

	return result;					/* OK, successfully inflated */
}

/**
 * Compute the "virtual" payload of the GGEP extension `e', whose physical
 * payload is COBS-encoded and/or deflated.  It must not have been computed
 * already.
 *
 * COBS decoding happens first, then inflation if the payload is deflated.
 * On success, the descriptor refers to a newly allocated buffer holding
 * the decoded data:
 *
 * - COBS only: a walloc()'ed buffer, whose size is recorded in the
 *   descriptor's ext_rpaylen field.
 * - deflated: the buffer returned by ext_ggep_inflate(), with ext_rpaylen
 *   set to 0.
 *
 * On failure, the descriptor is set up with a zero-length payload so that
 * decoding is not attempted again.
 */
static void
ext_ggep_decode(const extvec_t *e)
{
	const gchar *src;				/* Data to process next */
	size_t src_len;					/* Length of that data */
	gchar *cobs_buf = NULL;			/* Holds COBS-decoded data */
	size_t cobs_buf_len = 0;		/* Size of walloc()'ed `cobs_buf' */
	size_t decoded;					/* Amount of COBS-decoded bytes */
	extdesc_t *d;

	g_assert(e);
	g_assert(e->ext_type == EXT_GGEP);
	g_assert(e->opaque != NULL);

	d = e->opaque;

	g_assert(d->ext_ggep_cobs || d->ext_ggep_deflate);
	g_assert(d->ext_payload == NULL);

	src = d->ext_phys_payload;
	src_len = d->ext_phys_paylen;

	if (0 == src_len)
		goto done;

	/*
	 * Undo the COBS encoding first, if any.
	 */

	if (d->ext_ggep_cobs) {
		cobs_buf_len = src_len;
		cobs_buf = walloc(cobs_buf_len);	/* At worse slightly oversized */

		if (!cobs_decode_into(src, src_len, cobs_buf, src_len, &decoded)) {
			if (GNET_PROPERTY(ggep_debug))
				g_warning("unable to decode COBS buffer");
			goto done;
		}

		g_assert(decoded <= src_len);

		if (!d->ext_ggep_deflate) {
			/*
			 * Not deflated: the COBS buffer is the final payload and is
			 * now owned by the descriptor.
			 */

			d->ext_payload = cobs_buf;
			d->ext_paylen = decoded;
			d->ext_rpaylen = src_len;	/* Signals it was walloc()'ed */

			return;
		}

		/*
		 * Inflation will read from the COBS buffer.
		 */

		src = cobs_buf;
		src_len = decoded;

		if (0 == src_len)	/* 0 bytes cannot be a valid deflated payload */
			goto done;
	}

	/*
	 * Getting here means the payload is deflated.
	 */

	g_assert(d->ext_ggep_deflate);

	d->ext_rpaylen = 0;			/* Signals it was malloc()'ed */
	d->ext_payload =
		ext_ggep_inflate(src, src_len, &d->ext_paylen, d->ext_ggep_id);

done:
	if (cobs_buf != NULL)
		wfree(cobs_buf, cobs_buf_len);

	/*
	 * No payload at this point means something failed: fall back to an
	 * empty payload, which prevents further decoding attempts.
	 */

	if (NULL == d->ext_payload) {
		if (GNET_PROPERTY(dbg) || GNET_PROPERTY(ggep_debug))
			g_warning("unable to get GGEP \"%s\" %d-byte payload (%s)",
				d->ext_ggep_id, d->ext_phys_paylen,
				(d->ext_ggep_deflate && d->ext_ggep_cobs) ? "COBS + deflated" :
				d->ext_ggep_cobs ? "COBS" : "deflated");

		d->ext_paylen = 0;
		d->ext_payload = d->ext_phys_payload;
	}
}

/**
 * Get the "virtual" payload of the extension, decoding a COBS-ed and/or
 * deflated GGEP payload on first access.
 *
 * @returns a pointer to the extension's payload.
 */
gconstpointer
ext_payload(const extvec_t *e)
{
	extdesc_t *desc;

	g_assert(e->opaque != NULL);

	desc = e->opaque;

	/* A NULL payload means decoding was deferred until now */

	if (desc->ext_payload == NULL)
		ext_ggep_decode(e);

	return desc->ext_payload;
}

/**
 * Get the length of the "virtual" payload of the extension, decoding a
 * COBS-ed and/or deflated GGEP payload on first access.
 *
 * @returns the length of the extension's payload, in bytes.
 */
guint16
ext_paylen(const extvec_t *e)
{
	extdesc_t *desc;

	g_assert(e->opaque != NULL);

	desc = e->opaque;

	/* A NULL payload means decoding was deferred until now */

	if (desc->ext_payload == NULL)
		ext_ggep_decode(e);

	return desc->ext_paylen;
}

/**
 * Get the start of the extension, as physically present in the message.
 *
 * @returns a pointer to the extension's header.
 *
 * @warning the actual "virtual" payload may not be contiguous to the end
 * of the header: don't read past the ext_headlen() first bytes of the
 * header.
 */
const gchar *
ext_base(const extvec_t *e)
{
	const extdesc_t *desc;

	g_assert(e->opaque != NULL);

	desc = e->opaque;

	return ext_phys_base(desc);
}

/**
 * Get the length of the extension's header.
 *
 * @returns the length of the extension's header.
 */
guint16
ext_headlen(const extvec_t *e)
{
	extdesc_t *desc;

	g_assert(e->opaque != NULL);

	desc = e->opaque;
	return ext_phys_headlen(desc);
}

/**
 * Get the total length of the extension.
 *
 * The payload length is obtained through ext_paylen(), which decompresses
 * and/or COBS-decodes the payload when that has not been done yet, and
 * simply returns the cached length otherwise.
 *
 * @returns the total length of the extension (payload + extension header),
 * narrowed to a guint16.
 */
guint16
ext_len(const extvec_t *e)
{
	extdesc_t *desc;
	gint hdr_size;

	g_assert(e->opaque != NULL);

	desc = e->opaque;
	hdr_size = ext_phys_headlen(desc);

	return hdr_size + ext_paylen(e);
}

/**
 * Get the GGEP ID of an extension as a string.
 *
 * @returns extension's GGEP ID, or "" if not a GGEP one.
 */
const gchar *
ext_ggep_id_str(const extvec_t *e)
{
	extdesc_t *desc;

	g_assert(e->opaque != NULL);

	desc = e->opaque;

	return EXT_GGEP == e->ext_type ? desc->ext_ggep_id : "";
}

/**
 * Check whether every byte of the extension's payload passes isprint().
 *
 * An empty payload is considered printable.
 *
 * @return TRUE if extension is printable.
 */
gboolean
ext_is_printable(const extvec_t *e)
{
	const guchar *data = ext_payload(e);
	size_t size = ext_paylen(e);
	size_t i;

	for (i = 0; i < size; i++) {
		if (!isprint(data[i]))
			return FALSE;
	}

	return TRUE;
}

/**
 * Check whether every byte of the extension's payload passes isascii().
 *
 * An empty payload is considered ASCII.
 *
 * @return TRUE if extension is ASCII.
 */
gboolean
ext_is_ascii(const extvec_t *e)
{
	const guchar *data = ext_payload(e);
	size_t size = ext_paylen(e);
	size_t i;

	for (i = 0; i < size; i++) {
		if (!isascii(data[i]))
			return FALSE;
	}

	return TRUE;
}

/**
 * Check whether the extension's payload is made of ASCII bytes only and
 * holds at least one byte accepted by is_ascii_alnum().
 *
 * Scanning stops at the first non-ASCII byte. An empty payload yields FALSE.
 *
 * @return TRUE if extension is ASCII and contains at least one
 * alphanumeric character.
 */
gboolean
ext_has_ascii_word(const extvec_t *e)
{
	const guchar *data = ext_payload(e);
	size_t size = ext_paylen(e);
	size_t i;
	gboolean seen_alnum = FALSE;

	for (i = 0; i < size; i++) {
		const guchar c = data[i];

		if (!isascii(c))
			return FALSE;

		seen_alnum |= is_ascii_alnum(c);
	}

	return seen_alnum;
}

/**
 * Dump an extension to specified stdio stream.
 *
 * A one-line summary (type, token, optional name, payload size and, for
 * GGEP extensions, the ID and COBS / deflate flags) is emitted first,
 * surrounded by `prefix' and `postfix' when they are non-NULL.
 *
 * @param f			the stream to write to
 * @param e			the extension to dump
 * @param prefix	if non-NULL, string emitted before each output line
 * @param postfix	if non-NULL, string emitted after each output line
 * @param payload	whether a non-empty payload must be dumped as well:
 *					as text when printable, in hexadecimal otherwise
 */
static void
ext_dump_one(FILE *f, const extvec_t *e, const gchar *prefix,
	const gchar *postfix, gboolean payload)
{
	guint16 size;
	const gchar *plural;

	g_assert(e->ext_type < EXT_TYPE_COUNT);
	g_assert(e->opaque != NULL);

	/*
	 * Summary line.
	 */

	if (prefix != NULL)
		fputs(prefix, f);

	fputs(extype[e->ext_type], f);
	fprintf(f, " (token=%d) ", e->ext_token);

	if (e->ext_name != NULL)
		fprintf(f, "\"%s\" ", e->ext_name);

	size = ext_paylen(e);		/* Decodes the payload if needed */
	plural = (1 == size) ? "" : "s";

	fprintf(f, "%d byte%s", size, plural);

	if (EXT_GGEP == e->ext_type) {
		extdesc_t *desc = e->opaque;
		const gchar *cobs = desc->ext_ggep_cobs ? "yes" : "no";
		const gchar *deflate = desc->ext_ggep_deflate ? "yes" : "no";

		fprintf(f, " (ID=\"%s\", COBS: %s, deflate: %s)",
			desc->ext_ggep_id, cobs, deflate);
	}

	if (postfix != NULL)
		fputs(postfix, f);

	/*
	 * Optional payload dump.
	 */

	if (payload && size > 0) {
		if (!ext_is_printable(e)) {
			dump_hex(f, "Payload", ext_payload(e), size);
		} else {
			if (prefix != NULL)
				fputs(prefix, f);

			fputs("Payload: ", f);
			fwrite(ext_payload(e), size, 1, f);

			if (postfix != NULL)
				fputs(postfix, f);
		}
	}

	fflush(f);
}

/**
 * Dump all extensions in vector to specified stdio stream.
 *
 * Each of the first `exvcnt' entries of `exv' is handed to ext_dump_one(),
 * in order.
 *
 * The `prefix' and `postfix' strings, if non-NULL, are emitted before and
 * after the extension summary.
 *
 * If `payload' is true, the payload is dumped in hexadecimal if it contains
 * non-printable characters, as text otherwise.
 */
void
ext_dump(FILE *fd, const extvec_t *exv, gint exvcnt,
	const gchar *prefix, const gchar *postfix, gboolean payload)
{
	const extvec_t *cur = exv;
	gint left;

	for (left = exvcnt; left > 0; left--, cur++)
		ext_dump_one(fd, cur, prefix, postfix, payload);
}

/**
 * Prepare the vector for parsing, by clearing the `opaque' pointer of
 * each of its first `exvcnt' entries.
 */
void
ext_prepare(extvec_t *exv, gint exvcnt)
{
	extvec_t *slot = exv;
	gint left;

	for (left = exvcnt; left > 0; left--, slot++)
		slot->opaque = NULL;
}

/**
 * Reset an extension vector by disposing of the opaque structures
 * and of any allocated "virtual" payload.
 *
 * Processing stops at the first entry whose `opaque' pointer is NULL, or
 * after `exvcnt' entries, whichever comes first. Each processed entry has
 * its `opaque' pointer set back to NULL.
 */
void
ext_reset(extvec_t *exv, gint exvcnt)
{
	gint i;

	/* A NULL opaque pointer means there are no more allocated extensions */

	for (i = 0; i < exvcnt && exv[i].opaque != NULL; i++) {
		extvec_t *slot = &exv[i];
		extdesc_t *desc = slot->opaque;

		/*
		 * A payload distinct from the physical one was allocated when
		 * decoding: a zero ext_rpaylen marks a malloc()'ed buffer,
		 * otherwise ext_rpaylen is the size handed back to wfree().
		 */

		if (
			desc->ext_payload != NULL &&
			desc->ext_payload != desc->ext_phys_payload
		) {
			gpointer buf = deconstify_gpointer(desc->ext_payload);

			if (0 != desc->ext_rpaylen) {
				wfree(buf, desc->ext_rpaylen);
			} else {
				g_free(buf);
			}
			desc->ext_payload = NULL;
		}

		wfree(desc, sizeof *desc);
		slot->opaque = NULL;
	}
}

/**
 * Look up the name recorded in `ggeptable' for a GGEP token.
 *
 * The table is indexed by the offset of `id' from the token of its first
 * entry; assertions check that `id' is in range and that the entry found
 * at that offset really bears the requested token.
 *
 * @returns the name held in the matching `ggeptable' entry.
 */
const gchar *
ext_ggep_name(ext_token_t id)
{
	size_t idx;

	g_assert(id < EXT_T_TOKEN_COUNT);
	g_assert(id >= ggeptable[0].rw_token);

	idx = id - ggeptable[0].rw_token;

	g_assert(idx < G_N_ELEMENTS(ggeptable));
	g_assert(ggeptable[idx].rw_token == id);

	return ggeptable[idx].rw_name;
}

/***
 *** Init & Shutdown
 ***/

/**
 * Initialize the extension subsystem.
 *
 * Creates the string-keyed `ext_names' hash table, then runs
 * rw_is_sorted() over `ggeptable' and `urntable' (presumably a sanity
 * check that both tables are sorted -- confirm against rw_is_sorted()).
 */
void
ext_init(void)
{
	ext_names = g_hash_table_new(g_str_hash, g_str_equal);

	/* Table sanity checks */
	rw_is_sorted("ggeptable", ggeptable, G_N_ELEMENTS(ggeptable));
	rw_is_sorted("urntable", urntable, G_N_ELEMENTS(urntable));
}

/**
 * Free resources used by the extension subsystem.
 *
 * Every entry of `ext_names' is removed through ext_names_kv_free(), then
 * the table itself is destroyed. The `ext_names' pointer is reset to NULL
 * afterwards so that it does not dangle once the table is gone.
 */
void
ext_close(void)
{
	g_hash_table_foreach_remove(ext_names, ext_names_kv_free, NULL);
	g_hash_table_destroy(ext_names);
	ext_names = NULL;		/* Don't keep a pointer to the destroyed table */
}

/* vi: set ts=4 sw=4 cindent: */




See more files for this project here

Gtk-Gnutella

A GTK+ Gnutella client for Unix, efficient, reliable and fast, written in C. It has been optimized for speed and scalability, with low-memory consumption. It is meant to be left running 24x7, using little CPU and only the configured bandwidth.

Project homepage: http://sourceforge.net/projects/gtk-gnutella
Programming language(s): C
License: other

  Jmakefile
  Makefile.SH
  alive.c
  alive.h
  ban.c
  ban.h
  bh_download.c
  bh_download.h
  bh_upload.c
  bh_upload.h
  bitzi.c
  bitzi.h
  bogons.c
  bogons.h
  bsched.c
  bsched.h
  clock.c
  clock.h
  dh.c
  dh.h
  dime.c
  dime.h
  dmesh.c
  dmesh.h
  downloads.c
  downloads.h
  dq.c
  dq.h
  extensions.c
  extensions.h
  features.c
  features.h
  file_object.c
  file_object.h
  fileinfo.c
  fileinfo.h
  geo_ip.c
  geo_ip.h
  ggep.c
  ggep.h
  ggep_type.c
  ggep_type.h
  gmsg.c
  gmsg.h
  gnet_stats.c
  gnet_stats.h
  gnutella.h
  guid.c
  guid.h
  hcache.c
  hcache.h
  hostiles.c
  hostiles.h
  hosts.c
  hosts.h
  hsep.c
  hsep.h
  http.c
  http.h
  huge.c
  huge.h
  ignore.c
  ignore.h
  inet.c
  inet.h
  ioheader.c
  ioheader.h
  local_shell.c
  local_shell.h
  matching.c
  matching.h
  mime_types.h
  move.c
  move.h
  mq.c
  mq.h
  mq_tcp.c
  mq_tcp.h
  mq_udp.c
  mq_udp.h
  namesize.c
  namesize.h
  nodes.c
  nodes.h
  ntp.c
  ntp.h
  oob.c
  oob.h
  oob_proxy.c
  oob_proxy.h
  parq.c
  parq.h
  pcache.c
  pcache.h
  pmsg.c
  pmsg.h
  pproxy.c
  pproxy.h
  qhit.c
  qhit.h
  qrp.c
  qrp.h