Google Protocol Buffers - Decoding VarInt

AceInfinity

Emeritus, Contributor
Joined
Feb 21, 2012
Posts
1,728
Location
Canada
Here's a bit of C code that I wrote to demonstrate decoding a binary stream encoded as a varint, based on the Google Protocol Buffers documentation.

Code:
[plain]#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Capacity, in bytes, of a scratch buffer for decoded output. */
#define MAX_BUF 2048

/*
 * Short fixed-width integer aliases used throughout this file.
 * MSVC builds use the compiler's built-in sized types; every other
 * compiler uses the <stdint.h> types.
 */
#ifdef _MSC_VER
typedef __int8  int8;
typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int8  uint8;
typedef unsigned __int16 uint16;
typedef unsigned __int32 uint32;
typedef unsigned __int64 uint64;
#else /* !_MSC_VER */
typedef int8_t  int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t  uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
#endif

/*
 * ZigZag encoding: maps signed values onto unsigned ones so that values of
 * small magnitude get small encodings (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
 *
 * All arithmetic is done on unsigned operands: left-shifting a negative
 * signed value is undefined behaviour and right-shifting one is
 * implementation-defined.  The result is the unsigned wire value
 * (uint32 / uint64).  Note that `n` is evaluated twice.
 */
#define encode_sint32(n) \
  ((uint32)(((uint32)(n) << 1) ^ ((uint32)0 - ((uint32)(n) >> 31))))
#define encode_sint64(n) \
  ((uint64)(((uint64)(n) << 1) ^ ((uint64)0 - ((uint64)(n) >> 63))))

/*
 * ZigZag decoding.  `n` must have an unsigned type (or be non-negative):
 * with a negative signed operand the right shift would drag the sign bit
 * along and corrupt the result.  The result has the (promoted) type of `n`;
 * cast it to the signed type of the same width to obtain the original value.
 * Note that `n` is evaluated twice.
 */
#define decode_sint(n) (((n) >> 1) ^ (0 - ((n) & 1)))

/*
 * decode_varint(T) names the decoder generated by def_decode_varint(T).
 *
 * def_decode_varint(T) defines
 *
 *   void *decode_varint_fT(const unsigned char *pbytes,
 *                          void *pbuf, int sint_flag);
 *
 * which decodes one base-128 varint starting at `pbytes` (7 payload bits per
 * byte, least-significant group first, high bit set on every byte except the
 * last) and stores it in the object of type T that `pbuf` points to.
 *
 *   pbytes    - first byte of the varint; must contain a terminating byte
 *               (high bit clear) within the readable range.
 *   pbuf      - must point to a valid, suitably aligned object of type T.
 *   sint_flag - non-zero to undo ZigZag encoding (sint32 / sint64 fields).
 *
 * Returns `pbuf`.
 *
 * The value is accumulated in an unsigned 64-bit integer so that shifts of
 * 32 bits or more are well defined, and is narrowed to T only at the end
 * (values that do not fit in T are truncated to T's width on the usual
 * two's-complement targets).  At most 10 bytes are consumed - the longest
 * encoding of a 64-bit value - so a malformed run of continuation bytes
 * cannot push the shift count past the accumulator's width.
 */
#define decode_varint(T) decode_varint_f##T
#define def_decode_varint(T)                                       \
  void *decode_varint_f##T(const unsigned char *pbytes,            \
                           void *pbuf, int sint_flag)              \
  {                                                                \
    uint64 acc = 0;                                                \
    unsigned shift;                                                \
    for (shift = 0; shift < 64; shift += 7)                        \
    {                                                              \
      const unsigned char byte = *pbytes++;                        \
      acc |= (uint64)(byte & 0x7F) << shift;                       \
      if (!(byte & 0x80)) break;                                   \
    }                                                              \
    if (sint_flag)                                                 \
    {                                                              \
      acc = decode_sint(acc);                                      \
    }                                                              \
    *(T *)pbuf = (T)acc;                                           \
    return pbuf;                                                   \
  }

def_decode_varint(int32)
def_decode_varint(int64)

/*
 * Convenience wrappers: `pbuf` must point to an int32 (the *32 variants) or
 * an int64 (the *64 variants).  The sint* variants also undo ZigZag encoding.
 */
#define decode_varint_int32(pbytes, pbuf) decode_varint(int32)(pbytes, pbuf, 0)
#define decode_varint_int64(pbytes, pbuf) decode_varint(int64)(pbytes, pbuf, 0)
#define decode_varint_sint32(pbytes, pbuf) decode_varint(int32)(pbytes, pbuf, 1)
#define decode_varint_sint64(pbytes, pbuf) decode_varint(int64)(pbytes, pbuf, 1)

/*
 * Wire types.  A field key carries the wire type in its low three bits and
 * the field number in the remaining bits (see GET_WIRE_TYPE / GET_FIELD_NUM).
 * WIRE_TYPE_INVALID is a sentinel; GET_WIRE_TYPE never produces it.
 */
#define WIRE_TYPE_INVALID             (-1)
#define WIRE_TYPE_VARINT              0x0 /* int32, int64, uint32, uint64, sint32, sint64, bool, enum */
#define WIRE_TYPE_64BIT               0x1 /* fixed64, sfixed64, double */
#define WIRE_TYPE_LENGTH_DELIMITED    0x2 /* string, bytes, embedded messages, packed repeated fields */
#define WIRE_TYPE_START_GROUP         0x3 /* groups (deprecated) */
#define WIRE_TYPE_END_GROUP           0x4 /* groups (deprecated) */
#define WIRE_TYPE_32BIT               0x5 /* fixed32, sfixed32, float */

/* Split a decoded key into its wire type (low 3 bits) and field number. */
#define GET_WIRE_TYPE(v) ((v) & 0x7)
#define GET_FIELD_NUM(v) ((v) >> 0x3)

int main(void)
{
  unsigned char bytes[] = { 0x08, 0x96, 0x01 },
                          *pbyte = bytes;
  int wire_type, field_number;
  int key = *pbyte++;

  unsigned char buf[MAX_BUF] = { 0 };
  printf("wire_type: %d\n", (wire_type = GET_WIRE_TYPE(key)));
  printf("field_number: %d\n", (field_number = GET_FIELD_NUM(key)));

  if (wire_type == WIRE_TYPE_INVALID)
  {
    fputs("ERROR: Invalid wire_type\n", stderr);
    exit(1);
  }

  /* testing */
  decode_varint_int32(pbyte, buf);
  printf("int32 varint: %d\n", *((int *)buf));
  decode_varint_int64(pbyte, buf);
  printf("int64 varint: %d\n", *((int *)buf));
  decode_varint_sint32(pbyte, buf);
  printf("sint32 varint: %d\n", *((int *)buf));
  decode_varint_sint64(pbyte, buf);
  printf("sint64 varint: %d\n", *((int *)buf));

  exit(0);
}[/plain]

There is currently no parsing of .proto message definitions, and I'm also missing the encoding/decoding of the other wire types, and probably the last few subtypes of the varint wire type. The problem with not parsing the .proto file is that the binary stream alone cannot tell you whether a varint was originally an int32/int64 or an sint32/sint64. That matters because you cannot know whether the ZigZag encoding has to be undone, and if you assume the wrong type you end up with a completely different value from the original (for example, the varint 150 decodes to 150 as an int32 but to 75 as an sint32). This ambiguity mainly affects similar data types; across entirely different wire types it is sometimes easy to determine the exact type from the byte stream, as long as only a single data type matches the byte stream's structure. However, I only wrote this bit of code to demonstrate the process of decoding the byte stream, specifically for the varint types int32, int64, sint32, and sint64.

More information here: https://developers.google.com/protocol-buffers/docs/encoding
 

Has Sysnative Forums helped you? Please consider donating to help us support the site!

Back
Top