The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <search.h>
#include <string.h>

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "ppport.h"

#include "const-c.inc"

// Tables for base64 decoding.
// alphabet: the 64 base64 digits in value order. The array is exactly 64 bytes,
// so the string literal's terminating NUL is not stored.
static unsigned char alphabet[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// decoder: reverse map from a byte to its 6-bit value. It is filled from
// alphabet by handle_init; bytes that are not base64 digits keep the
// zero-initialized value 0.
static char decoder[256];

// X-macro lists of plist element type names. Every entry is wrapped in _R(),
// which the user of a list must #define before expanding it.

// Types for which handle_start creates an object that collects other values.
#define COMPLEX_TYPES \
    _R(dict) \
    _R(array)

// Numeric and boolean scalar types.
#define NUMERICAL_TYPES \
    _R(real) \
    _R(integer) \
    _R(true) \
    _R(false)

// Types whose value is built from the element's accumulated text.
#define SIMPLE_TYPES \
    _R(data) \
    _R(date) \
    _R(string) \
    NUMERICAL_TYPES

// Every value type: the complex ones followed by the simple ones.
#define ALL_TYPES \
    COMPLEX_TYPES \
    SIMPLE_TYPES

// One T_<name> enumerator per entry of ALL_TYPES, in list order, followed by
// T_max, which therefore equals the number of listed types.
enum type {
#define _R(X) T_##X,
    ALL_TYPES
#undef _R
    T_max
};

// Value returned by hash() for each recognized element name, spelled out as
// HASH_FOR_<name> so the names can be used as case labels.
enum hash_value {
    HASH_FOR_real    =  0,
    HASH_FOR_key     =  3,
    HASH_FOR_dict    =  4,
    HASH_FOR_true    =  5,
    HASH_FOR_date    =  9,
    HASH_FOR_integer = 10,
    HASH_FOR_data    = 14,
    HASH_FOR_string  = 15,
    HASH_FOR_false   = 19,
    HASH_FOR_array   = 20,
    HASH_FOR_plist   = 25,
};

// Parsing context of a node. S_DICT and S_ARRAY are deliberately equal to the
// hash values of "dict" and "array" so that handle_start can assign
// hash(name) directly as the context of a container element.
enum ctx {
    S_EMPTY,                    // 0: nothing seen yet (zero-initialized state)
    S_TOP,                      // 1: set by handle_start for the <plist> element
    S_FREE,                     // 2
    S_DICT  = HASH_FOR_dict,    // inside a dict
    S_ARRAY = HASH_FOR_array,   // inside an array
    S_KEY,                      // S_ARRAY + 1: inside a <key> element
    S_TEXT                      // S_ARRAY + 2: inside a simple (text-valued) element
};

// Printable name of each enum ctx value, indexed by the value itself and used
// in croak() messages. Indices without an enumerator are left as NULL.
static const char *context_names[] = {
    [S_EMPTY] = "EMPTY",
    [S_TOP]   = "TOP",
    [S_FREE]  = "FREE",
    [S_DICT]  = "DICT",
    [S_ARRAY] = "ARRAY",
    [S_KEY]   = "KEY",
    [S_TEXT]  = "TEXT",
};

// Per-parser parsing state. One instance is allocated by handle_init, looked
// up by the other handlers through state_for_parser, and freed by handle_final.
struct state {
    // parser SV as passed to handle_init; _find_parser compares SvRV() of it
    SV *parser;
    // text collected by handle_char for the current simple element or key;
    // NULL when nothing has been collected
    SV *accum;
    // TODO keep everything in the stack to reduce copying
    // node currently being built
    struct node {
        enum ctx context;   // what kind of element we are inside
        SV *key;            // most recent <key> text seen (set by handle_end)
        SV *val;            // container object, or the final result at top level
    } base;
    // saved copies of base: handle_start pushes one per element and
    // handle_end pops it again
    struct stack {
        struct node node;
        struct stack *next;
    } *stack;
};

/// root of search tree of states
/// (passed to tsearch/tfind/tdelete; the entries are struct state pointers
/// ordered by _find_parser)
void *statetree;

// number of elements in a true array (not valid for pointers)
#define countof(X) (sizeof (X) / sizeof (X)[0])

/// TODO use our own ::XS namespace
// prefix prepended to an element name to form the class on which new() is called
#define PACKAGE_PREFIX "Mac::PropertyList::SAX::"
// long enough to cover any type name
// (placeholder appended to PACKAGE_PREFIX to size class-name buffers:
// 8 characters, while the longest listed type name, "integer", has 7)
#define TYPE_FILLER    "XXXXXXXX"

// Perfect hash over the recognized element names, computed from the first and
// fourth bytes of the name ("key" uses its terminating NUL as fourth byte).
//
// str must be a NUL-terminated string. Returns the HASH_FOR_<name> value for a
// recognized name. The hash alone does not prove that str IS that name: other
// strings with the same first and fourth bytes produce the same value.
// Names shorter than three bytes, and bytes outside the table, yield a value
// larger than any HASH_FOR_ constant.
static inline unsigned int hash(register const char *str)
{
    // per-byte weight of "not part of any keyword"; no keyword hashes above 25
    enum { NO_MATCH = 26 };

    static const unsigned char asso_values[] = {
         0, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 10, 26, 26,
         4,  5,  9, 26, 26,  5, 26,  3,  0, 26,
        26, 26, 15, 26,  0, 10,  0, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26
    };
    const size_t table_size = sizeof asso_values / sizeof asso_values[0];

    // str[3] may only be read when bytes 0..2 are all non-NUL; otherwise it
    // lies beyond the string's terminator.
    if (str[0] == '\0' || str[1] == '\0' || str[2] == '\0')
        return 2 * NO_MATCH;

    const unsigned char first  = (unsigned char)str[0];
    const unsigned char fourth = (unsigned char)str[3];

    // The table only covers 7-bit bytes; anything above cannot be a keyword.
    const unsigned int first_weight  = first  < table_size ? asso_values[first]  : NO_MATCH;
    const unsigned int fourth_weight = fourth < table_size ? asso_values[fourth] : NO_MATCH;

    return fourth_weight + first_weight;
}

static int _find_parser(const void *a, const void *b)
{
    assert(a != NULL);
    assert(b != NULL);

    const struct state *x = a;
    const struct state *y = b;

    assert(x->parser != NULL);
    assert(y->parser != NULL);

    return SvRV(x->parser) - SvRV(y->parser);
}

// Look up the state registered by handle_init for the given parser.
// parser must be a reference, because _find_parser compares SvRV() of it.
// Croaks when parser is not a reference or no state is registered for it;
// otherwise returns the registered state (never NULL).
static struct state* state_for_parser(SV *parser)
{
    if (parser == NULL || !SvROK(parser))
        croak("Failed to look up state object by parser argument");

    struct state st = { .parser = parser };
    // tfind returns NULL (not a pointer to NULL) when there is no match
    struct state **result = tfind(&st, &statetree, _find_parser);
    if (result == NULL || *result == NULL)
        croak("Failed to look up state object by parser argument");
    return *result;
}

// base64 decode
// from http://ftp.riken.jp/net/mail/vm/base64-decode.c
//
// Decodes at most ilen bytes of `in` into `out`, which has room for olen
// bytes. Decoding stops early at a NUL byte or at the first '=' padding
// character; whitespace is skipped. Bytes that are not base64 digits are
// looked up in `decoder` like any other byte (unknown bytes decode as 0).
// Digits of a trailing incomplete group are discarded unless the group is
// closed by '='.
//
// Returns the number of bytes written to `out`, or -1 when the input is
// malformed (a single digit followed by '='), the output would not fit in
// olen bytes, or the count does not fit in an int.
static int base64_decode(size_t ilen, const char in[ilen], size_t olen, char out[olen])
{
    size_t j = 0;
    int char_count = 0, bits = 0;
    int saw_padding = 0;

    for (size_t i = 0; i < ilen && in[i]; i++) {
        unsigned char c = in[i];
        if (c == '=') {
            saw_padding = 1;
            break;
        }
        if (isspace(c)) continue;
        // maybe handle bogus data better ?
        bits += decoder[c];
        char_count++;
        if (char_count == 4) {
            // j never exceeds olen, so the subtraction cannot wrap
            if (olen - j < 3) return -1;
            out[j++] = (char)(bits >> 16);
            out[j++] = (char)((bits >> 8) & 0xff);
            out[j++] = (char)(bits & 0xff);
            bits = 0;
            char_count = 0;
        } else {
            bits <<= 6;
        }
    }

    if (saw_padding) {
        switch (char_count) {
          case 1: return -1;
          case 2:
            // two digits carry one full byte
            if (olen - j < 1) return -1;
            out[j++] = (char)(bits >> 10);
            break;
          case 3:
            // three digits carry two full bytes
            if (olen - j < 2) return -1;
            out[j++] = (char)(bits >> 16);
            out[j++] = (char)((bits >> 8) & 0xff);
            break;
          default:
            break;
        }
    }

    if (j > (size_t)INT_MAX) return -1;
    return (int)j;
}

#define _R(X) case HASH_FOR_##X:
#define _S(T) \
    static inline int is_##T##_type(const char *name) \
    { switch (hash(name)) T##_TYPES return 1; return 0; }

_S(ALL)
_S(COMPLEX)
_S(SIMPLE)

#undef _S
#undef _R

MODULE = Mac::PropertyList::XS		PACKAGE = Mac::PropertyList::XS		

PROTOTYPES: ENABLE

void
handle_start(SV *expat, SV *element, ...)
    CODE:
        // Start-element handler.
        //   expat   - parser object previously passed to handle_init
        //   element - element name; further arguments are ignored
        // The first <plist> switches the state to S_TOP. Any other accepted
        // element saves the current node on the stack and opens a new one:
        // dict/array call PACKAGE_PREFIX<name>->new and keep the result as
        // the node's value, simple types switch to S_TEXT, and <key> (only
        // valid directly inside a dict) switches to S_KEY.
        // Croaks on unrecognized or misplaced elements; in that case the
        // state is left untouched.
        // NOTE(review): <plist> and <key> are recognized by hash value only.
        struct state *st = state_for_parser(expat);
        const char *name = SvPV_nolen(element);
        if (st->base.context == S_EMPTY && hash(name) == HASH_FOR_plist) {
            st->base.context = S_TOP;
        } else if (st->base.context == S_TOP || hash(name) == HASH_FOR_key || is_ALL_type(name)) {
            const int is_complex = is_COMPLEX_type(name);
            const int is_simple  = !is_complex && is_SIMPLE_type(name);

            // Validate before pushing so that a croak cannot leave a
            // half-opened node on the stack.
            if (!is_complex && !is_simple) {
                if (hash(name) != HASH_FOR_key)
                    croak("Top-level element '%s' in plist is not recognized", name);
                if (st->base.context != S_DICT)
                    croak("<key/> in improper context '%s'", context_names[st->base.context]);
            }

            SV *container = NULL;
            if (is_complex) {
                // Build the class name in an SV that grows as needed rather
                // than copying the element name into a fixed-size buffer.
                SV *class_name = sv_2mortal(newSVpvn(PACKAGE_PREFIX, sizeof PACKAGE_PREFIX - 1));
                sv_catpv(class_name, name);

                PUSHMARK(SP);
                XPUSHs(class_name);
                PUTBACK;
                int count = call_method("new", G_SCALAR);
                SPAGAIN;
                if (count != 1) croak("Failed new() call");

                container = POPs;
                SvREFCNT_inc(container);
            }

            // save the enclosing node
            struct stack *old = st->stack;
            Newxz(st->stack, 1, struct stack);
            st->stack->node = st->base;
            st->stack->next = old;

            if (is_complex) {
                st->base.val = container;
                // enum values are selected to overlap so no lookup is needed
                st->base.context = hash(name);
                st->base.key = NULL;
            } else if (is_simple) {
                st->base.context = S_TEXT;
            } else {
                st->base.context = S_KEY;
            }
        } else {
            croak("Received invalid start element '%s'", name);
        }

void
handle_end(SV *expat, SV *element)
    CODE:
        // End-element handler.
        //   expat   - parser object previously passed to handle_init
        //   element - element name
        // </plist> is ignored. Otherwise the node saved by handle_start is
        // popped and the finished value is handed to the enclosing node:
        //   - simple types call PACKAGE_PREFIX<name>->new(text), where the
        //     text of <data> is base64-decoded first;
        //   - <key> stores its text as the enclosing dict's pending key;
        //   - dict/array pass on the container created by handle_start.
        // The value is stored into the enclosing dict (under the pending
        // key), pushed onto the enclosing array, or kept as the top-level
        // result. Croaks on an unmatched end element, invalid base64, a dict
        // value without a key, or an unexpected enclosing context.
        struct state *st = state_for_parser(expat);
        const char *name = SvPV_nolen(element);
        if (hash(name) != HASH_FOR_plist) { // discard plist element
            struct stack *top = st->stack;
            if (top == NULL)
                croak("Received end element '%s' without a matching start element", name);

            SV *val = st->base.val;

            // A container owns its last pending key; release it before the
            // enclosing node overwrites st->base. Other elements share the
            // key pointer with the enclosing node and must not release it.
            if (is_COMPLEX_type(name))
                SvREFCNT_dec(st->base.key);

            // pop: restore the enclosing node, then free the stack entry so
            // that no later croak can leak it
            st->stack = top->next;
            st->base = top->node;
            Safefree(top);

            if (is_SIMPLE_type(name)) {
                // Build the class name in an SV that grows as needed rather
                // than copying the element name into a fixed-size buffer.
                SV *class_name = sv_2mortal(newSVpvn(PACKAGE_PREFIX, sizeof PACKAGE_PREFIX - 1));
                sv_catpv(class_name, name);

                // Take over the accumulated text (empty when none was seen)
                // and let the mortal mechanism release it on every path.
                SV *text = sv_2mortal(st->accum ? st->accum : newSVpvn("", 0));
                st->accum = NULL;

                SV *arg = text;
                if (hash(name) == HASH_FOR_data) {
                    STRLEN len;
                    const char *what = SvPV(text, len);
                    // Decode straight into a heap-backed SV: the output is
                    // never longer than the input, and newSV() reserves one
                    // extra byte for the trailing NUL.
                    arg = sv_2mortal(newSV(len ? len : 1));
                    char *out = SvPVX(arg);
                    int total = base64_decode(len, what, len, out);
                    if (total < 0)
                        croak("Invalid base64 content in <data> element");
                    out[total] = '\0';
                    SvCUR_set(arg, total);
                    SvPOK_only(arg);
                }

                PUSHMARK(SP);
                XPUSHs(class_name);
                XPUSHs(arg);
                PUTBACK;
                int count = call_method("new", G_SCALAR);
                SPAGAIN;
                if (count != 1) croak("Failed new() call");

                val = POPs;
                SvREFCNT_inc(val);
            } else if (hash(name) == HASH_FOR_key) {
                SvREFCNT_dec(st->base.key);
                // an empty <key/> yields an empty-string key, never NULL
                st->base.key = st->accum ? st->accum : newSVpvn("", 0);
                st->accum = NULL;
                XSRETURN_EMPTY;
            }

            switch (st->base.context) {
                case S_DICT:
                    if (st->base.key == NULL)
                        croak("Element '%s' in dict is not preceded by a <key>", name);
                    // on failure the hash did not take ownership of val
                    if (!hv_store_ent((HV*)SvRV(st->base.val), st->base.key, val, 0))
                        SvREFCNT_dec(val);
                    break;
                case S_ARRAY:
                    av_push((AV*)SvRV(st->base.val), val);
                    break;
                case S_TOP:
                    st->base.val = val;
                    break;
                default:
                    croak("Bad context '%s'", context_names[st->base.context]);
            }
        }

void
handle_char(SV *expat, SV *string)
    CODE:
        // Character-data handler: while inside a simple element or a <key>,
        // append the text to the accumulator (created on first use). Text in
        // any other context is ignored.
        struct state *st = state_for_parser(expat);
        switch (st->base.context) {
            case S_TEXT:
            case S_KEY:
                if (st->accum == NULL)
                    st->accum = newSVpvn("", 0);
                sv_catsv(st->accum, string);
                break;
            default:
                break;
        }

void
handle_init(SV *expat)
    CODE:
        // Init handler: registers a fresh, zeroed state for the parser object
        // `expat` in statetree and makes sure the base64 decoding table is
        // filled. Croaks when expat is not a reference or the state cannot
        // be registered.
        if (!SvROK(expat))
            croak("Parser argument must be a reference");

        struct state *st;
        Newxz(st, 1, struct state);
        st->parser = expat;
        // this is fragile : what if the expat object moves ? can it ?
        // NOTE(review): only the SV pointer is stored, without taking a
        // reference, so the lookup relies on that SV staying alive.
        struct state **slot = tsearch(st, &statetree, _find_parser);
        if (slot == NULL) {
            // tsearch could not allocate a tree node
            Safefree(st);
            croak("Failed to register state object for parser");
        }
        if (*slot != st) {
            // An entry for this parser already exists, e.g. from a parse
            // that never reached handle_final. Reset it to a fresh state
            // instead of silently continuing with stale data.
            // NOTE(review): SVs still referenced by the stale state are
            // dropped, not released, since their ownership is unclear here.
            struct state *stale = *slot;
            while (stale->stack) {
                struct stack *temp = stale->stack;
                stale->stack = temp->next;
                Safefree(temp);
            }
            *stale = *st;
            Safefree(st);
        }

        // The last digit ('/') decodes to 63, so its slot is non-zero exactly
        // when the table has been filled. (Slot 0 is never written.)
        if (!decoder[alphabet[sizeof alphabet - 1]]) {
            for (size_t i = 0; i < sizeof alphabet; i++)
                decoder[alphabet[i]] = (char)i;
        }


SV *
handle_final(SV *expat)
    CODE:
        // Final handler: unregisters the state of the parser object `expat`,
        // frees it together with any stack entries still pending, and
        // returns the top-level value. Returns undef when the document
        // produced no value (e.g. an empty <plist/>).
        struct state *st = state_for_parser(expat);
        tdelete(st, &statetree, _find_parser);
        // never hand a NULL pointer back to Perl
        RETVAL = st->base.val ? st->base.val : newSV(0);
        while (st->stack) {
            struct stack *temp = st->stack;
            st->stack = st->stack->next;
            Safefree(temp);
        }
        Safefree(st);
    OUTPUT:
        RETVAL

INCLUDE: const-xs.inc