#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <ctype.h>
/* ****************************************************************************
* CHARACTER CLASS METHODS
* ****************************************************************************
*/
int charIsSpace(char ch) {
if (ch == ' ') return 1;
if (ch == '\t') return 1;
return 0;
}
int charIsEndspace(char ch) {
if (ch == '\n') return 1;
if (ch == '\r') return 1;
if (ch == '\f') return 1;
return 0;
}
int charIsWhitespace(char ch) {
return charIsSpace(ch) || charIsEndspace(ch);
}
int charIsIdentifier(char ch) {
if ((ch >= 'a') && (ch <= 'z')) return 1;
if ((ch >= 'A') && (ch <= 'Z')) return 1;
if ((ch >= '0') && (ch <= '9')) return 1;
if (ch == '-') return 1;
if (ch == '_') return 1;
if (ch == '.') return 1;
if (ch == '#') return 1;
if (ch == '@') return 1;
return 0;
}
int charIsInfix(char ch) {
/* WS before+after these characters can be removed */
if (ch == '{') return 1;
if (ch == '}') return 1;
if (ch == ';') return 1;
if (ch == ':') return 1;
if (ch == ',') return 1;
if (ch == '+') return 1;
return 0;
}
int charIsPrefix(char ch) {
/* WS after these characters can be removed */
if (ch == '(') return 1; /* requires leading WS when used in @media */
return charIsInfix(ch);
}
int charIsPostfix(char ch) {
/* WS before these characters can be removed */
if (ch == ')') return 1; /* requires trailing WS for MSIE */
return charIsInfix(ch);
}
/* ****************************************************************************
* TYPE DEFINITIONS
* ****************************************************************************
*/
typedef enum {
NODE_EMPTY,
NODE_WHITESPACE,
NODE_BLOCKCOMMENT,
NODE_IDENTIFIER,
NODE_LITERAL,
NODE_SIGIL
} NodeType;
struct _Node;
typedef struct _Node Node;
struct _Node {
/* linked list pointers */
Node* prev;
Node* next;
/* node internals */
char* contents;
size_t length;
NodeType type;
int can_prune;
};
typedef struct {
/* linked list pointers */
Node* head;
Node* tail;
/* doc internals */
const char* buffer;
size_t length;
size_t offset;
} CssDoc;
/* ****************************************************************************
* NODE CHECKING MACROS/FUNCTIONS
* ****************************************************************************
*/
/* checks to see if the node is the given string, case INSENSITIVELY */
int nodeEquals(Node* node, const char* string) {
return (strcasecmp(node->contents, string) == 0);
}
/* checks to see if the node contains the given string, case INSENSITIVELY */
int nodeContains(Node* node, const char* string) {
const char* haystack = node->contents;
size_t len = strlen(string);
char ul_start[2] = { tolower(*string), toupper(*string) };
/* if node is shorter we know we're not going to have a match */
if (len > node->length)
return 0;
/* find the needle in the haystack */
while (haystack && *haystack) {
/* find first char of needle */
haystack = strpbrk( haystack, ul_start );
if (haystack == NULL)
return 0;
/* check if the rest matches */
if (strncasecmp(haystack, string, len) == 0)
return 1;
/* nope, move onto next character in the haystack */
haystack ++;
}
/* no match */
return 0;
}
/* checks to see if the node begins with the given string, case INSENSITIVELY.
*/
int nodeBeginsWith(Node* node, const char* string) {
size_t len = strlen(string);
if (len > node->length)
return 0;
return (strncasecmp(node->contents, string, len) == 0);
}
/* checks to see if the node ends with the given string, case INSENSITVELY. */
int nodeEndsWith(Node* node, const char* string) {
size_t len = strlen(string);
size_t off = node->length - len;
if (len > node->length)
return 0;
return (strncasecmp(node->contents+off, string, len) == 0);
}
/* macros to help see what kind of node we've got */
#define nodeIsWHITESPACE(node) ((node->type == NODE_WHITESPACE))
#define nodeIsBLOCKCOMMENT(node) ((node->type == NODE_BLOCKCOMMENT))
#define nodeIsIDENTIFIER(node) ((node->type == NODE_IDENTIFIER))
#define nodeIsLITERAL(node) ((node->type == NODE_LITERAL))
#define nodeIsSIGIL(node) ((node->type == NODE_SIGIL))
#define nodeIsEMPTY(node) ((node->type == NODE_EMPTY) || ((node->length==0) || (node->contents==NULL)))
#define nodeIsMACIECOMMENTHACK(node) (nodeIsBLOCKCOMMENT(node) && nodeEndsWith(node,"\\*/"))
#define nodeIsPREFIXSIGIL(node) (nodeIsSIGIL(node) && charIsPrefix(node->contents[0]))
#define nodeIsPOSTFIXSIGIL(node) (nodeIsSIGIL(node) && charIsPostfix(node->contents[0]))
#define nodeIsCHAR(node,ch) ((node->contents[0]==ch) && (node->length==1))
/* ****************************************************************************
* NODE MANIPULATION FUNCTIONS
* ****************************************************************************
*/
/* allocates a new node */
Node* CssAllocNode() {
Node* node;
Newxz(node, 1, Node);
node->prev = NULL;
node->next = NULL;
node->contents = NULL;
node->length = 0;
node->type = NODE_EMPTY;
node->can_prune = 1;
return node;
}
/* frees the memory used by a node */
void CssFreeNode(Node* node) {
if (node->contents)
Safefree(node->contents);
Safefree(node);
}
void CssFreeNodeList(Node* head) {
while (head) {
Node* tmp = head->next;
CssFreeNode(head);
head = tmp;
}
}
/* clears the contents of a node */
void CssClearNodeContents(Node* node) {
if (node->contents)
Safefree(node->contents);
node->contents = NULL;
node->length = 0;
}
/* sets the contents of a node */
void CssSetNodeContents(Node* node, const char* string, size_t len) {
CssClearNodeContents(node);
node->length = len;
/* allocate string, fill with NULLs, and copy */
Newxz(node->contents, (len+1), char);
strncpy( node->contents, string, len );
}
/* removes the node from the list and discards it entirely */
void CssDiscardNode(Node* node) {
if (node->prev)
node->prev->next = node->next;
if (node->next)
node->next->prev = node->prev;
CssFreeNode(node);
}
/* appends the node to the given element */
void CssAppendNode(Node* element, Node* node) {
if (element->next)
element->next->prev = node;
node->next = element->next;
node->prev = element;
element->next = node;
}
/* collapses a node to a single whitespace character. If the node contains any
* endspace characters, that is what we're collapsed to.
*/
void CssCollapseNodeToWhitespace(Node* node) {
if (node->contents) {
char ws = node->contents[0];
size_t idx;
for (idx=0; idx<node->length; idx++) {
if (charIsEndspace(node->contents[idx])) {
ws = node->contents[idx];
break;
}
}
CssSetNodeContents(node, &ws, 1);
}
}
/* ****************************************************************************
* TOKENIZING FUNCTIONS
* ****************************************************************************
*/
/* extracts a quoted literal string */
void _CssExtractLiteral(CssDoc* doc, Node* node) {
const char* buf = doc->buffer;
size_t offset = doc->offset;
char delimiter = buf[offset];
/* skip start of literal */
offset ++;
/* search for end of literal */
while (offset < doc->length) {
if (buf[offset] == '\\') {
/* escaped character; skip */
offset ++;
}
else if (buf[offset] == delimiter) {
const char* start = buf + doc->offset;
size_t length = offset - doc->offset + 1;
CssSetNodeContents(node, start, length);
node->type = NODE_LITERAL;
return;
}
/* move onto next character */
offset ++;
}
croak( "unterminated quoted string literal" );
}
/* extracts a block comment */
void _CssExtractBlockComment(CssDoc* doc, Node* node) {
const char* buf = doc->buffer;
size_t offset = doc->offset;
/* skip start of comment */
offset ++; /* skip "/" */
offset ++; /* skip "*" */
/* search for end of comment block */
while (offset < doc->length) {
if (buf[offset] == '*') {
if (buf[offset+1] == '/') {
const char* start = buf + doc->offset;
size_t length = offset - doc->offset + 2;
CssSetNodeContents(node, start, length);
node->type = NODE_BLOCKCOMMENT;
return;
}
}
/* move onto next character */
offset ++;
}
croak( "unterminated block comment" );
}
/* extracts a run of whitespace characters */
void _CssExtractWhitespace(CssDoc* doc, Node* node) {
const char* buf = doc->buffer;
size_t offset = doc->offset;
while ((offset < doc->length) && charIsWhitespace(buf[offset]))
offset ++;
CssSetNodeContents(node, doc->buffer+doc->offset, offset-doc->offset);
node->type = NODE_WHITESPACE;
}
/* extracts an identifier */
void _CssExtractIdentifier(CssDoc* doc, Node* node) {
const char* buf = doc->buffer;
size_t offset = doc->offset;
while ((offset < doc->length) && charIsIdentifier(buf[offset]))
offset++;
CssSetNodeContents(node, doc->buffer+doc->offset, offset-doc->offset);
node->type = NODE_IDENTIFIER;
}
/* extracts a -single- symbol/sigil */
void _CssExtractSigil(CssDoc* doc, Node* node) {
CssSetNodeContents(node, doc->buffer+doc->offset, 1);
node->type = NODE_SIGIL;
}
/* tokenizes the given string and returns the list of nodes */
Node* CssTokenizeString(const char* string) {
CssDoc doc;
/* initialize our CSS document object */
doc.head = NULL;
doc.tail = NULL;
doc.buffer = string;
doc.length = strlen(string);
doc.offset = 0;
/* parse the CSS */
while ((doc.offset < doc.length) && (doc.buffer[doc.offset])) {
/* allocate a new node */
Node* node = CssAllocNode();
if (!doc.head)
doc.head = node;
if (!doc.tail)
doc.tail = node;
/* parse the next node out of the CSS */
if ((doc.buffer[doc.offset] == '/') && (doc.buffer[doc.offset+1] == '*'))
_CssExtractBlockComment(&doc, node);
else if ((doc.buffer[doc.offset] == '"') || (doc.buffer[doc.offset] == '\''))
_CssExtractLiteral(&doc, node);
else if (charIsWhitespace(doc.buffer[doc.offset]))
_CssExtractWhitespace(&doc, node);
else if (charIsIdentifier(doc.buffer[doc.offset]))
_CssExtractIdentifier(&doc, node);
else
_CssExtractSigil(&doc, node);
/* move ahead to the end of the parsed node */
doc.offset += node->length;
/* add the node to our list of nodes */
if (node != doc.tail)
CssAppendNode(doc.tail, node);
doc.tail = node;
}
/* return the node list */
return doc.head;
}
/* ****************************************************************************
* MINIFICATION FUNCTIONS
* ****************************************************************************
*/
/* collapses all of the nodes to their shortest possible representation */
void CssCollapseNodes(Node* curr) {
int inMacIeCommentHack = 0;
while (curr) {
Node* next = curr->next;
switch (curr->type) {
case NODE_WHITESPACE:
CssCollapseNodeToWhitespace(curr);
break;
case NODE_BLOCKCOMMENT: {
if (!inMacIeCommentHack && nodeIsMACIECOMMENTHACK(curr)) {
/* START of mac/ie hack */
CssSetNodeContents(curr, "/*\\*/", 5);
curr->can_prune = 0;
inMacIeCommentHack = 1;
}
else if (inMacIeCommentHack && !nodeIsMACIECOMMENTHACK(curr)) {
/* END of mac/ie hack */
CssSetNodeContents(curr, "/**/", 4);
curr->can_prune = 0;
inMacIeCommentHack = 0;
}
} break;
default:
break;
}
curr = next;
}
}
/* checks to see whether we can prune the given node from the list.
*
* THIS is the function that controls the bulk of the minification process.
*/
enum {
PRUNE_NO,
PRUNE_PREVIOUS,
PRUNE_CURRENT,
PRUNE_NEXT
};
int CssCanPrune(Node* node) {
Node* prev = node->prev;
Node* next = node->next;
/* only if node is prunable */
if (!node->can_prune)
return PRUNE_NO;
switch (node->type) {
case NODE_EMPTY:
/* prune empty nodes */
return PRUNE_CURRENT;
case NODE_WHITESPACE:
/* remove whitespace before comment blocks */
if (next && nodeIsBLOCKCOMMENT(next))
return PRUNE_CURRENT;
/* remove whitespace after comment blocks */
if (prev && nodeIsBLOCKCOMMENT(prev))
return PRUNE_CURRENT;
/* leading whitespace gets pruned */
if (!prev)
return PRUNE_CURRENT;
/* trailing whitespace gets pruned */
if (!next)
return PRUNE_CURRENT;
/* keep all other whitespace */
return PRUNE_NO;
case NODE_BLOCKCOMMENT:
/* keep comments that contain the word "copyright" */
if (nodeContains(node,"copyright"))
return PRUNE_NO;
/* remove comment blocks */
return PRUNE_CURRENT;
case NODE_IDENTIFIER:
/* keep all identifiers */
return PRUNE_NO;
case NODE_LITERAL:
/* keep all literals */
return PRUNE_NO;
case NODE_SIGIL:
/* remove whitespace after "prefix" sigils */
if (nodeIsPREFIXSIGIL(node) && next && nodeIsWHITESPACE(next))
return PRUNE_NEXT;
/* remove whitespace before "postfix" sigils */
if (nodeIsPOSTFIXSIGIL(node) && prev && nodeIsWHITESPACE(prev))
return PRUNE_PREVIOUS;
/* remove ";" characters at end of selector groups */
if (nodeIsCHAR(node,';') && next && nodeIsSIGIL(next) && nodeIsCHAR(next,'}'))
return PRUNE_CURRENT;
/* keep all other sigils */
return PRUNE_NO;
}
/* keep anything else */
return PRUNE_NO;
}
/* prune nodes from the list */
Node* CssPruneNodes(Node *head) {
Node* curr = head;
while (curr) {
/* see if/how we can prune this node */
int prune = CssCanPrune(curr);
/* prune. each block is responsible for moving onto the next node */
Node* prev = curr->prev;
Node* next = curr->next;
switch (prune) {
case PRUNE_PREVIOUS:
/* discard previous node */
CssDiscardNode(prev);
/* reset "head" if that's what got pruned */
if (prev == head)
head = curr;
break;
case PRUNE_CURRENT:
/* discard current node */
CssDiscardNode(curr);
/* reset "head" if that's what got pruned */
if (curr == head)
head = prev ? prev : next;
/* backup and try again if possible */
curr = prev ? prev : next;
break;
case PRUNE_NEXT:
/* discard next node */
CssDiscardNode(next);
/* stay on current node, and try again */
break;
default:
/* move ahead to next node */
curr = next;
break;
}
}
/* return the (possibly new) head node back to the caller */
return head;
}
/* ****************************************************************************
* Minifies the given CSS, returning a newly allocated string back to the
* caller (YOU'RE responsible for freeing its memory).
* ****************************************************************************
*/
char* CssMinify(const char* string) {
char* results;
/* PASS 1: tokenize CSS into a list of nodes */
Node* head = CssTokenizeString(string);
if (!head) return NULL;
/* PASS 2: collapse nodes */
CssCollapseNodes(head);
/* PASS 3: prune nodes */
head = CssPruneNodes(head);
if (!head) return NULL;
/* PASS 4: re-assemble CSS into single string */
{
Node* curr;
char* ptr;
/* allocate the result buffer to the same size as the original CSS; in
* a worst case scenario that's how much memory we'll need for it.
*/
Newxz(results, (strlen(string)+1), char);
ptr = results;
/* copy node contents into result buffer */
curr = head;
while (curr) {
memcpy(ptr, curr->contents, curr->length);
ptr += curr->length;
curr = curr->next;
}
*ptr = 0;
}
/* free memory used by node list */
CssFreeNodeList(head);
/* return resulting minified CSS back to caller */
return results;
}
MODULE = CSS::Minifier::XS PACKAGE = CSS::Minifier::XS
PROTOTYPES: disable
SV*
minify(string)
SV* string
INIT:
char* buffer = NULL;
RETVAL = &PL_sv_undef;
CODE:
/* minify the CSS */
buffer = CssMinify( SvPVX(string) );
/* hand back the minified CSS (if we had any) */
if (buffer != NULL) {
RETVAL = newSVpv(buffer, 0);
Safefree( buffer );
}
OUTPUT:
RETVAL