package Parse::Flash::Cookie; # $Id: Cookie.pm 154 2008-01-29 20:05:06Z aff $ use strict; use warnings; our $VERSION = '0.08'; use Log::Log4perl; use XML::Writer; # to create XML output use URI::Escape; # to safely display buffer in debug mode use DateTime; use Config; # to determine endianness use constant LENGTH_OF_SHORT => 2; use constant LENGTH_OF_INTEGER => 2; use constant LENGTH_OF_LONG => 4; use constant LENGTH_OF_FLOAT => 8; use constant END_OF_OBJECT => "\x00\x00\x09"; # The below constants are little-endian. Adobe flash cookies are # little-endian even on big-endian platforms. use constant POSITIVE_INFINITY => "\x7F\xF0\x00\x00\x00\x00\x00\x00"; use constant NEGATIVE_INFINITY => "\xFF\xF0\x00\x00\x00\x00\x00\x00"; use constant NOT_A_NUMBER => "\x7F\xF8\x00\x00\x00\x00\x00\x00"; my $conf = q( log4perl.category.sol.parser = WARN, ScreenAppender log4perl.appender.ScreenAppender = Log::Log4perl::Appender::Screen log4perl.appender.ScreenAppender.stderr = 0 log4perl.appender.ScreenAppender.layout = PatternLayout log4perl.appender.ScreenAppender.layout.ConversionPattern=[%p] %m%n ); Log::Log4perl::init( \$conf ); my $log = Log::Log4perl::->get_logger(q(sol.parser)); my $file = undef; my $FH = undef; my $writer = undef; my %datatype = ( 0x0 => 'number', 0x1 => 'boolean', 0x2 => 'string', 0x3 => 'object', 0x5 => 'null', 0x6 => 'undefined', 0x7 => 'pointer', 0x8 => 'array', 0xa => 'raw-array', 0xb => 'date', 0xd => 'object-string-number-boolean-textformat', 0xf => 'object-xml', 0x10 => 'object-customclass', ); # Return true if architecture is little-endian, otherwise false sub _is_little_endian { return ( $Config{byteorder} =~ qr/^1234/ ) ? 1 : 0; } # Add an XML element to current document. Do nothing if $writer is # undef. Return true. sub _addXMLElem { # Skip if not XML mode return unless $writer; my ($type, $name, $value) = @_; $writer->startTag( 'data', 'type' => $type, 'name' => $name, ); $writer->characters($value) if (defined($value)); $writer->endTag(); return 1; } # Parse and return type and value as list. Expects to be called in # list context. Argument name is passed in order to have the # individual subs create XML elements themselves. sub _getTypeAndValue { my $name = shift; $log->logdie("expected to be called in LIST context") if !wantarray(); # Read data type my $value = undef; my $type = _readBytes(1); my $type_as_txt = $datatype{$type}; if (!exists($datatype{$type})) { $log->warn(qq{Missing datatype for '$type'!}) if $log->is_warn(); } # Read element depending on type if($type == 0) { $log->debug(q{float}) if $log->is_debug(); $value = _getFloat($name); } elsif($type == 1){ $log->debug(q{bool}) if $log->is_debug(); $value = _getBool($name); } elsif ($type == 2) { $log->debug(q{string}) if $log->is_debug(); $value = _getString($name); } elsif($type == 3){ $log->debug(q{object}) if $log->is_debug(); $value = _getObject($name); } elsif($type == 5) { # null $log->debug(q{null}) if $log->is_debug(); $value = undef; _addXMLElem('null', $name); } elsif($type == 6) { # undef $log->debug(q{undef}) if $log->is_debug(); $value = undef; _addXMLElem('undef', $name); } elsif($type == 7){ # pointer $log->debug(q{pointer}) if $log->is_debug(); $value = _getPointer($name); } elsif($type == 8){ # array $log->debug(q{array}) if $log->is_debug(); $value = _getArray($name); } elsif($type == 0xb){ # date $value = _getDate($name); } elsif($type == 0xf){ # doublestring $log->logdie("Not implemented yet: doublestring"); } elsif($type == 0x10){ # customclass $log->debug(q{customclass}) if $log->is_debug(); $value = _getObject($name, 1); } else { $log->logdie("Unknown type:$type" ); } return ($type_as_txt, $value); } # Parse object and return contents as comma separated string. sub _getObject { my $name = shift; my $customClass = shift; my @retvals = (); $writer->startTag( 'data', 'type' => 'object', 'name' => $name, ) if $writer; LOOP: while (eof($FH) != 1) { # Read until end flag is detected : 00 00 09 if (_readRaw(3) eq END_OF_OBJECT) { #return join(q{,}, @retvals); last LOOP; } # "un-read" the 3 bytes seek($FH, -3, 1) or $log->logdie("seek failed"); # Read name $name = _readString(); $log->debug(qq{name:$name}) if $log->is_debug(); # Read 2nd name if customClass is set if ($customClass) { push @retvals, q{class_name=} . $name . q{;}; $name = _readString(); $log->debug(qq{name:$name (2nd name - customClass)}) if $log->is_debug(); $customClass = 0; } # Get data type and value my ($type, $value) = _getTypeAndValue($name); { no warnings q{uninitialized}; # allow undefined values $log->debug(qq{type:$type value:$value}) if $log->is_debug(); push @retvals, $name . q{;} . $value; } } $writer->endTag() if $writer; return join(q{,}, @retvals); } # Parse array and return contents as comma separated string. sub _getArray { my $name = shift; my @retvals = (); my $length = _readLong(); if($length == 0) { return _getObject(); } $writer->startTag( 'data', 'type' => 'array', 'length' => $length, 'name' => $name, ) if $writer; ELEMENT: while ($length-- > 0) { $name = _readString(); if (!defined($name)) { last ELEMENT; } my $retval = undef; my ($type, $value) = _getTypeAndValue($name); { no warnings q{uninitialized}; # allow undef values $log->debug(qq{$name;$type;$value}) if $log->is_debug(); $retval = qq{$name;$type;$value}; } push @retvals, $retval; } $writer->endTag() if $writer; # Now expect END_OF_OBJECT tag to be next if (_readRaw(3) eq END_OF_OBJECT) { return join(q{,}, @retvals); } $log->error(q{Did not find expected END_OF_OBJECT! at end of array!}) if $log->is_error(); return; } ################################# # Utility functions - does not generate XML output # Parse and return a given number of bytes (unformatted) sub _readRaw { my $len = shift; $log->logdie("missing length argument") unless $len; my $buffer = undef; my $num = read($FH, $buffer, $len); return $buffer; } # Parse and return a string: The first 2 bytes contains the string # length, succeeded by the string itself. Read length first unless # length is given, otherwise read the given number of bytes. sub _readString { my $len = shift; my $buffer = undef; my $num = undef; $log->debug(qq{len not given as arg}) if $log->is_debug() && !$len; # read length from filehandle unless set $len = join(q{}, _readShort(2)) unless ($len); # return undef if length is zero return unless $len; $log->debug(qq{len:$len}) if $log->is_debug(); $num = read($FH, $buffer, $len); if ($log->is_debug()) { $log->debug(qq{buffer:} . uri_escape($buffer)); } return $buffer; } # Parse and return a given number of bytes sub _readBytes { my $len = shift || 1; my $buffer = undef; my $num = read($FH, $buffer, $len); return unpack 'C*', $buffer; # An unsigned char (octet) value. } # Parse and return signed short (integer) number, default 2 bytes sub _readSignedShort { my $len = shift || LENGTH_OF_SHORT; my $buffer = undef; my $num = read($FH, $buffer, $len); (_is_little_endian()) ? return unpack 's*', reverse $buffer : return unpack 's*', $buffer; } # Parse and return short (integer) number, default 2 bytes sub _readShort { my $len = shift || LENGTH_OF_SHORT; my $buffer = undef; my $num = read($FH, $buffer, $len); (_is_little_endian()) ? return unpack 'S*', reverse $buffer : return unpack 'S*', $buffer; } # Parse and return integer number, default 2 bytes sub _readInt { my $len = shift || LENGTH_OF_INTEGER; my $buffer = undef; my $num = read($FH, $buffer, $len); return unpack 'C*', reverse $buffer; } # Parse and return long integer number, default 4 bytes sub _readLong { my $len = shift || LENGTH_OF_LONG; my $buffer = undef; my $num = read($FH, $buffer, $len); return unpack 'C*', reverse $buffer; } # Parse and return floating point number: default 8 bytes sub _readFloat { my $len = shift || LENGTH_OF_FLOAT; my $buffer = undef; my $num = read($FH, $buffer, $len); # Check special numbers - do not rely on OS/compiler to tell the # truth. if ($buffer eq POSITIVE_INFINITY) { return q{inf}; } elsif ($buffer eq NEGATIVE_INFINITY) { return q{-inf}; } elsif ($buffer eq NOT_A_NUMBER) { return q{nan}; } (_is_little_endian()) ? return unpack 'd*', reverse $buffer : return unpack 'd*', $buffer; } ################################# ### Functions that gets data and creates XML output # Get next boolean element. Return 1 if the element's value is # non-zero, otherwise 0. Add XML node if in XML mode. sub _getBool { my $name = shift; my $value = _readBytes(1); if ($value !~ qr/^[01]$/) { my $orgval = $value; $value = ($value) ? 1 : 0; $log->warn(qq{Unexpected boolean value '$orgval' was converted to $value}) if $log->is_warn(); } _addXMLElem('boolean', $name, $value); return $value; } # Get next string element. Return the element's value. Add XML node # if in XML mode sub _getString { my $name = shift; my $value = _readString(); _addXMLElem('string', $name, $value); return $value; } # Return floating point number - create XML sub _getFloat { my $name = shift; my $value = _readFloat(); _addXMLElem('number', $name, $value); # Yes it's called number, not float return $value; } # Return a date object - create XML sub _getDate { my $name = shift; # Date consists of a float (8 bytes) value followed by a signed short (2 # bytes) UTC offset my $msec = _readFloat(); my $utcoffset = - _readSignedShort(2) / 60; $log->debug(qq{msec:$msec utcoffset:$utcoffset}) if $log->is_debug(); # Create datetime object starting on Jan 1st 1970 and add msec to # get the given date my $dt = DateTime->from_epoch( epoch => 0 )->add( seconds => $msec / 1000 ); $writer->comment("DateObject:Milliseconds Count From Jan. 1, 1970; Timezone UTC + Offset.") if $writer; $writer->startTag( 'data', 'type' => 'date', 'name' => $name, 'msec' => $msec, 'date' => $dt->ymd() . q{ } . $dt->hms(), 'utcoffset' => $utcoffset, ) if $writer; $writer->endTag() if $writer; my $retval = undef; { no warnings q{uninitialized}; # allow undef values $log->debug(qq{date;$msec;$utcoffset}) if $log->is_debug(); $retval = qq{date;$msec;$utcoffset}; } return $retval; } # Return a pointer. The value read indicates the element index of the # element pointed to. sub _getPointer { my $name = shift; my $value =_readShort(); $log->debug(qq{name:$name value:$value}) if $log->is_debug(); _addXMLElem('pointer', $name, $value); # Yes it's called number, not float return $value; } ################################################################## # Parse and return file header - 16 bytes in total. Return name if # file starts with sol header, otherwise undef. Failure means the # 'TCSO' tag is missing. sub _getHeader { # skip first 6 bytes $log->debug(q{header: skip first 6 bytes}) if $log->is_debug(); _readString(6); # next 4 bytes should contain 'TSCO' tag if (_readString(4) ne q{TCSO}) { $log->error("missing TCSO - not a sol file") if $log->is_error(); return; # failure } # Skip next 7 bytes $log->debug(q{header: skip next 7 bytes}) if $log->is_debug(); _readString(7); # Read next byte (length of name) + the name my $name = _readString(_readInt(1)); $log->debug("name:$name") if $log->is_debug(); # Skip next 4 bytes $log->debug(q{header: skip next 4 bytes}) if $log->is_debug(); _readString(4); return $name; # ok } # Parse and return an element. In scalar context, return element # content as semi colon separated string, in list context return # element's name, type and value as a list. sub _getElem { my $retval = undef; # Read element length and name my $name = _readString(_readInt(2)); #$log->debug(qq{element name:$name}) if $log->is_debug(); # Read data type and value my ($type, $value) = _getTypeAndValue($name); # Read trailer (single byte) my $trailer = _readBytes(1); if ($trailer != 0) { $log->warn(qq{Expected 00 trailer, got '$trailer'}) if $log->is_warn(); } { no warnings q{uninitialized}; # allow undef values $log->info(qq{$name;$type;$value}) if $log->is_info(); # Context sensitive return if (wantarray()) { return ($name, $type, $value); } else { return qq{$name;$type;$value}; } } } # parse file and return contents as a textual list sub to_text { my $file = shift; $log->logdie( q{Missing argument file.}) if (!$file); $log->logdie(qq{No such file '$file'}) if (! -f $file); $log->debug("start") if $log->is_debug(); open($FH,"< $file") || $log->logdie("Error opening file $file"); $log->debug(qq{file:$file}) if $log->is_debug(); binmode($FH); my @retvals = (); # Read header my $name = _getHeader() or $log->logdie("Invalid sol header"); push @retvals, $name; # Read data elements while (eof($FH) != 1) { $log->debug(q{read element}) if $log->is_debug(); my $string = _getElem(); push @retvals, $string; } close($FH) or $log->logdie(q{failed to close filehandle!}); return @retvals; } # Parse file and return contents as a scalar containing XML # representing the file's content sub to_xml { my $file = shift; $log->logdie( q{Missing argument file.}) if (!$file); $log->logdie(qq{No such file '$file'}) if (! -f $file); $log->debug("start") if $log->is_debug(); open($FH,"< $file") || $log->logdie("Error opening file $file"); $log->debug(qq{file:$file}) if $log->is_debug(); binmode($FH); my $output = undef; $writer = new XML::Writer(OUTPUT => \$output, DATA_MODE => 1, DATA_INDENT => 4 ); # Read header my $headername = _getHeader() or $log->logdie("Invalid sol header"); $writer->startTag( 'sol', 'name' => $headername, 'created_by' => __PACKAGE__, 'version' => $VERSION ); # Read data elements while (eof($FH) != 1) { $log->debug(q{read element}) if $log->is_debug(); my ($name, $type, $value) = _getElem(); } close($FH) or $log->logdie(q{failed to close filehandle!}); $writer->endTag('sol'); $writer->end(); return $output; } 1; __END__ =pod =head1 NAME Parse::Flash::Cookie - A flash cookie parser. =head1 SYNOPSIS use Parse::Flash::Cookie; my @content = Parse::Flash::Cookie::to_text("settings.sol"); print join("\n", @content); my $xml = Parse::Flash::Cookie::to_xml("settings.sol"); print $xml; =head1 DESCRIPTION Local Shared Object (LSO), sometimes known as flash cookies, is a cookie-like data entity used by Adobe Flash Player. LSOs are stored as files on the local file system with the I<.sol> extension. This module reads a Local Shared Object file and return content as a list. =head1 FUNCTIONS =over =item to_text Parses file and return contents as a textual list. =back =over =item to_xml Parses file and return contents as a scalar containing XML representing the file's content. =back =head1 SOL DATA FORMAT The SOL files use a binary encoding that is I regardless of platform architecture. This means the SOL files are platform independent, but they have to be interpreted differently on I and I platforms. See L for more. It consists of a header and any number of elements. Both header and the elements have variable lengths. =head2 Header The header has the following structure: =over =item * 6 bytes (discarded) =item * 4 bytes that should contain the string 'TSCO' =item * 7 bytes (discarded) =item * 1 byte that signifies the length of name (X bytes) =item * X bytes name =item * 4 bytes (discarded) =back =head2 Element Each element has the following structure: =over =item * 2 bytes length of element name (Y bytes) =item * Y bytes element name =item * 1 byte data type =item * Z bytes data (depending on the data type) =item * 1 byte trailer =back =head1 TODO =head2 Pointer Resolve the value of object being pointed at for datatype I (instead of index). =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Parse::Flash::Cookie You can also look for information at: =over 4 =item * RT: CPAN's request tracker L =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * Search CPAN L =back =head1 SEE ALSO =head2 L =head2 Local Shared Object http://en.wikipedia.org/wiki/Local_Shared_Object =head2 Flash coders Wiki doc on .Sol File Format http://sourceforge.net/docman/?group_id=131628 =head1 ALTERNATIVE IMPLEMENTATIONS http://objection.mozdev.org/ (Firefox extension, Javascript, by Trevor Hobson) http://www.sephiroth.it/python/solreader.php (PHP, by Alessandro Crugnola) http://osflash.org/s2x (Python, by Aral Balkan) =head1 COPYRIGHT & LICENSE Copyright 2007 Andreas Faafeng, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut