#!/usr/bin/pugs use v6; # External packages used by packages in this file, that don't export symbols: # (None Yet) ########################################################################### ########################################################################### # Constant values used by packages in this file: my Str $EMPTY_STR is readonly = q{}; ########################################################################### ########################################################################### class Relation-0.0.1 { # External packages used by the Relation class, that do export symbols: # (None Yet) # Attributes of every Relation object: has Set of Str $!heading; # Set of Str # Each Set member is a name of one of this Relation's attributes. # Note that it is valid for a Relation to have zero attributes. has Set of Mapping(Str) of Any $!body; # Set of Mapping(Str) of Any # Each Set member is a tuple of this Relation, where the tuple # is represented as a Mapping; each key of that Mapping is the name # of one of this Relation's attributes and the value corresponding # to that is the value for that attribute for that tuple. # Note that it is valid for a Relation to have zero tuples. has Bool $!is_mutable; # Bool # Says whether this Relation is mutable or not; # depending on the implementing class, it can default to True or # False, but once it has a value of False, it can not be changed. ########################################################################### submethod BUILD (Set of Str :$heading? = set(), Set of Mapping(Str) of Any :$body? = set()) { for $body.values -> $tuple { die "The keys of a member of arg :$body? do not match the members" ~ " of arg :$heading?; the header of a tuple in :$body?" ~ " does not match the header of this relation." if !(all( $tuple.keys ) === $heading); } $!heading = $heading; $!body = $body; $!is_mutable = Bool::False; return; } ########################################################################### method export_as_hash () returns Hash { return { 'heading' => $!heading, 'body' => $!body, }; } ########################################################################### method is_mutable () returns Bool { return $!is_mutable; } ########################################################################### method heading () returns Set of Str { return $!heading; } method body () returns Set of Mapping(Str) of Any { return $!body; } method size () returns Int { return $!body.size(); } method exists (Mapping(Str) of Any $tuple!) returns Bool { return $tuple === any($!body); } ########################################################################### method equal (Relation $other!) returns Bool { return $other!heading === $?SELF!heading and $other!body === $?SELF!body; } method not_equal (Relation $other!) returns Bool { return !$?SELF.equal( $other ); } ########################################################################### method rename (Mapping(Str) of Str $mapping!) returns Relation { die "Some keys of $mapping! do not match this relation's attributes." if !(all( $mapping.keys ) === any( $!heading )); die "Some values of $mapping! duplicate each other." if +all( $mapping.values ) != +$mapping.values; die "Some values of $mapping! duplicate attribute names of this" ~ " relation that aren't being renamed." if any( $mapping.values ) === any( $!heading.difference( all( $mapping.keys ) ) ); my %full_map = { $!heading.values.map:{ $_ => $_ }, $mapping.pairs }; return Relation.new( heading => set( %full_map.values ), body => set( $!body.values.map:{ mapping( $_.pairs.map:{ %full_map{$_.key} => $_.value } ) }), ); } ########################################################################### method project (Set of Str $attrs!) returns Relation { if ($attrs.size() == 0) { return Relation.new( heading => set(), body => set( mapping() ) ); } if ($attrs === $!heading) { return $?SELF; } die "Some members of $attrs! do not match this relation's attributes." if !(all( $attrs ) === any( $!heading )); return Relation.new( heading => $attrs, body => set( $!body.values.map:{ mapping( $_.pairs.grep:{ $_.key === any( $attrs ) } ) }), ); } &select ::= &project; method project_all_but (Set of Str $attrs!) returns Relation { return $?SELF.project( $!heading.difference( $attrs ) ); } &select_all_but ::= &project_all_but; ########################################################################### method extend (Mapping(Str) of Code $attrs!) returns Relation { if ($attrs.size() == 0) { return $?SELF; } die "Some keys of $attrs! duplicate attribute names of this relation." if any( $attrs ) === any( $!heading ); return Relation.new( heading => $!heading.union( set( $attrs.keys ) ), body => set( $!body.values.map:{ mapping( $_.pairs, $attrs.pairs.map:{ $_.key => $_.value.( CALLER::<$_> ) } ) }), ); } ########################################################################### method wrap ( Mapping(Str) of Set of Str $mapping! ) returns Relation { # TODO. } ########################################################################### method unwrap ( Set of Str $attrs! ) returns Relation { # TODO. } ########################################################################### method map (Set of Str $heading!, Code $replacements!) returns Relation { return Relation.new( heading => $heading, body => set( $!body.values.map:{ $replacements.( $_ ) }), ); } ########################################################################### method restrict (Code $predicate!) returns Relation { return Relation.new( heading => $!heading, body => set( $!body.values.grep:{ $predicate.( $_ ) }), ); } &where ::= &restrict; &grep ::= &restrict; ########################################################################### multi method union (Relation $other!) returns Relation { die "The heading of the relation in $other does" ~ " not match the heading of the invocant relation." if !($other!heading === $!heading); return $?SELF!_union( $other ); } multi method union (Relation *@others) returns Relation { for @others -> $r2 { die "The heading of at least one given relation in @others does" ~ " not match the heading of the invocant relation." if !($r2!heading === $!heading); } my Relation $r1 = $?SELF; for @others -> $r2 { $r1 = $r1!_union( $r2 ); } return $r1; } &plus ::= &union; ########################################################################### multi method exclusion (Relation $other!) returns Relation { die "The heading of the relation in $other does" ~ " not match the heading of the invocant relation." if !($other!heading === $!heading); return $?SELF!_exclusion( $other ); } multi method exclusion (Relation *@others) returns Relation { for @others -> $r2 { die "The heading of at least one given relation in @others does" ~ " not match the heading of the invocant relation." if !($r2!heading === $!heading); } my Relation $r1 = $?SELF; for @others -> $r2 { $r1 = $r1!_exclusion( $r2 ); } return $r1; } &disjoint_union ::= &exclusion; &d_union ::= &exclusion; &symmetric_difference ::= &exclusion; ########################################################################### multi method intersection (Relation $other!) returns Relation { die "The heading of the relation in $other does" ~ " not match the heading of the invocant relation." if !($other!heading === $!heading); return $?SELF!_intersection( $other ); } multi method intersection (Relation *@others) returns Relation { for @others -> $r2 { die "The heading of at least one given relation in @others does" ~ " not match the heading of the invocant relation." if !($r2!heading === $!heading); } my Relation $r1 = $?SELF; for @others -> $r2 { $r1 = $r1!_intersection( $r2 ); } return $r1; } &intersect ::= &intersection; ########################################################################### method difference (Relation $other!) returns Relation { die "The heading of the relation in $other does" ~ " not match the heading of the invocant relation." if !($other!heading === $!heading); return Relation.new( heading => $!heading, body => $!body.difference( $other!body ), ); } &minus ::= &difference; &except ::= &difference; ########################################################################### method semidifference (Relation $other!) returns Relation { return $?SELF.difference( $?SELF.semijoin( $other ) ); } &semiminus ::= &semidifference; ¬_matching ::= &semidifference; ########################################################################### method semijoin (Relation $other!) returns Relation { die "The heading of the relation in $other is not a full subset" ~ " of the heading of the invocant relation." if !(all( $other!heading ) === any( $!heading )); # First look for trivial cases that are more efficient to do different. if ($!body.size() == 0 or $other!body.size() == 0) { # Both sources have zero tuples; so does result. return Relation.new( heading => $!heading, body => set() ); } if ($other!heading.size() == 0) { # Second source is identity-one tuple; result is first source. return $?SELF; } if ($other!heading === $!heading) { # Both sources have identical headings; result is src intersection. return $?SELF!_intersection( $other ); } # Now, the standard case of a semijoin. return $?SELF!_semijoin( $other ); } &matching ::= &semijoin; ########################################################################### multi method product (Relation $other!) returns Relation { die "The relation in $other has attributes in common with the invocant" ~ " relation." if any( $other!heading ) === any( $!heading ); # First look for trivial cases that are more efficient to do different. if ($!body.size() == 0 or $other!body.size() == 0) { # Both sources have zero tuples; so does result. return Relation.new( heading => $!heading.union( $other!heading ), body => set() ); } if ($!heading.size() == 0) { # First source is identity-one tuple; result is second source. return $other; } if ($other!heading.size() == 0) { # Second source is identity-one tuple; result is first source. return $?SELF; } # Now, the standard case of a cross-join. return $?SELF!_product( $other ); } multi method product (Relation *@others) returns Relation { if (+@others == 0) { return $?SELF; } my Relation @sources = ($?SELF, *@others); while (my $r1 = @sources.shift()) { for @sources -> $r2 { die "The heading of at least one given relation in @others has" ~ " attributes in common with either other relations" ~ " in @others or with the invocant." if any( $r2!heading ) === any( $r1.heading ); } } return $?SELF.join( @others ); } &cartesian_product ::= &product; &cross_product ::= &product; &cross_join ::= &product; ########################################################################### multi method join (Relation $other!) returns Relation { # First look for trivial cases that are more efficient to do different. if ($!body.size() == 0 or $other!body.size() == 0) { # Both sources have zero tuples; so does result. return Relation.new( heading => $!heading.union( $other!heading ), body => set() ); } if ($!heading.size() == 0) { # First source is identity-one tuple; result is second source. return $other; } if ($other!heading.size() == 0) { # Second source is identity-one tuple; result is first source. return $?SELF; } if ($other!heading === $!heading) { # Both sources have identical headings; result is src intersection. return $?SELF!_intersection( $other ); } if (all( $!heading ) === none( $other!heading )) { # Both sources have exclusive headings; result is cross-product. return $?SELF!_product( $other ); } # Both sources have overlapping non-identical headings. if (all( $other!heading ) === any( $!heading )) { # The second source's heading is a proper subset of the # first source's heading, so simplify to a semijoin. return $?SELF!_semijoin( $other ); } if (all( $!heading ) === any( $other!heading )) { # The first source's heading is a proper subset of the # second source's heading, so simplify to a semijoin. return $other!_semijoin( $?SELF ); } # Now, the standard case of a inner join. return $?SELF!_inner_join( $other ); } multi method join (Relation *@others) returns Relation { # First do some optimizing of the order that source relations are # combined, so to try and make the least expensive kinds of combining # occur first, and most expensive later. if (+@others == 0) { return $?SELF; } my Relation @sources = ($?SELF, *@others); my Relation @r_with_zero_tuples; my Relation @r_with_shared_attrs; my Relation @r_with_disjoint_attrs; while (my $r1 = @sources.shift()) { if ($r1!body.size() == 0) { @r_with_zero_tuples.push( $r1 ); } elsif ($r1!heading.size() == 0) { # identity-one tuples can be discarded as they have no effect } else { SWITCH: { for @sources -> $r2 { if (any( $r2!heading ) === any( $r1.heading )) { @r_with_shared_attrs.push( $r1 ); last SWITCH; } } @r_with_disjoint_attrs.push( $r1 ); } } } @r_with_shared_attrs = @r_with_shared_attrs.sort:{ $^b.heading.size() <=> $^a.heading.size() }; # sort widest to narrowest, to help get more semijoins @sources = (@r_with_zero_tuples, @r_with_shared_attrs, @r_with_disjoint_attrs); # TODO: more or better optimization. # Now do the actual combination work. # Start with identity-one relation and join all sources to it. my Relation $r1 .= new( heading => set(), body => set( mapping() ) ); for @sources -> $r2 { $r1 = $r1.join( $r2 ); } return $r1; } &natural_join ::= &join; ########################################################################### my method _union (Relation $other!) returns Relation { return Relation.new( heading => $!heading, body => $!body.union( $other!body ), ); } my method _exclusion (Relation $other!) returns Relation { return Relation.new( heading => $!heading, body => $!body.symmetric_difference( $other!body ), ); } my method _intersection (Relation $other!) returns Relation { return Relation.new( heading => $!heading, body => $!body.intersection( $other!body ), ); } my method _semijoin (Relation $other!) returns Relation { return Relation.new( heading => $!heading, body => set( $!body.values.grep:{ mapping( $_.pairs.map:{ $_.key === any( $other!heading ) } ) === any( $other!body.values ) } ), ); } my method _product (Relation $other!) returns Relation { return Relation.new( heading => $!heading.union( $other!heading ), body => set( gather { for $!body.values -> $t1 { for $other!body.values -> $t2 { take mapping( $t1.pairs, $t2.pairs ); } } } ), ); } my method _inner_join (Relation $other!) returns Relation { # This form takes the form of an ordinary natural join, # where some source attributes are in common, and each # source has attributes not in the other. if ($!body.size() > $other!body.size()) { # Another optimization: # In case it is faster for outer loop to have fewer # iterations rather than the inner loop having fewer. ($?SELF, $other) = ($other, $?SELF); } Set $common_h = $!heading.intersection( $other!heading ); Set $r1only_h = $!heading.difference( $other!heading ); Set $r2only_h = $other!heading.difference( $!heading ); return Relation.new( heading => $!heading.union( $other!heading ), body => set( gather { for $!body.values -> $t1 { $t1common_m = mapping( $t1.pairs.map:{ $_.key === any( $common_h ) } ) $t1only_m = mapping( $t1.pairs.map:{ $_.key === any( $r1only_h ) } ) for $other!body.values -> $t2 { $t2common_m = mapping( $t2.pairs.map:{ $_.key === any( $common_h ) } ) if ($t2common_m === $t1common_m) { $t2only_m = mapping( $t2.pairs.map:{ $_.key === any( $r2only_h ) } ) take mapping( $t1only_m.pairs, $t1common_m.pairs, $t2only_m.pairs, ); } } } } ), ); } ########################################################################### } # class Relation ########################################################################### ########################################################################### =pod =encoding utf8 =head1 NAME Relation - Relations for Perl 6 =head1 VERSION This document describes Relation version 0.0.1. =head1 SYNOPSIS use Relation; I =head1 DESCRIPTION This class implements a Relation data type that corresponds to the "relation" of logic and mathematics and philosophy ("a predicate ranging over more than one argument"), which is also the basis of the relational data model proposed by Edgar. F. Codd, upon which anything in the world can be modelled. A relation is essentially a set of mappings, or a set of logical tuples; a picture of one can look like a table, where each tuple is a row and each relation/tuple attribute is a column, but it is not the same as a table. The intended interface and use of this class in Perl programs is similar to the intended use of a L class; a Relation is like a Set that exists over N dimensions rather than one. The Relation operators are somewhat of a superset of the Set operators. Like the Set data type, the Relation data type is immutable. The value of a Relation object is determined when it is constructed, and the object can not be changed afterwards. If you want something similar but that is more mutable, you can accomplish that manually using a set of mappings, or a multi-dimensional object Hash, or various combinations of other data types. While the implementation can be changed greatly (it isn't what's important; the interface/behaviour is), this Relation data type is proposed to be generally useful, and very basic, and worthy of inclusion in the Perl 6 core, at least as worthy as a Set data type is. =head1 INTERFACE The interface of Relation is entirely object-oriented; you use it by creating objects from its member classes, usually invoking C on the appropriate class name, and then invoking methods on those objects. All of their class/object attributes are private, so you must use accessor methods. Relation does not declare any subroutines or export such. The usual way that Relation indicates a failure is to throw an exception; most often this is due to invalid input. If an invoked routine simply returns, you can assume that it has succeeded. =head2 The Relation Class A Relation object is an unordered set of tuples, each of which is an unordered set of named attributes; all tuples in a Relation are of the same degree, and the attribute names of each tuple are all the same as those of all the other tuples. For purposes of the Relation class' API, a tuple is represented by a Perl Mapping where each Mapping key is an attribute name and each Mapping value is the corresponding attribute value. Every Relation attribute has a name that is distinct from the other attributes, though several attributes may store values of the same class; every Relation tuple must be distinct from the other tuples. All Relation attributes may be individually addressed only by their names, and all Relation tuples may be individually addressed only by their values; neither may be addressed by any ordinal value. Note that it is valid for a Relation to have zero tuples. It is also valid for a Relation to have zero attributes, though such a Relation can have no more than a single tuple. A zero-attribute Relation with zero tuples or one tuple respectively have a special meaning in relational algebra which is analagous to what the identity numbers 0 and 1 mean to normal algebra. A picture of a Relation can look like a table, where each of its tuples is a row, and each attribute is a column, but a Relation is not a table. The Relation class is pure and deterministic, such that all of its class and object methods will each return the same result when invoked on the same object with the same arguments; they do not interact with the outside environment at all. A Relation object has 2 main attributes (implementation details subject to change) plus 1 extra attribute: =over =item C<$!heading> - B Set of Str - This contains zero or more Relation attribute names that define the heading of this Relation. Each attribute name is a character string. =item C<$!body> - B Set of Mapping(Str) of Any - This contains zero or more member tuples of the Relation; each Set member is a Mapping whose keys and values are attribute names and values. Each Mapping key of a Body tuple must match a Set member of Heading, and the value of every tuple in Body must be mutually distinct. =item C<$!is_mutable> - B Bool - This attribute is True if the Relation is allowed to and/or has the ability to mutate, and it is false if not. Looking forward to the near future where "Relation" is a Role rather than a Class, and some implementations are immutable rather than mutable, this property and/or same-named accessor method can be used to see if a Relation-doing object can mutate. Depending on the implementing class, it can default to True or False, but once it has a value of False, it can not be further changed; this class defaults it to False. =back This is the main Relation constructor method: =over =item C This method creates and returns a new Relation object, whose Heading and Body attributes are set respectively from the optional named parameters C<$heading> and C<$body>. If C<$heading> is undefined or an empty Set, the Relation has zero attributes. If C<$body> is undefined or an empty Set, the Relation has zero tuples. If a Relation has zero attributes, then C<$body> may be an Set with a single member that is an empty Mapping. =back A Relation object has these methods: =over =item C This method returns a deep copy of this Relation as a Hash ref of 2 elements, which correspond to the 2 named parameters of C. =item C This method returns this Relation's "is mutable" boolean attribute. =item C This method returns this Relation's heading. =item C This method returns this Relation's body. =item C This method returns a count of this Relation's member tuples as an Int. =item C This method returns a Bool indicating whether the argument C<$tuple> exists in / is a member of this Relation. =item C This method returns a Bool indicating whether the immutable identity of the argument C<$other> equals the immutable identity of the invocant. =item C This method returns the complement of C with the same argument. =item C This method is a generic relational operator that returns a new Relation which is the same as the invocant Relation but that some of its attributes are renamed. The argument C<$mapping> says which attributes are being renamed, with its keys being the old names and its values the new names. This method will fail if any C<$mapping> keys do not match invocant Relation attribute names, or any C<$mapping> values duplicate each other, or duplicate attribute names that aren't being renamed. This method supports renaming attributes to each others' names. =item C This method is a generic relational operator that returns a new Relation which has a subset of the original's attributes; that subset is the same as those attribute names in the argument C<$attrs>. The new Relation has all of the tuples of the original (or rather, the corresponding projection of each tuple), but that any duplicates following the projection have been eliminated. Trivial cases are where C<$attrs> is either empty or equal to the invocant Relation's header, in which case it returns the identity-one Relation or the invocant Relation respectively. This method will fail if any members of C<$attrs> do not match attribute names of the invocant Relation. This method has an alias named C