The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
// Copyright 2010 Google Inc.
// Modified by Yuji Kaneda
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//      http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ------------------------------------------------------------------------

#include <string>

namespace SZaru {
// Structure for estimating of weights of elements in a sequence,
// without actually storing the elements.
// based on the CountSzlSketch algorithm from
// "Finding Frequent Items in Data Streams",
//   Moses Charikar, Kevin Chen, and Martin Farach-Colton.

template <typename Value>
class SzlSketch {
 public:
  // Bounds on the number of tables we create.
  // Always create an odd number of tables so we can compute the median.
  static const int kMinTabs = 15;
  static const int kMaxTabs = 31;

  typedef struct {
    struct {
      unsigned elem;
      unsigned sign;
    } index[kMaxTabs];
  } Index;

  // Return the dimensions of a sketch with approximately totalSize entries.
  static void Dims(int totalSize, int* nTabs, int* tabSize);

  // Build a new sketch with a given table dimensions,
  // which must have been computed by Dims.
  SzlSketch(int nTabs, int tabSize);

  ~SzlSketch();

  // Compute the index into the sketch for a string.
  void ComputeIndex(const std::string& s, Index* index);

  // Adjust the estimated weight for an index.
  void AddSub(Index* index, Value value, int isAdd);

  // Compute the estimated weight for an index.
  void Estimate(Index* index, Value* est);

  // Compute the estimated standard deviation of values in the sketch.
  void StdDeviation(double* deviations);

  // Add another sketch's weight into this sketch.
  void AddSketch(const SzlSketch& sketch);

  // Encode a sketch.
  // void Encode(SzlEncoder* enc);

  // Parse a pickled sketch.
  // bool Decode(SzlDecoder* dec);

  // Return the number of tables in the sketch.
  int nTabs() const { return nTabs_; }

  // Return the size of each table in the sketch.
  int tabSize() const { return tabSize_; }

  // Estimate memory currently allocated.
  // int Memory();

 private:
  // Operations on our weights.
  // const SzlOps& weight_ops_;

  // storage of the sketch.
  Value* weights_;           // 2d array of weights[nTabs][tabsize]
  // double tmp_[kMaxTabs];      // temporary computation array
  int nTabs_;                   // kMinTabs <= nTabs <= kMaxTabs
  int tabSize_;                 // must be pow(2)
  int tabBits_;                 // log2(tabSize_) == tabBits_
};

}

#include "emitters/szlsketch.cc"