restructure project

2020-08-12 05:43:20 +00:00 · 2020-08-12 05:43:20 +00:00 · 960dc907e9
commit 960dc907e9
parent a317d8cce1
87 changed files with 23 additions and 17 deletions
--- a/benchmarks/micro/Makefile
+++ b/benchmarks/micro/Makefile
@ -0,0 +1,3 @@
+NAME = micro-bench
+SRCS = $(shell find src/ -name "*.c" -o -name "*.cc")
+include $(AM_HOME)/Makefile
--- a/benchmarks/micro/include/benchmark.h
+++ b/benchmarks/micro/include/benchmark.h
@ -0,0 +1,113 @@
+#ifndef __BENCHMARK_H__
+#define __BENCHMARK_H__
+
+#include <am.h>
+#include <klib.h>
+#include <klib-macros.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Postfix unit helpers: written after a number, `2 MB` expands to
+// `2 * 1024 * 1024` and `32 KB` to `32 * 1024` (byte counts).
+#define MB * 1024 * 1024
+#define KB * 1024
+
+#define REF_CPU    "i7-7700K @ 4.20GHz"
+#define REF_SCORE  100000
+
+#define REPEAT  1
+
+//                  size |  heap | time |  checksum
+#define QSORT_S {     100,   1 KB,     0, 0x08467105}
+#define QSORT_M {   30000, 128 KB,     0, 0xa3e99fe4}
+#define QSORT_L {  100000, 640 KB,  5114, 0xed8cff89}
+#define QUEEN_S {       8,   0 KB,     0, 0x0000005c}
+#define QUEEN_M {      11,   0 KB,     0, 0x00000a78}
+#define QUEEN_L {      12,   0 KB,  4707, 0x00003778}
+#define    BF_S {       4,  32 KB,     0, 0xa6f0079e}
+#define    BF_M {      25,  32 KB,     0, 0xa88f8a65}
+#define    BF_L {     180,  32 KB, 23673, 0x9221e2b3}
+#define   FIB_S {       2,   1 KB,     0, 0x7cfeddf0}
+#define   FIB_M {      23,  16 KB,     0, 0x94ad8800}
+#define   FIB_L {      91, 256 KB, 28318, 0xebdc5f80}
+#define SIEVE_S {     100,   1 KB,     0, 0x00000019}
+#define SIEVE_M {  200000,  32 KB,     0, 0x00004640}
+#define SIEVE_L {10000000,   2 MB, 39361, 0x000a2403}
+#define  PZ15_S {       0,   1 KB,     0, 0x00000006}
+#define  PZ15_M {       1, 256 KB,     0, 0x0000b0df}
+#define  PZ15_L {       2,   2 MB,  4486, 0x00068b8c}
+#define DINIC_S {      10,   8 KB,     0, 0x0000019c}
+#define DINIC_M {      80, 512 KB,     0, 0x00004f99}
+#define DINIC_L {     128,   1 MB, 10882, 0x0000c248}
+#define  LZIP_S {     128, 128 KB,     0, 0xe05fc832}
+#define  LZIP_M {   50000,   1 MB,     0, 0xdc93e90c}
+#define  LZIP_L { 1048576,   4 MB,  7593, 0x8d62c81f}
+#define SSORT_S {     100,   4 KB,     0, 0x4c555e09}
+#define SSORT_M {   10000, 512 KB,     0, 0x0db7909b}
+#define SSORT_L {  100000,   4 MB,  4504, 0x4f0ab431}
+#define   MD5_S {     100,   1 KB,     0, 0xf902f28f}
+#define   MD5_M {  200000, 256 KB,     0, 0xd4f9bc6d}
+#define   MD5_L {10000000,  16 MB, 17239, 0x27286a42}
+
+#define BENCHMARK_LIST(def) \
+  def(qsort, "qsort", QSORT_S, QSORT_M, QSORT_L, "Quick sort") \
+  def(queen, "queen", QUEEN_S, QUEEN_M, QUEEN_L, "Queen placement") \
+  def(   bf,    "bf",    BF_S,    BF_M,    BF_L, "Brainf**k interpreter") \
+  def(  fib,   "fib",   FIB_S,   FIB_M,   FIB_L, "Fibonacci number") \
+  def(sieve, "sieve", SIEVE_S, SIEVE_M, SIEVE_L, "Eratosthenes sieve") \
+  def( 15pz,  "15pz",  PZ15_S,  PZ15_M,  PZ15_L, "A* 15-puzzle search") \
+  def(dinic, "dinic", DINIC_S, DINIC_M, DINIC_L, "Dinic's maxflow algorithm") \
+  def( lzip,  "lzip",  LZIP_S,  LZIP_M,  LZIP_L, "Lzip compression") \
+  def(ssort, "ssort", SSORT_S, SSORT_M, SSORT_L, "Suffix sort") \
+  def(  md5,   "md5",   MD5_S,   MD5_M,   MD5_L, "MD5 digest") \
+
+// Each benchmark will run REPEAT times
+
+// Declares the three entry points of one benchmark: bench_<name>_prepare,
+// bench_<name>_run and bench_<name>_validate. Expanded once per entry of
+// BENCHMARK_LIST just below.
+#define DECL(_name, _sname, _s, _m, _l, _desc) \
+  void bench_##_name##_prepare(); \
+  void bench_##_name##_run(); \
+  int bench_##_name##_validate();
+
+BENCHMARK_LIST(DECL)
+
+// One input configuration of a benchmark. The *_S / *_M / *_L initialisers
+// fill the fields in order: size, heap, time, checksum.
+typedef struct Setting {
+  int size;                 // problem size read by the benchmark
+  unsigned long mlim, ref;  // mlim: heap limit in bytes; ref: reference time used when scoring
+  uint32_t checksum;        // expected value the benchmark's validate() compares against
+} Setting;
+
+// Descriptor of one benchmark: its entry points, display strings and the
+// three input configurations.
+typedef struct Benchmark {
+  void (*prepare)();        // builds the input
+  void (*run)();            // the workload itself
+  int (*validate)();        // non-zero when the result matches the expected checksum
+  const char *name, *desc;  // short name and human-readable description
+  Setting settings[3];      // initialised from the _S, _M and _L rows, in that order
+} Benchmark;
+
+extern Benchmark *current;
+extern Setting *setting;
+
+// Outcome of a single benchmark run.
+typedef struct Result {
+  int pass;                 // value returned by the benchmark's validate()
+  unsigned long tsc, msec;  // msec: elapsed milliseconds; tsc: presumably a cycle count — TODO confirm it is still used
+} Result;
+
+void prepare(Result *res);
+void done(Result *res);
+
+// memory allocation
+void* bench_alloc(size_t size);
+void bench_free(void *ptr);
+
+// random number generator
+void bench_srand(uint32_t seed);
+uint32_t bench_rand(); // return a random number between 0..32767
+
+// checksum
+uint32_t checksum(void *start, void *end);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/benchmarks/micro/src/15pz/15pz.cc
+++ b/benchmarks/micro/src/15pz/15pz.cc
@ -0,0 +1,88 @@
+#include <benchmark.h>
+#include "puzzle.h"
+#include "heap.h"
+
+const int N = 4;
+
+static int PUZZLE_S[N*N] = {
+  1, 2, 3, 4,
+  5, 6, 7, 8,
+  9, 10, 0, 11,
+  13, 14, 15, 12,
+};
+
+static int PUZZLE_M[N*N] = {
+  1, 2, 3, 4,
+  5, 6, 7, 8,
+  12, 0, 14, 13,
+  11, 15, 10, 9,
+};
+
+static int PUZZLE_L[N*N] = {
+  0, 2, 3, 4,
+  9, 6, 7, 8,
+  5, 11, 10, 12,
+  1, 15, 13, 14,
+};
+
+static int ans;
+
+extern "C" {
+
+// Nothing to set up here: the puzzle and the heap are built in bench_15pz_run().
+void bench_15pz_prepare() {
+}
+
+// A* search from the start position selected by setting->size (0, 1 or 2).
+// MAXN is both the capacity handed to the heap and the cap on the number of
+// states popped. On success `ans` becomes (stored path length of the solved
+// state) * (number of pops); if the loop ends without reaching the solution,
+// `ans` stays -1.
+void bench_15pz_run() {
+  N_puzzle<N> puzzle;  // default-constructed (random) puzzle, replaced in the switch below
+  int MAXN;
+
+  switch (setting->size) {
+    case 0: puzzle = N_puzzle<N>(PUZZLE_S); MAXN = 10; break;
+    case 1: puzzle = N_puzzle<N>(PUZZLE_M); MAXN = 2048; break;
+    case 2: puzzle = N_puzzle<N>(PUZZLE_L); MAXN = 16384; break;
+    default: assert(0);
+  }
+  assert(puzzle.solvable());
+
+  // The heap object lives in bench_alloc() memory and is set up with init()
+  // rather than a constructor.
+  auto *heap = (Updatable_heap<N_puzzle<N>> *) bench_alloc(sizeof(Updatable_heap<N_puzzle<N>>));
+  heap->init(MAXN);
+  heap->push( puzzle, 0 );
+
+  int n = 0;  // number of states popped so far
+  ans = -1;
+
+  while( heap->size() != 0 && n != MAXN ) {
+    N_puzzle<N> top = heap->pop();
+    ++n;
+
+    if ( top == N_puzzle<N>::solution() ) {
+      // We are done
+      ans = heap->length(top) * n;
+      return;
+    }
+
+    // Expand the state: every legal move is pushed with a path one longer
+    // than the path recorded for `top`.
+    if ( top.tile_left_possible() ) {
+      heap->push( top.tile_left(), heap->length( top ) + 1 );
+    }
+
+    if ( top.tile_right_possible() ) {
+      heap->push( top.tile_right(), heap->length( top ) + 1 );
+    }
+
+    if ( top.tile_up_possible() ) {
+      heap->push( top.tile_up(), heap->length( top ) + 1 );
+    }
+
+    if ( top.tile_down_possible() ) {
+      heap->push( top.tile_down(), heap->length( top ) + 1 );
+    }
+  }
+}
+
+
+// Returns non-zero when the value computed by bench_15pz_run() equals the
+// expected checksum of the active setting.
+int bench_15pz_validate() {
+  const uint32_t expected = setting->checksum;
+  const uint32_t actual = (uint32_t)ans;
+  return actual == expected;
+}
+
+}
+
--- a/benchmarks/micro/src/15pz/heap.h
+++ b/benchmarks/micro/src/15pz/heap.h
@ -0,0 +1,227 @@
+// Author:  Douglas Wilhelm Harder
+// Copyright (c) 2009 by Douglas Wilhelm Harder.  All rights reserved.
+
+// Returns the larger of a and b; b when neither is greater.
+template <typename T>
+T max(T a, T b) {
+  if ( a > b ) {
+    return a;
+  }
+  return b;
+}
+
+// Binary min-heap of states ordered by Step::weight(), paired with a chained
+// hash table so that a state seen before can be located and have its weight
+// lowered in place. All storage comes from bench_alloc(); init() must be
+// called before any other member.
+template <typename T>
+class Updatable_heap {
+  private:
+    int M;                  // capacity passed to init(); also the hash-table size
+    class Step;
+    Step **hash_table;      // bucket heads; buckets are lists linked through Step::next
+    Step **heap;            // 1-indexed heap array: heap[1] is the minimum
+    int heap_size;          // number of entries currently in the heap
+    int maximum_heap_size;  // largest heap_size reached so far
+
+    void inline swap( int, int );
+    void percolate_down();
+    void percolate_up( int );
+    Step *pointer( T const & ) const;
+
+  public:
+    void init(int m);
+    ~Updatable_heap();
+    T pop();
+    void push( T const &, int );
+    int size() const;
+    int maximum_size() const;
+    int length( T const & ) const;
+};
+
+// One recorded state: node of a hash bucket list and, while queued, an entry
+// of the heap array.
+template <typename T>
+class Updatable_heap<T>::Step {
+  public:
+    T element;            // the stored state
+    Step *next;           // next node in the same hash bucket
+    int heap_index;       // current position inside Updatable_heap::heap
+    int path_length;      // path length given when the step was created
+    int path_weight;      // path length + element.lower_bound(); the heap key
+    bool visited;         // set to false by init(); consulted by push()
+    Step *previous_step;  // set to 0 by init()
+
+    void init( T const &, Step *, int, int );
+    int length() const;
+    int weight() const;
+};
+
+// Allocates the heap array and the hash table and resets the heap to empty.
+//   m: capacity; it is also the number of hash buckets. Buckets are chosen
+//      with `hash & (m - 1)`, so only a power-of-two m reaches every bucket.
+template <typename T>
+void Updatable_heap<T>::init(int m) {
+  M = m;
+  // The heap is 1-indexed (heap[1] is the root, heap[0] is never used), so
+  // M entries need M + 1 slots. With only M slots, heap[M] would overlap the
+  // allocation that follows it.
+  heap = (Step **)bench_alloc(sizeof(void *) * (M + 1));
+  hash_table = (Step **)bench_alloc(sizeof(void *) * (M + 1));
+
+  heap_size = 0;
+  maximum_heap_size = 0;
+  for ( int i = 0; i < M; ++i ) {
+    hash_table[i] = 0;
+  }
+}
+
+// Hands every Step reachable from the hash buckets back to the allocator.
+// The successor is read before the node is released.
+template <typename T>
+Updatable_heap<T>::~Updatable_heap() {
+  for ( int i = 0; i < M; ++i ) {
+    Step *ptr = hash_table[i];
+
+    while ( ptr != 0 ) {
+      Step *tmp = ptr;
+      ptr = ptr->next;
+      bench_free( tmp );
+    }
+  }
+}
+
+// Removes and returns the state with the smallest weight.
+// An empty heap yields a default-constructed T.
+template <typename T>
+T Updatable_heap<T>::pop() {
+  const int count = size();
+
+  if ( count == 0 ) {
+    return T();
+  }
+
+  T top = heap[1]->element;
+
+  if ( count == 1 ) {
+    // Removing the only entry: nothing to reorder.
+    heap_size = 0;
+    return top;
+  }
+
+  assert( count > 1 );
+
+  // Move the last entry to the root, shrink, then restore heap order.
+  heap[1] = heap[count];
+  heap[1]->heap_index = 1;
+  --heap_size;
+  percolate_down();
+
+  return top;
+}
+
+// Exchanges heap slots i and j and refreshes the heap_index stored in both
+// steps.
+template <typename T>
+void inline Updatable_heap<T>::swap( int i, int j ) {
+  Step *from_i = heap[i];
+  Step *from_j = heap[j];
+
+  heap[i] = from_j;
+  heap[j] = from_i;
+
+  from_j->heap_index = i;
+  from_i->heap_index = j;
+}
+
+// Restores heap order starting from the root after pop() placed the last
+// entry there.
+template <typename T>
+void Updatable_heap<T>::percolate_down() {
+  int n = 1;
+
+  // While the node has two children.
+  while ( 2*n + 1 <= size() ) {
+    // Stop only when the node is strictly lighter than both children; on a
+    // tie the node is still swapped down.
+    if ( heap[n]->weight() < heap[2*n]->weight() && heap[n]->weight() < heap[2*n + 1]->weight() ) {
+      return;
+    }
+
+    // Descend towards the lighter child (the right child on a tie).
+    if ( heap[2*n]->weight() < heap[2*n + 1]->weight() ) {
+      swap( n, 2*n );
+      n = 2*n;
+    } else {
+      assert( heap[2*n]->weight() >= heap[2*n + 1]->weight() );
+
+      swap( n, 2*n + 1 );
+      n = 2*n + 1;
+    }
+  }
+
+  // A node with only a left child: one last comparison.
+  if ( 2*n == size() &&  heap[2*n]->weight() < heap[n]->weight() ) {
+    swap( n, 2*n );
+  }
+}
+
+// Moves the entry at index n towards the root while its parent is strictly
+// heavier.
+template <typename T>
+void Updatable_heap<T>::percolate_up( int n ) {
+  while ( n != 1 ) {
+    const int above = n/2;
+
+    if ( heap[above]->weight() <= heap[n]->weight() ) {
+      return;
+    }
+
+    swap( above, n );
+    n = above;
+  }
+}
+
+// Inserts state pz reached by a path of path_length moves, or lowers the
+// weight of pz if it is already recorded.
+//   pz:          state to insert or update
+//   path_length: number of moves taken to reach pz
+template <typename T>
+void Updatable_heap<T>::push( T const &pz, int path_length ) {
+  Step *existing = pointer( pz );
+
+  if ( existing == 0 ) {
+    // The new entry goes to heap[heap_size + 1], so at most M entries fit.
+    assert( heap_size < M );
+    ++heap_size;
+
+    Step *step = (Step*)bench_alloc(sizeof(Step));
+    const unsigned int bucket = pz.hash() & (M - 1);
+
+    // Link the new step at the head of its bucket and at the end of the heap.
+    step->init( pz, hash_table[bucket], size(), path_length );
+    hash_table[bucket] = step;
+    heap[size()] = step;
+
+    percolate_up( size() );
+
+    maximum_heap_size = max( maximum_heap_size, size() );
+  } else {
+    if ( !existing->visited ) {
+      // Known state reached more cheaply: only the heap key is lowered.
+      // The stored path_length is left untouched; changing that would change
+      // what length() reports, and with it the benchmark's result value.
+      if ( path_length + existing->element.lower_bound() < existing->weight() ) {
+        existing->path_weight = path_length + existing->element.lower_bound();
+        percolate_up( existing->heap_index );
+      }
+    }
+  }
+}
+
+// Number of entries currently in the heap.
+template <typename T>
+int Updatable_heap<T>::size() const {
+  return heap_size;
+}
+
+// Largest number of entries the heap has held since init().
+template <typename T>
+int Updatable_heap<T>::maximum_size() const {
+  return maximum_heap_size;
+}
+
+// Path length recorded for pz, or 2147483647 when pz has never been pushed.
+template <typename T>
+int Updatable_heap<T>::length( T const &pz ) const {
+  Step *found = pointer( pz );
+
+  if ( found != 0 ) {
+    return found->length();
+  }
+
+  return 2147483647;
+}
+
+// Looks pz up in its hash bucket; returns its Step, or 0 when it is absent.
+template <typename T>
+typename Updatable_heap<T>::Step *Updatable_heap<T>::pointer( T const &pz ) const {
+  Step *node = hash_table[pz.hash() & (M - 1)];
+
+  // Walk the bucket until a matching element or the end of the list.
+  while ( node != 0 && !( node->element == pz ) ) {
+    node = node->next;
+  }
+
+  return node;
+}
+
+/****************************************************
+ * ************************************************ *
+ * *                   Iterator                   * *
+ * ************************************************ *
+ ****************************************************/
+
+// Fills in a freshly allocated step.
+//   pz:   state to store
+//   n:    current head of the hash bucket the step is linked into
+//   hi:   index of the step in the heap array
+//   dist: path length from the start state
+template <typename T>
+void Updatable_heap<T>::Step::init( T const &pz, Step *n, int hi, int dist ) {
+  // Bookkeeping fields; they do not depend on one another.
+  previous_step = 0;
+  visited = false;
+  heap_index = hi;
+  next = n;
+
+  // The heap key is the path so far plus the heuristic of the stored state.
+  element = pz;
+  path_length = dist;
+  path_weight = dist + element.lower_bound();
+}
+
+// Path length stored when the step was created.
+template <typename T>
+int Updatable_heap<T>::Step::length() const {
+  return path_length;
+}
+
+// Heap key: path length plus the heuristic lower bound.
+template <typename T>
+int Updatable_heap<T>::Step::weight() const {
+  return path_weight;
+}
+
--- a/benchmarks/micro/src/15pz/puzzle.h
+++ b/benchmarks/micro/src/15pz/puzzle.h
@ -0,0 +1,475 @@
+// Author:  Douglas Wilhelm Harder
+// Copyright (c) 2009 by Douglas Wilhelm Harder.  All rights reserved.
+// Url: https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/N_puzzles/
+
+// An N x N sliding-tile puzzle position. Tile 0 is the blank. The Manhattan
+// distance to the solved position and a hash of the board are kept alongside
+// the board so the search can read them without recomputing.
+template <int N>
+class N_puzzle {
+  private:
+    bool puzzle_valid;          // false for boards that failed validation or resulted from an illegal move
+    uint8_t zero_i, zero_j;     // row and column of the blank
+    int8_t manhattan_distance;  // sum over non-blank tiles of row + column distance to their goal cell
+    int8_t puzzle[N][N];        // tile numbers, row-major
+    int hash_value;             // value computed by determine_hash()
+
+    void determine_hash();
+
+    static int abs( int n ) { return ( n < 0 ) ? -n : n; }
+
+  public:
+    N_puzzle();                      // random permutation drawn with bench_rand()
+    N_puzzle( int array[N*N] );      // board given in row-major order
+    N_puzzle( N_puzzle const & );
+    N_puzzle &operator=( N_puzzle const & );
+
+    bool solvable() const;
+    bool valid() const;
+    int lower_bound() const;
+    unsigned int hash() const;
+
+    // tile_X_possible(): whether a tile can slide in direction X.
+    bool tile_up_possible() const;
+    bool tile_down_possible() const;
+    bool tile_left_possible() const;
+    bool tile_right_possible() const;
+
+    // tile_X(): the position after sliding a tile in direction X.
+    N_puzzle tile_up() const;
+    N_puzzle tile_down() const;
+    N_puzzle tile_left() const;
+    N_puzzle tile_right() const;
+
+    bool operator==( N_puzzle const & ) const;
+    bool operator!=( N_puzzle const & ) const;
+
+    N_puzzle static solution();
+};
+
+// Builds a random board: every cell draws one of the not-yet-used tile
+// numbers with bench_rand(). The result is not checked for solvability.
+template < int N >
+N_puzzle<N>::N_puzzle():
+puzzle_valid( true ),
+manhattan_distance( 0 ) {
+  int array[N*N];  // tile numbers still available are kept in array[0 .. N*N - n - 1]
+
+  for ( int i = 0; i < N*N; ++i ) {
+    array[i] = i;
+  }
+
+  int n = 0;  // number of cells filled so far
+
+  for ( int i = 0; i < N; ++i ) {
+    for ( int j = 0; j < N; ++j ) {
+      int k = bench_rand() % (N*N - n);
+      puzzle[i][j] = array[k];
+
+      if ( array[k] == 0 ) {
+        zero_i = i;
+        zero_j = j;
+      } else {
+        // Tile v belongs in row (v - 1) / N, column (v - 1) % N.
+        manhattan_distance += abs( ((array[k] - 1) / N) - i );
+        manhattan_distance += abs( ((array[k] - 1) % N) - j );
+      }
+
+      // Remove the drawn value by overwriting it with the last available one.
+      ++n;
+      array[k] = array[N*N - n];
+    }
+  }
+
+  determine_hash();
+}
+
+// Builds a board from N*N tile numbers given in row-major order.
+// The board is marked invalid (valid() == false) unless the input is a
+// permutation of 0 .. N*N - 1; values outside that range are rejected
+// instead of being used as an index.
+template < int N >
+N_puzzle<N>::N_puzzle( int array[N*N] ):
+puzzle_valid( true ),
+zero_i( 0 ),
+zero_j( 0 ),
+manhattan_distance( 0 ),
+hash_value( 0 ) {
+  bool check[N*N];  // check[v] becomes true once tile v has been seen
+
+  for ( int i = 0; i < N*N; ++i ) {
+    check[i] = false;
+  }
+
+  int n = 0;
+
+  for ( int i = 0; i < N; ++i ) {
+    for ( int j = 0; j < N; ++j ) {
+      const int tile = array[n];
+      ++n;
+
+      puzzle[i][j] = tile;
+
+      if ( tile < 0 || tile >= N*N ) {
+        // Not a tile number: never index check[] with it.
+        puzzle_valid = false;
+        continue;
+      }
+
+      check[tile] = true;
+
+      if ( tile == 0 ) {
+        zero_i = i;
+        zero_j = j;
+      } else {
+        // Tile v belongs in row (v - 1) / N, column (v - 1) % N.
+        manhattan_distance += abs( ((tile - 1) / N) - i );
+        manhattan_distance += abs( ((tile - 1) % N) - j );
+      }
+    }
+  }
+
+  if ( !puzzle_valid ) {
+    return;
+  }
+
+  // Every tile number must occur; with N*N cells this also rules out repeats.
+  for ( int i = 0; i < N*N; ++i ) {
+    if ( !check[i] ) {
+      puzzle_valid = false;
+      return;
+    }
+  }
+
+  determine_hash();
+}
+
+/*
+ * Determine a hash value for the puzzle: a polynomial in 1973 over the
+ * tiles in row-major order. The arithmetic is done on unsigned int so that
+ * the (certain) wrap-around is well defined; the stored bit pattern is the
+ * one two's-complement signed wrap-around would have produced.
+ */
+
+template < int N >
+void N_puzzle<N>::determine_hash() {
+  unsigned int h = 0;
+
+  for ( int i = 0; i < N; ++i ) {
+    for ( int j = 0; j < N; ++j ) {
+      h = h*1973u + (unsigned int)puzzle[i][j];
+    }
+  }
+
+  hash_value = (int)h;
+}
+
+// Copy constructor: duplicates the cached fields and every board cell.
+template < int N >
+N_puzzle<N>::N_puzzle( N_puzzle const &pz ):
+puzzle_valid( pz.puzzle_valid ),
+zero_i( pz.zero_i ),
+zero_j( pz.zero_j ),
+manhattan_distance( pz.manhattan_distance ),
+hash_value( pz.hash_value ) {
+  for ( int row = 0; row < N; ++row ) {
+    int8_t *dst = puzzle[row];
+    const int8_t *src = pz.puzzle[row];
+
+    for ( int col = 0; col < N; ++col ) {
+      dst[col] = src[col];
+    }
+  }
+}
+
+// Assignment: copies the board first, then the cached fields of rhs.
+template < int N >
+N_puzzle<N> &N_puzzle<N>::operator=( N_puzzle const &rhs ) {
+  for ( int row = 0; row < N; ++row ) {
+    for ( int col = 0; col < N; ++col ) {
+      puzzle[row][col] = rhs.puzzle[row][col];
+    }
+  }
+
+  hash_value = rhs.hash_value;
+  manhattan_distance = rhs.manhattan_distance;
+  zero_j = rhs.zero_j;
+  zero_i = rhs.zero_i;
+  puzzle_valid = rhs.puzzle_valid;
+
+  return *this;
+}
+
+
+/*
+ *  Moving a tile up is possible as long as
+ *  the blank is not in the last row.
+ */
+
+template <int N>
+bool N_puzzle<N>::tile_up_possible() const {
+  if ( !puzzle_valid ) {
+    return false;
+  }
+  return zero_i != N - 1;
+}
+
+/*
+ *  A tile can move down unless the blank sits in the first row
+ *  (or the puzzle is invalid).
+ */
+
+template <int N>
+bool N_puzzle<N>::tile_down_possible() const {
+  if ( !puzzle_valid ) {
+    return false;
+  }
+  return zero_i != 0;
+}
+
+/*
+ *  A tile can move left unless the blank sits in the last column
+ *  (or the puzzle is invalid).
+ */
+
+template <int N>
+bool N_puzzle<N>::tile_left_possible() const {
+  if ( !puzzle_valid ) {
+    return false;
+  }
+  return zero_j != N - 1;
+}
+
+/*
+ *  A tile can move right unless the blank sits in the first column
+ *  (or the puzzle is invalid).
+ */
+
+template <int N>
+bool N_puzzle<N>::tile_right_possible() const {
+  if ( !puzzle_valid ) {
+    return false;
+  }
+  return zero_j != 0;
+}
+
+// Position after sliding the tile below the blank upward; the blank moves
+// from row zero_i to row zero_i + 1. An invalid puzzle is returned as is;
+// if the blank is already in the last row the result is a copy marked invalid.
+template <int N>
+N_puzzle<N> N_puzzle<N>::tile_up() const {
+  if ( !puzzle_valid ) {
+    return *this;
+  }
+
+  N_puzzle result( *this );
+
+  if ( zero_i == N - 1 ) {
+    result.puzzle_valid = false;
+    return result;
+  }
+
+  // Only the moved tile's row distance changes: add its distance from the
+  // new row (zero_i) to its goal row, subtract the one from the old row.
+  result.manhattan_distance +=
+    abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - zero_i ) -
+    abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - (zero_i + 1) );
+
+  result.puzzle[zero_i][zero_j] = puzzle[zero_i + 1][zero_j];
+  ++result.zero_i;
+  result.puzzle[result.zero_i][zero_j] = 0;
+
+  result.determine_hash();
+
+  return result;
+}
+
+// Position after sliding the tile above the blank downward; the blank moves
+// from row zero_i to row zero_i - 1. An invalid puzzle is returned as is;
+// if the blank is already in the first row the result is a copy marked invalid.
+template <int N>
+N_puzzle<N> N_puzzle<N>::tile_down() const {
+  if ( !puzzle_valid ) {
+    return *this;
+  }
+
+  N_puzzle result( *this );
+
+  if ( zero_i == 0 ) {
+    result.puzzle_valid = false;
+    return result;
+  }
+
+  // Only the moved tile's row distance changes: add its distance from the
+  // new row (zero_i) to its goal row, subtract the one from the old row.
+  result.manhattan_distance +=
+    abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - zero_i ) -
+    abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - (zero_i - 1) );
+
+  result.puzzle[zero_i][zero_j] = puzzle[zero_i - 1][zero_j];
+  --result.zero_i;
+  result.puzzle[result.zero_i][zero_j] = 0;
+
+  result.determine_hash();
+
+  return result;
+}
+
+// Position after sliding the tile right of the blank to the left; the blank
+// moves from column zero_j to column zero_j + 1. An invalid puzzle is
+// returned as is; if the blank is already in the last column the result is
+// a copy marked invalid.
+template <int N>
+N_puzzle<N> N_puzzle<N>::tile_left() const {
+  if ( !puzzle_valid ) {
+    return *this;
+  }
+
+  N_puzzle result( *this );
+
+  if ( zero_j == N - 1 ) {
+    result.puzzle_valid = false;
+    return result;
+  }
+
+  // Only the moved tile's column distance changes: add its distance from the
+  // new column (zero_j) to its goal column, subtract the one from the old column.
+  result.manhattan_distance +=
+    abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - zero_j ) -
+    abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - (zero_j + 1) );
+
+  result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j + 1];
+  ++result.zero_j;
+  result.puzzle[zero_i][result.zero_j] = 0;
+
+  result.determine_hash();
+
+  return result;
+}
+
+// Position after sliding the tile left of the blank to the right; the blank
+// moves from column zero_j to column zero_j - 1. An invalid puzzle is
+// returned as is; if the blank is already in the first column the result is
+// a copy marked invalid.
+template <int N>
+N_puzzle<N> N_puzzle<N>::tile_right() const {
+  if ( !puzzle_valid ) {
+    return *this;
+  }
+
+  N_puzzle result( *this );
+
+  if ( zero_j == 0 ) {
+    result.puzzle_valid = false;
+    return result;
+  }
+
+  // Only the moved tile's column distance changes: add its distance from the
+  // new column (zero_j) to its goal column, subtract the one from the old column.
+  result.manhattan_distance +=
+    abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - zero_j ) -
+    abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - (zero_j - 1) );
+
+  result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j - 1];
+  --result.zero_j;
+  result.puzzle[zero_i][result.zero_j] = 0;
+
+  result.determine_hash();
+
+  return result;
+}
+
+/*
+ *  Check if the puzzle is solvable:  that is, check the
+ *  number of inversions plus the Manhattan distance of
+ *  the blank from the lower-right corner.
+ *
+ *  Run time:   O(n^2)
+ *  Memory:     O(n)
+ */
+
+// Returns true when the position is valid and the parity test below is even.
+template <int N>
+bool N_puzzle<N>::solvable() const {
+  if ( !valid() ) {
+    return false;
+  }
+
+  // Flatten the board row-major, treating the blank as the largest tile (N*N).
+  int entries[N*N];
+
+  for ( int i = 0; i < N; ++i ) {
+    for ( int j = 0; j < N; ++j ) {
+      if ( puzzle[i][j] == 0 ) {
+        entries[N*i + j] = N*N;
+      } else {
+        entries[N*i + j] = puzzle[i][j];
+      }
+    }
+  }
+
+  // Count inversions: pairs that appear in descending order.
+  int parity = 0;
+
+  for ( int i = 0; i < N*N; ++i ) {
+    for ( int j = i + 1; j < N*N; ++j ) {
+      if ( entries[i] > entries[j] ) {
+        ++parity;
+      }
+    }
+  }
+
+  // Add the blank's Manhattan distance from the lower-right cell (N-1, N-1).
+  parity += 2*N - 2 - zero_i - zero_j;
+
+  return ( (parity & 1) == 0 );
+}
+
+// Whether this object holds a usable board (see puzzle_valid).
+template <int N>
+bool N_puzzle<N>::valid() const {
+  return puzzle_valid;
+}
+
+/*
+ *  Return the heuristic lower bound used by the search: the cached
+ *  Manhattan distance between the puzzle and the solution for a valid
+ *  puzzle, and N*N*N for an invalid one.
+ *
+ *  (The Hamming-distance and discrete-distance variants that used to follow
+ *  the return statement could never execute and have been removed.)
+ */
+
+template <int N>
+int N_puzzle<N>::lower_bound() const {
+  return valid() ? manhattan_distance : N*N*N;
+}
+
+/*
+ *  puzzle1 == puzzle2
+ *
+ *  Two puzzles are considered to be equal if their entries
+ *  are equal:
+ *    If either puzzle is not valid, return false.
+ *    If the hash values are different, they are different; return false.
+ *    Otherwise, check all entries to see if they are the same.
+ */
+
+template < int N >
+bool N_puzzle<N>::operator==( N_puzzle const &rhs ) const {
+  // Invalid puzzles never compare equal, not even to themselves.
+  if ( !valid() ) {
+    return false;
+  }
+  if ( !rhs.valid() ) {
+    return false;
+  }
+
+  // Cheap rejection before the cell-by-cell comparison.
+  if ( hash() != rhs.hash() ) {
+    return false;
+  }
+
+  for ( int row = 0; row < N; ++row ) {
+    for ( int col = 0; col < N; ++col ) {
+      if ( puzzle[row][col] != rhs.puzzle[row][col] ) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/*
+ *  puzzle1 != puzzle2
+ *
+ *  Two puzzles are considered to be unequal if any of the entries
+ *  different:
+ *    If either puzzle is not valid, return false.
+ *    If the hash values are different, they are different; return true.
+ *    Otherwise, check all entries to see if they are the same.
+ */
+
+template < int N >
+bool N_puzzle<N>::operator!=( N_puzzle const &rhs ) const {
+  // With an invalid operand the answer is false, as for operator==.
+  const bool both_valid = valid() && rhs.valid();
+  if ( !both_valid ) {
+    return false;
+  }
+
+  // Different hashes imply different boards.
+  if ( hash() != rhs.hash() ) {
+    return true;
+  }
+
+  for ( int row = 0; row < N; ++row ) {
+    for ( int col = 0; col < N; ++col ) {
+      if ( puzzle[row][col] != rhs.puzzle[row][col] ) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/*
+ * unsigned int hash() const
+ *
+ *   Returns the pre-calculated hash value.
+ */
+
+template < int N >
+unsigned int N_puzzle<N>::hash() const {
+  // Invalid puzzles all hash to 0.
+  if ( !valid() ) {
+    return 0;
+  }
+  return hash_value;
+}
+
+/*
+ * N_puzzle<N>  solution()
+ *
+ *   Returns the correct solution to the N puzzle:
+ *
+ *       1  2  3         1   2   3   4
+ *  3x3: 4  5  6   4x4:  5   6   7   8
+ *       7  8            9  10  11  12
+ *                      13  14  15
+ */
+
+template <int N>
+N_puzzle<N> N_puzzle<N>::solution() {
+  int tiles[N*N];
+
+  // The blank goes last; every other cell pos holds tile pos + 1.
+  tiles[N*N - 1] = 0;
+
+  for ( int pos = N*N - 2; pos >= 0; --pos ) {
+    tiles[pos] = pos + 1;
+  }
+
+  return N_puzzle<N>( tiles );
+}
+
--- a/benchmarks/micro/src/bench.c
+++ b/benchmarks/micro/src/bench.c
@ -0,0 +1,181 @@
+#include <am.h>
+#include <benchmark.h>
+#include <limits.h>
+#include <klib-macros.h>
+
+Benchmark *current;
+Setting *setting;
+
+static char *hbrk;
+
+// Uptime in milliseconds: the `us` field of AM_TIMER_UPTIME divided by 1000
+// (microseconds, going by the field name).
+static uint32_t uptime_ms() { return io_read(AM_TIMER_UPTIME).us / 1000; }
+
+// The benchmark list
+
+// Turns one BENCHMARK_LIST row into a Benchmark initialiser, wiring up the
+// bench_<name>_* entry points and the three settings (_s, _m, _l).
+#define ENTRY(_name, _sname, _s, _m, _l, _desc) \
+  { .prepare = bench_##_name##_prepare, \
+    .run = bench_##_name##_run, \
+    .validate = bench_##_name##_validate, \
+    .name = _sname, \
+    .desc = _desc, \
+    .settings = {_s, _m, _l}, },
+
+// All benchmarks, in BENCHMARK_LIST order.
+Benchmark benchmarks[] = {
+  BENCHMARK_LIST(ENTRY)
+};
+
+// Running a benchmark
+// Starts the timer: stores the current uptime in res->msec.
+static void bench_prepare(Result *res) {
+  res->msec = uptime_ms();
+}
+
+// Rewinds the bump allocator to the start of the heap, rounded up to 8 bytes.
+static void bench_reset() {
+  hbrk = (void *)ROUNDUP(heap.start, 8);
+}
+
+// Stops the timer: replaces the start time in res->msec with the elapsed time.
+static void bench_done(Result *res) {
+  res->msec = uptime_ms() - res->msec;
+}
+
+// Returns NULL when the heap is large enough for the active setting's memory
+// limit, otherwise the reason the benchmark is skipped.
+static const char *bench_check(Benchmark *bench) {
+  const uintptr_t heap_bytes = (uintptr_t)heap.end - (uintptr_t)heap.start;
+  return (heap_bytes < setting->mlim) ? "(insufficient memory)" : NULL;
+}
+
+// Runs benchmark b once and records the outcome.
+//   b:   benchmark to execute (the caller also publishes it as `current`)
+//   res: receives the elapsed milliseconds of run() and the pass flag
+// prepare() is called before the timer starts, so only run() is timed.
+static void run_once(Benchmark *b, Result *res) {
+  bench_reset();       // reset malloc state
+  b->prepare();        // call benchmark's prepare function
+  bench_prepare(res);  // start timer
+  b->run();            // run it
+  bench_done(res);     // stop timer, collect elapsed time
+  res->pass = b->validate();
+}
+
+// Converts a run time into marks: (REF_SCORE / 1000) * reference time / msec.
+// A run time of 0 ms scores 0. Parameters b and tsc are not used.
+static unsigned long score(Benchmark *b, unsigned long tsc, unsigned long msec) {
+  if (msec != 0) {
+    return (REF_SCORE / 1000) * setting->ref / msec;
+  }
+  return 0;
+}
+
+// Entry point. `args` selects the input set: "test", "train" or "ref"
+// (NULL or "" means "ref"). Runs every benchmark REPEAT times, prints a
+// per-benchmark line and finally the overall verdict, score and total time.
+int main(const char *args) {
+  const char *setting_name = args;
+  if (args == NULL || strcmp(args, "") == 0) {
+    printf("Empty mainargs. Use \"ref\" by default\n");
+    setting_name = "ref";
+  }
+  int setting_id = -1;
+
+  // Index into Benchmark::settings: 0 = test, 1 = train, 2 = ref.
+  if      (strcmp(setting_name, "test" ) == 0) setting_id = 0;
+  else if (strcmp(setting_name, "train") == 0) setting_id = 1;
+  else if (strcmp(setting_name, "ref"  ) == 0) setting_id = 2;
+  else {
+    printf("Invalid mainargs: \"%s\"; "
+           "must be in {test, train, ref}\n", setting_name);
+    halt(1);
+  }
+
+  ioe_init();
+
+  printf("======= Running MicroBench [input *%s*] =======\n", setting_name);
+
+  unsigned long bench_score = 0;
+  int pass = 1;
+  uint32_t t0 = uptime_ms();
+
+  for (int i = 0; i < LENGTH(benchmarks); i ++) {
+    Benchmark *bench = &benchmarks[i];
+    // Publish the benchmark and its setting through the globals the
+    // benchmarks and the allocator read.
+    current = bench;
+    setting = &bench->settings[setting_id];
+    const char *msg = bench_check(bench);
+    printf("[%s] %s: ", bench->name, bench->desc);
+    if (msg != NULL) {
+      printf("Ignored %s\n", msg);
+    } else {
+      unsigned long msec = ULONG_MAX;  // best (smallest) time over the repeats
+      int succ = 1;
+      for (int i = 0; i < REPEAT; i ++) {
+        Result res;
+        run_once(bench, &res);
+        printf(res.pass ? "*" : "X");
+        succ &= res.pass;
+        if (res.msec < msec) msec = res.msec;
+      }
+
+      if (succ) printf(" Passed.");
+      else printf(" Failed.");
+
+      pass &= succ;
+
+      unsigned long cur = score(bench, 0, msec);
+
+      printf("\n");
+      if (setting_id != 0) {
+        printf("  min time: %d ms [%d]\n", (unsigned int)msec, (unsigned int)cur);
+      }
+
+      bench_score += cur;
+    }
+  }
+  uint32_t t1 = uptime_ms();
+
+  // Average over all benchmarks; ignored ones contribute 0.
+  bench_score /= LENGTH(benchmarks);
+
+  printf("==================================================\n");
+  printf("MicroBench %s", pass ? "PASS" : "FAIL");
+  if (setting_id == 2) {
+    printf("        %d Marks\n", (unsigned int)bench_score);
+    printf("                   vs. %d Marks (%s)\n", REF_SCORE, REF_CPU);
+  } else {
+    printf("\n");
+  }
+  printf("Total time: %d ms\n", t1 - t0);
+  return 0;
+}
+
+// Libraries
+
+// Bump allocator: returns `size` bytes (rounded up to a multiple of 8) of
+// zero-filled memory taken from the heap at hbrk. Asserts that the new break
+// stays inside the heap and that the total handed out does not exceed the
+// active setting's memory limit.
+void* bench_alloc(size_t size) {
+  size  = (size_t)ROUNDUP(size, 8);
+  char *old = hbrk;
+  hbrk += size;
+  assert((uintptr_t)heap.start <= (uintptr_t)hbrk && (uintptr_t)hbrk < (uintptr_t)heap.end);
+  // Zero the new region eight bytes at a time.
+  for (uint64_t *p = (uint64_t *)old; p != (uint64_t *)hbrk; p ++) {
+    *p = 0;
+  }
+  assert((uintptr_t)hbrk - (uintptr_t)heap.start <= setting->mlim);
+  return old;
+}
+
+// Intentionally empty: memory is reclaimed all at once by bench_reset().
+void bench_free(void *ptr) {
+}
+
+// State of the linear congruential generator behind bench_srand/bench_rand.
+static uint32_t seed = 1;
+
+// Re-seeds the generator; only the low 15 bits of _seed are kept.
+void bench_srand(uint32_t _seed) {
+  const uint32_t low15 = 0x7fff;
+  seed = _seed & low15;
+}
+
+// Advances the generator and returns a value in 0..32767.
+uint32_t bench_rand() {
+  const uint32_t next = seed * (uint32_t)214013L + (uint32_t)2531011L;
+  seed = next;
+  return (next >> 16) & 0x7fff;
+}
+
+// FNV-1a hash over the bytes of [start, end), followed by a bit-mixing step.
+//   start, end: byte range. Bytes are consumed four at a time while
+//               p + 4 < end, so the last 1..4 bytes of a non-empty range are
+//               not hashed. This is kept as is because the reference
+//               checksums were produced with it.
+// Returns the 32-bit checksum.
+uint32_t checksum(void *start, void *end) {
+  const uint32_t x = 16777619;
+  uint32_t h1 = 2166136261u;
+  for (uint8_t *p = (uint8_t*)start; p + 4 < (uint8_t*)end; p += 4) {
+    for (int i = 0; i < 4; i ++) {
+      h1 = (h1 ^ p[i]) * x;
+    }
+  }
+  // Additions and left shifts are done on uint32_t, where wrap-around is well
+  // defined. Only the right shifts go through int32_t, to keep the
+  // sign-propagating shift the previous all-signed code relied on.
+  uint32_t hash = h1;
+  hash += hash << 13;
+  hash ^= (uint32_t)((int32_t)hash >> 7);
+  hash += hash << 3;
+  hash ^= (uint32_t)((int32_t)hash >> 17);
+  hash += hash << 5;
+  return hash;
+}
--- a/benchmarks/micro/src/bf/bf.c
+++ b/benchmarks/micro/src/bf/bf.c
@ -0,0 +1,151 @@
+/*
+ Brainfuck-C ( http://github.com/kgabis/brainfuck-c )
+ Copyright (c) 2012 Krzysztof Gabis
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include <benchmark.h>
+
+static int ARR_SIZE;
+
+#define CODE            ">>+>>>>>,[>+>>,]>+[--[+<<<-]<[<+>-]<[<[->[<<<+>>>>+<-]<<[>>+>[->]<<[<]" \
+                        "<-]>]>>>+<[[-]<[>+<-]<]>[[>>>]+<<<-<[<<[<<<]>>+>[>>>]<-]<<[<<<]>[>>[>>" \
+                        ">]<+<<[<<<]>-]]+<<<]+[->>>]>>]>>[.>>>]"
+
+#define OP_END          0
+#define OP_INC_DP       1
+#define OP_DEC_DP       2
+#define OP_INC_VAL      3
+#define OP_DEC_VAL      4
+#define OP_OUT          5
+#define OP_IN           6
+#define OP_JMP_FWD      7
+#define OP_JMP_BCK      8
+
+#define SUCCESS         0
+#define FAILURE         1
+
+#define PROGRAM_SIZE    4096
+#define STACK_SIZE      512
+#define DATA_SIZE       4096
+
+#define STACK_PUSH(A)   (STACK[SP++] = A)
+#define STACK_POP()     (STACK[--SP])
+#define STACK_EMPTY()   (SP == 0)
+#define STACK_FULL()    (SP == STACK_SIZE)
+
+// One compiled instruction.
+struct instruction_t {
+  unsigned short operator;  // one of the OP_* codes
+  unsigned short operand;   // for OP_JMP_FWD / OP_JMP_BCK: index of the matching bracket
+};
+
+static struct instruction_t *PROGRAM;
+static unsigned short *STACK;
+static unsigned int SP;
+static const char *code;
+static char *input;
+
+// Translates the source text at `code` into PROGRAM, resolving bracket pairs
+// through STACK. Characters that are not instructions are skipped.
+// Returns SUCCESS, or FAILURE on unbalanced brackets, bracket nesting deeper
+// than STACK_SIZE, or a program that does not fit in PROGRAM_SIZE entries.
+static int compile_bf() {
+  unsigned short pc = 0;
+
+  while (*code && pc < PROGRAM_SIZE) {
+    struct instruction_t *insn = &PROGRAM[pc];
+    int emitted = 1;
+
+    switch (*code) {
+      case '>': insn->operator = OP_INC_DP; break;
+      case '<': insn->operator = OP_DEC_DP; break;
+      case '+': insn->operator = OP_INC_VAL; break;
+      case '-': insn->operator = OP_DEC_VAL; break;
+      case '.': insn->operator = OP_OUT; break;
+      case ',': insn->operator = OP_IN; break;
+      case '[':
+        insn->operator = OP_JMP_FWD;
+        if (STACK_FULL()) {
+          return FAILURE;
+        }
+        // Remember where the loop opens until its ']' is seen.
+        STACK_PUSH(pc);
+        break;
+      case ']': {
+        if (STACK_EMPTY()) {
+          return FAILURE;
+        }
+        unsigned short open_pc = STACK_POP();
+        // Cross-link the two brackets.
+        insn->operator = OP_JMP_BCK;
+        insn->operand = open_pc;
+        PROGRAM[open_pc].operand = pc;
+        break;
+      }
+      default:
+        emitted = 0;
+        break;
+    }
+
+    if (emitted) {
+      pc++;
+    }
+    code ++;
+  }
+
+  if (!STACK_EMPTY() || pc == PROGRAM_SIZE) {
+    return FAILURE;
+  }
+  PROGRAM[pc].operator = OP_END;
+  return SUCCESS;
+}
+
+static unsigned short *data;
+static char *output;
+static int noutput;
+
+// Interprets PROGRAM until OP_END, an unknown opcode, or the data pointer
+// leaving [0, DATA_SIZE). Input bytes are read from `input`; output bytes are
+// appended to `output` and counted in `noutput`.
+static void execute_bf() {
+  unsigned int pc = 0, ptr = 0;
+  // ptr is unsigned, so stepping below 0 wraps to a huge value and also ends the loop.
+  while (PROGRAM[pc].operator != OP_END && ptr < DATA_SIZE) {
+    switch (PROGRAM[pc].operator) {
+      case OP_INC_DP: ptr++; break;
+      case OP_DEC_DP: ptr--; break;
+      case OP_INC_VAL: data[ptr]++; break;
+      case OP_DEC_VAL: data[ptr]--; break;
+      case OP_OUT: output[noutput ++] = data[ptr]; break;
+      case OP_IN: data[ptr] = *(input ++); break;
+      // Jumps land on the matching bracket; the pc++ below then steps past it.
+      case OP_JMP_FWD: if(!data[ptr]) { pc = PROGRAM[pc].operand; } break;
+      case OP_JMP_BCK: if(data[ptr]) { pc = PROGRAM[pc].operand; } break;
+      default: return;
+    }
+    pc++;
+  }
+}
+
+// Allocates the interpreter's buffers and fills `input` with setting->size
+// pseudo-random alphanumeric characters (generator seeded with 1).
+void bench_bf_prepare() {
+  ARR_SIZE = setting->size;
+  SP = 0;
+  PROGRAM = bench_alloc(sizeof(PROGRAM[0]) * PROGRAM_SIZE);
+  STACK = bench_alloc(sizeof(STACK[0]) * STACK_SIZE);
+  data = bench_alloc(sizeof(data[0]) * DATA_SIZE);
+  code = CODE;
+  // One extra byte: bench_alloc() zero-fills, so the input ends with a 0 byte.
+  input = bench_alloc(ARR_SIZE + 1);
+  output = bench_alloc(DATA_SIZE);
+  noutput = 0;
+
+  bench_srand(1);
+  for (int i = 0; i < ARR_SIZE; i ++) {
+    input[i] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"[bench_rand() % 62];
+  }
+}
+
+// Compiles the program and runs it only if compilation succeeded.
+// After a failed compile PROGRAM is only partially built (jump operands may
+// be unresolved), so it is not executed; noutput then stays 0.
+void bench_bf_run() {
+  if (compile_bf() != SUCCESS) {
+    return;
+  }
+  execute_bf();
+}
+
+// Passes when exactly ARR_SIZE bytes were written and their checksum equals
+// the expected one.
+int bench_bf_validate() {
+  const uint32_t actual = checksum(output, output + noutput);
+  if (noutput != ARR_SIZE) {
+    return 0;
+  }
+  return actual == setting->checksum;
+}
--- a/benchmarks/micro/src/dinic/dinic.cc
+++ b/benchmarks/micro/src/dinic/dinic.cc
@ -0,0 +1,138 @@
+#include <benchmark.h>
+
+static int N;
+const int INF = 0x3f3f3f;
+
+// One directed edge of the flow network: endpoints, capacity and current flow.
+struct Edge {
+  int from, to, cap, flow;
+  // The default constructor deliberately leaves all fields uninitialised.
+  Edge(){}
+  Edge(int from, int to, int cap, int flow)
+    : from(from), to(to), cap(cap), flow(flow) {}
+};
+
+// Smaller of two values; returns y when the two compare equal.
+template<typename T>
+static inline T min(T x, T y) {
+  if (x < y) {
+    return x;
+  }
+  return y;
+}
+
+// Maximum-flow solver: repeated BFS levelling followed by DFS augmentation.
+// The graph is an adjacency list stored in flat arrays: head[v] is the index
+// of the first edge leaving v, nxt[e] the next edge of the same vertex, and
+// -1 terminates a list.
+struct Dinic {
+  int n, m, s, t;
+  Edge *edges;
+  int *head, *nxt, *d, *cur, *queue;
+  bool *vis;
+
+  // Allocate storage for an n-vertex graph and reset all adjacency lists.
+  // The edge capacity is sized for nold*nold + 2*nold edges, each stored
+  // together with its reverse edge, where nold = (n - 2) / 2.
+  void init(int n) {
+    int nold = (n - 2) / 2;
+    int maxm = (nold * nold + nold * 2) * 2;
+
+    edges = (Edge *)bench_alloc(sizeof(Edge) * maxm);
+    head = (int *)bench_alloc(sizeof(int) * n);
+    nxt = (int *)bench_alloc(sizeof(int) * maxm);
+    vis = (bool *)bench_alloc(sizeof(bool) * n);
+    d = (int *)bench_alloc(sizeof(int) * n);
+    cur = (int *)bench_alloc(sizeof(int) * n);
+    queue = (int *)bench_alloc(sizeof(int) * n);
+
+    this->n = n;
+    for (int i = 0; i < n; i ++) {
+      head[i] = -1;
+    }
+    m = 0;
+  }
+
+  // Add edge u->v with capacity c plus a zero-capacity reverse edge v->u.
+  // Zero-capacity requests are dropped. The pair occupies indices (2k, 2k+1),
+  // so the partner of edge i is i ^ 1.
+  void AddEdge(int u, int v, int c) {
+    if (c == 0) return;
+    edges[m] = Edge(u, v, c, 0);
+    nxt[m] = head[u];
+    head[u] = m++;
+    edges[m] = Edge(v, u, 0, 0);
+    nxt[m] = head[v];
+    head[v] = m++;
+  }
+
+  // Breadth-first search from s over edges with residual capacity; records
+  // the distance d[] of every reached vertex. Returns whether t was reached.
+  bool BFS() {
+    for (int i = 0; i < n; i ++) vis[i] = 0;
+    int qf = 0, qr = 0;
+    queue[qr ++] = s;
+    d[s] = 0;
+    vis[s] = 1;
+    while (qf != qr) {
+      int x = queue[qf ++];
+      for (int i = head[x]; i != -1; i = nxt[i]) {
+        Edge& e = edges[i];
+        if (!vis[e.to] && e.cap > e.flow) {
+          vis[e.to] = 1;
+          d[e.to] = d[x] + 1;
+          queue[qr ++] = e.to;
+        }
+      }
+    }
+    return vis[t];
+  }
+
+  // Push up to `a` units of flow from x towards t along edges that go exactly
+  // one BFS level deeper; returns the amount pushed. The scan starts at
+  // cur[x], but cur[x] itself is never advanced here.
+  int DFS(int x, int a) {
+    if (x == t || a == 0) return a;
+    int flow = 0, f;
+    for (int i = cur[x]; i != -1; i = nxt[i]) {
+      Edge& e = edges[i];
+      if (d[x] + 1 == d[e.to] && (f = DFS(e.to, min(a, e.cap-e.flow))) > 0) {
+        e.flow += f;
+        edges[i^1].flow -= f;
+        flow += f;
+        a -= f;
+        if (a == 0) break;
+      }
+    }
+    return flow;
+  }
+
+  // Compute the maximum flow from s to t: while BFS still reaches t, reset
+  // cur[] to the list heads and add the flow found by one DFS from s.
+  int Maxflow(int s, int t) {
+    this -> s = s; this -> t = t;
+    int flow = 0;
+    while (BFS()) {
+      for (int i = 0; i < n; i++)
+        cur[i] = head[i];
+      flow += DFS(s, INF);
+    }
+    return flow;
+  }
+};
+
+
+extern "C" {
+
+
+static Dinic *G;
+static int ans;
+
+// Build the benchmark graph: vertices 0..N-1 on one side, N..2N-1 on the
+// other, source 2N and sink 2N+1. Capacities come from the PRNG seeded with 1,
+// drawn in the same order as the edges are added.
+void bench_dinic_prepare() {
+  N = setting->size;
+  bench_srand(1);
+
+  const int source = 2 * N;
+  const int sink = source + 1;
+
+  G = (Dinic*)bench_alloc(sizeof(Dinic));
+  G->init(sink + 1);
+
+  // Cross edges between the two sides, capacity in [0, 10).
+  for (int left = 0; left < N; left ++) {
+    for (int right = N; right < 2 * N; right ++) {
+      G->AddEdge(left, right, bench_rand() % 10);
+    }
+  }
+
+  // Source and sink edges, capacity in [0, 1000).
+  for (int i = 0; i < N; i ++) {
+    const int from_source = bench_rand() % 1000;
+    G->AddEdge(source, i, from_source);
+    const int to_sink = bench_rand() % 1000;
+    G->AddEdge(N + i, sink, to_sink);
+  }
+}
+
+// Benchmark body: maximum flow from vertex 2N to vertex 2N+1, stored in `ans`.
+void bench_dinic_run() {
+  ans = G->Maxflow(2 * N, 2 * N + 1);
+}
+
+// Returns non-zero when the computed flow equals the setting's reference value.
+int bench_dinic_validate() {
+  return (uint32_t)ans == setting->checksum;
+}
+}
+
+
--- a/benchmarks/micro/src/fib/fib.c
+++ b/benchmarks/micro/src/fib/fib.c
@ -0,0 +1,64 @@
+#include <benchmark.h>
+
+// f(n) = (f(n-1) + f(n-2) + .. f(n-m)) mod 2^32
+
+#define N 2147483603
+static int M;
+
+// Store `data` at row i, column j of the row-major M x M matrix m.
+static void put(uint32_t *m, int i, int j, uint32_t data) {
+  uint32_t *cell = &m[i * M + j];
+  *cell = data;
+}
+
+// Load the element at row i, column j of the row-major M x M matrix m.
+static uint32_t get(uint32_t *m, int i, int j) {
+  const int offset = i * M + j;
+  return m[offset];
+}
+
+// c = a * b for M x M matrices, with arithmetic modulo 2^32. Each output cell
+// is zeroed and then accumulated in place, exactly one memory update per k.
+static inline void mult(uint32_t *c, uint32_t *a, uint32_t *b) {
+  for (int row = 0; row < M; row ++) {
+    for (int col = 0; col < M; col ++) {
+      uint32_t *out = &c[row * M + col];
+      *out = 0;
+      for (int k = 0; k < M; k ++) {
+        *out = *out + a[row * M + k] * b[k * M + col];
+      }
+    }
+  }
+}
+
+// Copy the M x M matrix b into a, element by element in row-major order.
+static inline void assign(uint32_t *a, uint32_t *b) {
+  for (int row = 0; row < M; row ++) {
+    for (int col = 0; col < M; col ++) {
+      const uint32_t value = get(b, row, col);
+      put(a, row, col, value);
+    }
+  }
+}
+
+static uint32_t *A, *ans, *T, *tmp;
+
+// Read the matrix dimension from the setting and allocate the four M x M
+// work matrices, in the order A, T, ans, tmp.
+void bench_fib_prepare() {
+  M = setting->size;
+  const int bytes = sizeof(uint32_t) * M * M;
+
+  uint32_t **slots[] = { &A, &T, &ans, &tmp };
+  for (unsigned int i = 0; i < sizeof(slots) / sizeof(slots[0]); i ++) {
+    *slots[i] = bench_alloc(bytes);
+  }
+}
+
+// Benchmark body: raise the recurrence matrix to the N-th power by binary
+// exponentiation. A and T start as the matrix with ones on the last row and on
+// the superdiagonal; ans starts as the identity and ends up holding T^N.
+void bench_fib_run() {
+  for (int row = 0; row < M; row ++) {
+    for (int col = 0; col < M; col ++) {
+      const uint32_t step = (row == M - 1 || col == row + 1);
+      put(A, row, col, step);
+      put(T, row, col, step);
+      put(ans, row, col, row == col);
+    }
+  }
+
+  int n = N;
+  while (n > 0) {
+    if (n & 1) {
+      // Fold the current power of T into the result.
+      mult(tmp, ans, T);
+      assign(ans, tmp);
+    }
+    // Square T for the next bit of the exponent.
+    mult(tmp, T, T);
+    assign(T, tmp);
+    n >>= 1;
+  }
+}
+
+// Returns non-zero when the bottom-right element of the result matrix equals
+// the setting's reference checksum.
+int bench_fib_validate() {
+  return get(ans, M-1, M-1) == setting->checksum;
+}
--- a/benchmarks/micro/src/lzip/lzip.c
+++ b/benchmarks/micro/src/lzip/lzip.c
@ -0,0 +1,29 @@
+#include "quicklz.h"
+#include <benchmark.h>
+
+static int SIZE;
+
+static qlz_state_compress *state;
+static char *blk;
+static char *compress;
+static int len;
+
+// Allocate the compressor state, a SIZE-byte input block and a SIZE + 400 byte
+// output buffer, then fill the input with pseudo-random lowercase letters.
+void bench_lzip_prepare() {
+  SIZE = setting->size;
+  bench_srand(1);
+
+  state = bench_alloc(sizeof(*state));
+  blk = bench_alloc(SIZE);
+  compress = bench_alloc(SIZE + 400);
+
+  int filled = 0;
+  while (filled < SIZE) {
+    blk[filled] = 'a' + bench_rand() % 26;
+    filled++;
+  }
+}
+
+// Benchmark body: compress the whole input block; `len` is the packet length.
+void bench_lzip_run() {
+  len = qlz_compress(blk, compress, SIZE, state);
+}
+
+// Returns non-zero when the checksum of the `len` compressed bytes equals the
+// setting's reference value.
+int bench_lzip_validate() {
+  return checksum(compress, compress + len) == setting->checksum;
+}
+
--- a/benchmarks/micro/src/lzip/quicklz.c
+++ b/benchmarks/micro/src/lzip/quicklz.c
@ -0,0 +1,761 @@
+// Fast data compression library
+// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
+// lar@quicklz.com
+//
+// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
+// released into public must be open source) or under a commercial license if such
+// has been acquired (see http://www.quicklz.com/order.html). The commercial license
+// does not cover derived or ported versions created by third parties under GPL.
+
+// 1.5.0 final
+
+#include "quicklz.h"
+
+#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0
+	#error quicklz.c and quicklz.h have different versions
+#endif
+
+#define MINOFFSET 2
+#define UNCONDITIONAL_MATCHLEN 6
+#define UNCOMPRESSED_END 4
+#define CWORD_LEN 4
+
+#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
+	#define OFFSET_BASE source
+	#define CAST (ui32)(size_t)
+#else
+	#define OFFSET_BASE 0
+	#define CAST
+#endif
+
+// Report a compile-time property of this build, selected by `setting`:
+// 0 compression level, 1/2 size of the compress/decompress state, 3 streaming
+// buffer size, 6 whether QLZ_MEMORY_SAFE is defined, 7/8/9 version
+// major/minor/revision. Any other selector yields -1.
+int qlz_get_setting(int setting)
+{
+#ifdef QLZ_MEMORY_SAFE
+	const int memory_safe = 1;
+#else
+	const int memory_safe = 0;
+#endif
+
+	switch (setting)
+	{
+		case 0:
+			return QLZ_COMPRESSION_LEVEL;
+		case 1:
+			return sizeof(qlz_state_compress);
+		case 2:
+			return sizeof(qlz_state_decompress);
+		case 3:
+			return QLZ_STREAMING_BUFFER;
+		case 6:
+			return memory_safe;
+		case 7:
+			return QLZ_VERSION_MAJOR;
+		case 8:
+			return QLZ_VERSION_MINOR;
+		case 9:
+			return QLZ_VERSION_REVISION;
+		default:
+			break;
+	}
+	return -1;
+}
+
+#if QLZ_COMPRESSION_LEVEL == 1
+// Returns 1 when src[1..n] all equal src[0] (trivially true for n == 0),
+// otherwise 0. The comparison walks from src[n] down towards src[1].
+static int same(const unsigned char *src, size_t n)
+{
+	for (; n > 0; n--)
+	{
+		if (src[n] != src[0])
+			return 0;
+	}
+	return 1;
+}
+#endif
+
+// Clear the compressor's hash table: level 1 nulls every slot's offset,
+// levels 2 and 3 zero every slot's insertion counter.
+static void reset_table_compress(qlz_state_compress *state)
+{
+	int slot = 0;
+	while (slot < QLZ_HASH_VALUES)
+	{
+#if QLZ_COMPRESSION_LEVEL == 1
+		state->hash[slot].offset = 0;
+#else
+		state->hash_counter[slot] = 0;
+#endif
+		slot++;
+	}
+}
+
+// Clear the decompressor's hash counters. Only level 2 has anything to reset;
+// at the other levels this is a no-op and the (void) casts silence
+// unused-variable warnings.
+static void reset_table_decompress(qlz_state_decompress *state)
+{
+	int i;
+	(void)state;
+	(void)i;
+#if QLZ_COMPRESSION_LEVEL == 2
+	for(i = 0; i < QLZ_HASH_VALUES; i++)
+	{
+		state->hash_counter[i] = 0;
+	}
+#endif
+}
+
+// Fold a fetched value into a hash-table index in [0, QLZ_HASH_VALUES).
+// Level 2 mixes two shifted copies into the value, the other levels one.
+static __inline ui32 hash_func(ui32 i)
+{
+#if QLZ_COMPRESSION_LEVEL == 2
+	const ui32 mixed = (i >> 9) ^ (i >> 13) ^ i;
+#else
+	const ui32 mixed = (i >> 12) ^ i;
+#endif
+	return mixed & (QLZ_HASH_VALUES - 1);
+}
+
+// Read `bytes` bytes at `src` as a little-endian unsigned integer, one byte at
+// a time so no alignment is required. `bytes` must be 1..4; any other count
+// yields 0.
+static __inline ui32 fast_read(void const *src, ui32 bytes)
+{
+	const unsigned char *p = (const unsigned char *)src;
+	ui32 ret = 0;
+	if (bytes >= 1 && bytes <= 4) {
+		for (ui32 i = 0; i < bytes; i ++) {
+			// Widen before shifting: a byte promoted to int and shifted left by
+			// 24 overflows a signed int (undefined behaviour) when it is >= 0x80.
+			ret |= (ui32)p[i] << (i * 8);
+		}
+	}
+	return ret;
+}
+
+// Hash-table index for the three bytes starting at `src`.
+static __inline ui32 hashat(const unsigned char *src)
+{
+	return hash_func(fast_read(src, 3));
+}
+
+// Store the low `bytes` bytes of `f` at `dst` in little-endian order, one byte
+// at a time so no alignment is required. This is the inverse of fast_read()
+// and does not depend on the host byte order. At most sizeof(f) bytes are
+// written; callers pass 2, 3 or 4.
+static __inline void fast_write(ui32 f, void *dst, size_t bytes)
+{
+	unsigned char *out = (unsigned char *)dst;
+	for (size_t i = 0; i < bytes && i < sizeof f; i ++) {
+		out[i] = (unsigned char)(f >> (i * 8));
+	}
+}
+
+
+// Decompressed size stored in a packet header. Bit 1 of the first byte selects
+// 4-byte size fields (otherwise 1-byte); the decompressed size is the second
+// field, right after the compressed size.
+size_t qlz_size_decompressed(const char *source)
+{
+	const ui32 width = (((*source) & 2) == 2) ? 4 : 1;
+	const ui32 mask = 0xffffffff >> ((4 - width)*8);
+	const ui32 field = fast_read(source + 1 + width, width);
+	return field & mask;
+}
+
+// Compressed (total packet) size stored in a packet header. Bit 1 of the first
+// byte selects 4-byte size fields (otherwise 1-byte); the compressed size is
+// the first field, right after the flag byte.
+size_t qlz_size_compressed(const char *source)
+{
+	const ui32 width = (((*source) & 2) == 2) ? 4 : 1;
+	const ui32 mask = 0xffffffff >> ((4 - width)*8);
+	const ui32 field = fast_read(source + 1, width);
+	return field & mask;
+}
+
+// Header length in bytes: one flag byte plus two size fields that are 4 bytes
+// wide when bit 1 of the flag byte is set and 1 byte wide otherwise (9 or 3).
+size_t qlz_size_header(const char *source)
+{
+	size_t field_width = 1;
+	if (((*source) & 2) == 2)
+		field_width = 4;
+	return 2*field_width + 1;
+}
+
+
+// Copy n bytes from src to dst strictly in ascending address order, one byte
+// at a time. The ascending order matters: the decompressor copies matches
+// whose source overlaps the destination and relies on earlier output bytes
+// being visible to later ones. Byte access also avoids unaligned word loads.
+static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n)
+{
+	for (ui32 i = 0; i < n; i ++) {
+		dst[i] = src[i];
+	}
+}
+
+// Record position `s` in the decompressor's hash table under the hash of the
+// three bytes at `s`. Level 1 keeps a single offset per slot; level 2 keeps
+// QLZ_POINTERS offsets per slot and overwrites them round-robin using the
+// slot's counter. Level 3 stores nothing (the function body is empty there).
+static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s)
+{
+#if QLZ_COMPRESSION_LEVEL == 1
+	ui32 hash;
+	hash = hashat(s);
+	state->hash[hash].offset = s;
+	state->hash_counter[hash] = 1;
+#elif QLZ_COMPRESSION_LEVEL == 2
+	ui32 hash;
+	unsigned char c;
+	hash = hashat(s);
+	c = state->hash_counter[hash];
+	state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s;
+	c++;
+	state->hash_counter[hash] = c;
+#endif
+	// Silence unused-parameter warnings at level 3.
+	(void)state;
+	(void)s;
+}
+
+#if QLZ_COMPRESSION_LEVEL <= 2
+// Advance *lh one byte at a time and hash each new position until *lh
+// reaches `max`; does nothing if *lh is already at or past `max`.
+static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max)
+{
+	for (; *lh < max; )
+	{
+		unsigned char *next = *lh + 1;
+		*lh = next;
+		update_hash(state, next);
+	}
+}
+#endif
+
+// Compress `size` bytes at `source` into `destination` as a stream of
+// CWORD_LEN-byte control words, each followed by the literals and match codes
+// whose flag bits it carries (bit set = match, bit clear = literal).
+// Returns the number of bytes written (never less than 9), or 0 when, past the
+// midpoint of the input, the output is not shrinking enough.
+static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state)
+{
+	const unsigned char *last_byte = source + size - 1;
+	const unsigned char *src = source;
+	unsigned char *cword_ptr = destination;
+	unsigned char *dst = destination + CWORD_LEN;
+	// Bit 31 is a sentinel: every emitted item shifts the word right by one, so
+	// the sentinel reaching bit 0 means the control word is full.
+	ui32 cword_val = 1U << 31;
+	// Last position at which the match-finding loop may start an item; the
+	// bytes after it are emitted as literals by the second loop.
+	const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
+	ui32 fetch = 0;
+	unsigned int lits = 0;
+
+	(void) lits;
+
+	if(src <= last_matchstart)
+		fetch = fast_read(src, 3);
+
+	while(src <= last_matchstart)
+	{
+		if ((cword_val & 1) == 1)
+		{
+			// store uncompressed if compression ratio is too low
+			if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5))
+				return 0;
+
+			// Flush the full control word and reserve room for the next one.
+			fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
+
+			cword_ptr = dst;
+			dst += CWORD_LEN;
+			cword_val = 1U << 31;
+			fetch = fast_read(src, 3);
+		}
+#if QLZ_COMPRESSION_LEVEL == 1
+		{
+			const unsigned char *o;
+			ui32 hash, cached;
+
+			hash = hash_func(fetch);
+			cached = fetch ^ state->hash[hash].cache;
+			state->hash[hash].cache = fetch;
+
+			o = state->hash[hash].offset + OFFSET_BASE;
+			state->hash[hash].offset = CAST(src - OFFSET_BASE);
+
+			if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
+			{
+				if (*(o + 3) != *(src + 3))
+				{
+					hash <<= 4;
+					cword_val = (cword_val >> 1) | (1U << 31);
+					fast_write((3 - 2) | hash, dst, 2);
+					src += 3;
+					dst += 2;
+				}
+				else
+				{
+					const unsigned char *old_src = src;
+					size_t matchlen;
+					hash <<= 4;
+
+					cword_val = (cword_val >> 1) | (1U << 31);
+					src += 4;
+
+					if(*(o + (src - old_src)) == *src)
+					{
+						src++;
+						if(*(o + (src - old_src)) == *src)
+						{
+							size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1;
+							size_t remaining = q > 255 ? 255 : q;
+							src++;
+							while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining)
+								src++;
+						}
+					}
+
+					matchlen = src - old_src;
+					if (matchlen < 18)
+					{
+						fast_write((ui32)(matchlen - 2) | hash, dst, 2);
+						dst += 2;
+					}
+					else
+					{
+						fast_write((ui32)(matchlen << 16) | hash, dst, 3);
+						dst += 3;
+					}
+				}
+				fetch = fast_read(src, 3);
+				lits = 0;
+			}
+			else
+			{
+				lits++;
+				*dst = *src;
+				src++;
+				dst++;
+				cword_val = (cword_val >> 1);
+				fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16);
+			}
+		}
+#elif QLZ_COMPRESSION_LEVEL >= 2
+		{
+			const unsigned char *o, *offset2;
+			ui32 hash, matchlen, k, m, best_k = 0;
+			unsigned char c;
+			size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1);
+			(void)best_k;
+
+
+			//hash = hashat(src);
+			fetch = fast_read(src, 3);
+			hash = hash_func(fetch);
+
+			c = state->hash_counter[hash];
+
+			// Try the first candidate of the slot, then the remaining ones, and
+			// keep the longest match found (offset2 / matchlen / best_k).
+			offset2 = state->hash[hash].offset[0];
+			if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0)
+			{
+				matchlen = 3;
+				if(*(offset2 + matchlen) == *(src + matchlen))
+				{
+					matchlen = 4;
+					while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining)
+						matchlen++;
+				}
+			}
+			else
+				matchlen = 0;
+			for(k = 1; k < QLZ_POINTERS && c > k; k++)
+			{
+				o = state->hash[hash].offset[k];
+#if QLZ_COMPRESSION_LEVEL == 3
+				if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
+#elif QLZ_COMPRESSION_LEVEL == 2
+				if(*(src + matchlen) == *(o + matchlen)	&& ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
+#endif
+				{
+					m = 3;
+					while(*(o + m) == *(src + m) && m < remaining)
+						m++;
+#if QLZ_COMPRESSION_LEVEL == 3
+					if ((m > matchlen) || (m == matchlen && o > offset2))
+#elif QLZ_COMPRESSION_LEVEL == 2
+					if (m > matchlen)
+#endif
+					{
+						offset2 = o;
+						matchlen = m;
+						best_k = k;
+					}
+				}
+			}
+			o = offset2;
+			// Remember the current position, overwriting slot entries round-robin.
+			state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
+			c++;
+			state->hash_counter[hash] = c;
+
+#if QLZ_COMPRESSION_LEVEL == 3
+			if(matchlen > 2 && src - o < 131071)
+			{
+				ui32 u;
+				size_t offset = src - o;
+
+				for(u = 1; u < matchlen; u++)
+				{
+					hash = hashat(src + u);
+					c = state->hash_counter[hash]++;
+					state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u;
+				}
+
+				cword_val = (cword_val >> 1) | (1U << 31);
+				src += matchlen;
+
+				// Pick the shortest encoding that can hold this length/offset pair.
+				if(matchlen == 3 && offset <= 63)
+				{
+					*dst = (unsigned char)(offset << 2);
+					dst++;
+				}
+				else if (matchlen == 3 && offset <= 16383)
+				{
+					ui32 f = (ui32)((offset << 2) | 1);
+					fast_write(f, dst, 2);
+					dst += 2;
+				}
+				else if (matchlen <= 18 && offset <= 1023)
+				{
+					ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2;
+					fast_write(f, dst, 2);
+					dst += 2;
+				}
+
+				else if(matchlen <= 33)
+				{
+					ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3;
+					fast_write(f, dst, 3);
+					dst += 3;
+				}
+				else
+				{
+					ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3;
+					fast_write(f, dst, 4);
+					dst += 4;
+				}
+			}
+			else
+			{
+				*dst = *src;
+				src++;
+				dst++;
+				cword_val = (cword_val >> 1);
+			}
+#elif QLZ_COMPRESSION_LEVEL == 2
+
+			if(matchlen > 2)
+			{
+				cword_val = (cword_val >> 1) | (1U << 31);
+				src += matchlen;
+
+				if (matchlen < 10)
+				{
+					ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5);
+					fast_write(f, dst, 2);
+					dst += 2;
+				}
+				else
+				{
+					ui32 f = best_k | (matchlen << 16) | (hash << 5);
+					fast_write(f, dst, 3);
+					dst += 3;
+				}
+			}
+			else
+			{
+				*dst = *src;
+				src++;
+				dst++;
+				cword_val = (cword_val >> 1);
+			}
+#endif
+		}
+#endif
+	}
+	// Emit the remaining bytes as literals; levels 1 and 2 keep updating the
+	// hash table while at least four bytes are left.
+	while (src <= last_byte)
+	{
+		if ((cword_val & 1) == 1)
+		{
+			fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
+			cword_ptr = dst;
+			dst += CWORD_LEN;
+			cword_val = 1U << 31;
+		}
+#if QLZ_COMPRESSION_LEVEL < 3
+		if (src <= last_byte - 3)
+		{
+#if QLZ_COMPRESSION_LEVEL == 1
+			ui32 hash, fetch;
+			fetch = fast_read(src, 3);
+			hash = hash_func(fetch);
+			state->hash[hash].offset = CAST(src - OFFSET_BASE);
+			state->hash[hash].cache = fetch;
+#elif QLZ_COMPRESSION_LEVEL == 2
+			ui32 hash;
+			unsigned char c;
+			hash = hashat(src);
+			c = state->hash_counter[hash];
+			state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
+			c++;
+			state->hash_counter[hash] = c;
+#endif
+		}
+#endif
+		*dst = *src;
+		src++;
+		dst++;
+		cword_val = (cword_val >> 1);
+	}
+
+	// Shift the sentinel down to bit 0 so the final, partially used control
+	// word is laid out like a full one before it is written.
+	while((cword_val & 1) != 1)
+		cword_val = (cword_val >> 1);
+
+	fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
+
+	// min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument
+	return dst - destination < 9 ? 9 : dst - destination;
+}
+
+// Decode the compressed packet at `source` into `destination`; `size` is the
+// decompressed length and `history` the lowest address a match may refer to.
+// Returns `size` once the output is complete. With QLZ_MEMORY_SAFE defined it
+// returns 0 as soon as a read or a match would leave its buffer.
+static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history)
+{
+	const unsigned char *src = source + qlz_size_header((const char *)source);
+	unsigned char *dst = destination;
+	const unsigned char *last_destination_byte = destination + size - 1;
+	// A control word equal to 1 means "exhausted": only the sentinel is left.
+	ui32 cword_val = 1;
+	const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
+	unsigned char *last_hashed = destination - 1;
+	const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1;
+	// Number of trailing zero bits in a nibble (4 for 0): how many literals the
+	// low bits of the control word allow to be consumed in one step.
+	static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
+
+	(void) last_source_byte;
+	(void) last_hashed;
+	(void) state;
+	(void) history;
+
+	for(;;)
+	{
+		ui32 fetch;
+
+		if (cword_val == 1)
+		{
+#ifdef QLZ_MEMORY_SAFE
+			if(src + CWORD_LEN - 1 > last_source_byte)
+				return 0;
+#endif
+			cword_val = fast_read(src, CWORD_LEN);
+			src += CWORD_LEN;
+		}
+
+#ifdef QLZ_MEMORY_SAFE
+			if(src + 4 - 1 > last_source_byte)
+				return 0;
+#endif
+
+		fetch = fast_read(src, 4);
+
+		// Low bit set: the next item is a match code; clear: literals.
+		if ((cword_val & 1) == 1)
+		{
+			ui32 matchlen;
+			const unsigned char *offset2;
+
+#if QLZ_COMPRESSION_LEVEL == 1
+			ui32 hash;
+			cword_val = cword_val >> 1;
+			hash = (fetch >> 4) & 0xfff;
+			offset2 = (const unsigned char *)(size_t)state->hash[hash].offset;
+
+			if((fetch & 0xf) != 0)
+			{
+				matchlen = (fetch & 0xf) + 2;
+				src += 2;
+			}
+			else
+			{
+				matchlen = *(src + 2);
+				src += 3;
+			}
+
+#elif QLZ_COMPRESSION_LEVEL == 2
+			ui32 hash;
+			unsigned char c;
+			cword_val = cword_val >> 1;
+			hash = (fetch >> 5) & 0x7ff;
+			c = (unsigned char)(fetch & 0x3);
+			offset2 = state->hash[hash].offset[c];
+
+			if((fetch & (28)) != 0)
+			{
+				matchlen = ((fetch >> 2) & 0x7) + 2;
+				src += 2;
+			}
+			else
+			{
+				matchlen = *(src + 2);
+				src += 3;
+			}
+
+#elif QLZ_COMPRESSION_LEVEL == 3
+			ui32 offset;
+			cword_val = cword_val >> 1;
+			if ((fetch & 3) == 0)
+			{
+				offset = (fetch & 0xff) >> 2;
+				matchlen = 3;
+				src++;
+			}
+			else if ((fetch & 2) == 0)
+			{
+				offset = (fetch & 0xffff) >> 2;
+				matchlen = 3;
+				src += 2;
+			}
+			else if ((fetch & 1) == 0)
+			{
+				offset = (fetch & 0xffff) >> 6;
+				matchlen = ((fetch >> 2) & 15) + 3;
+				src += 2;
+			}
+			else if ((fetch & 127) != 3)
+			{
+				offset = (fetch >> 7) & 0x1ffff;
+				matchlen = ((fetch >> 2) & 0x1f) + 2;
+				src += 3;
+			}
+			else
+			{
+				offset = (fetch >> 15);
+				matchlen = ((fetch >> 7) & 255) + 3;
+				src += 4;
+			}
+
+			offset2 = dst - offset;
+#endif
+
+#ifdef QLZ_MEMORY_SAFE
+			if(offset2 < history || offset2 > dst - MINOFFSET - 1)
+				return 0;
+
+			if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1))
+				return 0;
+#endif
+
+			memcpy_up(dst, offset2, matchlen);
+			dst += matchlen;
+
+#if QLZ_COMPRESSION_LEVEL <= 2
+			update_hash_upto(state, &last_hashed, dst - matchlen);
+			last_hashed = dst - 1;
+#endif
+		}
+		else
+		{
+			if (dst < last_matchstart)
+			{
+				// Copy four bytes unconditionally but advance only by the number of
+				// literals the control word actually announces.
+				unsigned int n = bitlut[cword_val & 0xf];
+				memcpy_up(dst, src, 4);
+				cword_val = cword_val >> n;
+				dst += n;
+				src += n;
+#if QLZ_COMPRESSION_LEVEL <= 2
+				update_hash_upto(state, &last_hashed, dst - 3);
+#endif
+			}
+			else
+			{
+				// Tail of the output: everything left is literals, copied one by
+				// one, skipping over any control word encountered on the way.
+				while(dst <= last_destination_byte)
+				{
+					if (cword_val == 1)
+					{
+						src += CWORD_LEN;
+						cword_val = 1U << 31;
+					}
+#ifdef QLZ_MEMORY_SAFE
+					if(src >= last_source_byte + 1)
+						return 0;
+#endif
+					*dst = *src;
+					dst++;
+					src++;
+					cword_val = cword_val >> 1;
+				}
+
+#if QLZ_COMPRESSION_LEVEL <= 2
+				update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant
+#endif
+				return size;
+			}
+
+		}
+	}
+}
+
+// Compress `size` bytes from `source` into a packet at `destination`.
+// The packet starts with a 3-byte header when size < 216 (flag byte plus
+// 1-byte packet and input sizes) and a 9-byte header otherwise (flag byte plus
+// two 32-bit sizes). If qlz_compress_core() gives up, the input is stored
+// verbatim after the header and the "compressed" flag bit stays clear.
+// Returns the total packet length including the header, or 0 when `size` is 0
+// or larger than 0xffffffff - 400.
+size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state)
+{
+	size_t r;
+	ui32 compressed;
+	size_t base;
+
+	if(size == 0 || size > 0xffffffff - 400)
+		return 0;
+
+	// `base` is the header length.
+	if(size < 216)
+		base = 3;
+	else
+		base = 9;
+
+#if QLZ_STREAMING_BUFFER > 0
+	if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER)
+#endif
+	{
+		reset_table_compress(state);
+		r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state);
+#if QLZ_STREAMING_BUFFER > 0
+		reset_table_compress(state);
+#endif
+		// The core returned 0: fall back to a stored (uncompressed) packet.
+		if(r == base)
+		{
+			bench_memcpy(destination + base, source, size);
+			r = size + base;
+			compressed = 0;
+		}
+		else
+		{
+			compressed = 1;
+		}
+		state->stream_counter = 0;
+	}
+#if QLZ_STREAMING_BUFFER > 0
+	else
+	{
+		unsigned char *src = state->stream_buffer + state->stream_counter;
+
+		bench_memcpy(src, source, size);
+		r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state);
+
+ 		if(r == base)
+		{
+			bench_memcpy(destination + base, src, size);
+			r = size + base;
+			compressed = 0;
+			reset_table_compress(state);
+		}
+		else
+		{
+			compressed = 1;
+		}
+		state->stream_counter += size;
+	}
+#endif
+	if(base == 3)
+	{
+		*destination = (unsigned char)(0 | compressed);
+		*(destination + 1) = (unsigned char)r;
+		*(destination + 2) = (unsigned char)size;
+	}
+	else
+	{
+		*destination = (unsigned char)(2 | compressed);
+		fast_write((ui32)r, destination + 1, 4);
+		fast_write((ui32)size, destination + 5, 4);
+	}
+
+	// Remaining flag bits: compression level (bits 2-3), streaming buffer class
+	// (bits 4-5) and a constant 1 in bit 6.
+	*destination |= (QLZ_COMPRESSION_LEVEL << 2);
+	*destination |= (1 << 6);
+	*destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4);
+
+// 76543210
+// 01SSLLHC
+
+	return r;
+}
+
+// Decompress the packet at `source` into `destination`.
+// Bit 0 of the first header byte tells whether the payload is compressed
+// (decoded by qlz_decompress_core) or stored (copied verbatim).
+// Returns the decompressed size taken from the header, or the core's return
+// value for compressed packets.
+size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state)
+{
+	size_t dsiz = qlz_size_decompressed(source);
+
+#if QLZ_STREAMING_BUFFER > 0
+	if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER)
+#endif
+	{
+		if((*source & 1) == 1)
+		{
+			reset_table_decompress(state);
+			dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination);
+		}
+		else
+		{
+			bench_memcpy(destination, source + qlz_size_header(source), dsiz);
+		}
+		state->stream_counter = 0;
+		reset_table_decompress(state);
+	}
+#if QLZ_STREAMING_BUFFER > 0
+	else
+	{
+		// Streaming mode: decode into the history buffer, then copy out.
+		unsigned char *dst = state->stream_buffer + state->stream_counter;
+		if((*source & 1) == 1)
+		{
+			dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer);
+		}
+		else
+		{
+			bench_memcpy(dst, source + qlz_size_header(source), dsiz);
+			reset_table_decompress(state);
+		}
+		bench_memcpy(destination, dst, dsiz);
+		state->stream_counter += dsiz;
+	}
+#endif
+	return dsiz;
+}
+
--- a/benchmarks/micro/src/lzip/quicklz.h
+++ b/benchmarks/micro/src/lzip/quicklz.h
@ -0,0 +1,164 @@
+#ifndef QLZ_HEADER
+#define QLZ_HEADER
+
+#include <am.h>
+#include <klib.h>
+
+// Copy n bytes from src to dst and return dst. Despite the name this is safe
+// for overlapping ranges: when dst starts inside [src, src + n) the copy runs
+// backwards, otherwise forwards. Both pointers must be non-NULL.
+static inline void* bench_memcpy(void* dst, const void* src, size_t n){
+  assert(dst&&src);
+  // Work on char pointers: arithmetic on void* is a GNU extension, not ISO C.
+  const char* s = (const char*)src;
+  char* d = (char*)dst;
+  if(s<d&&s+n>d){
+    // dst overlaps the tail of src: copy from the end so that no source byte
+    // is overwritten before it has been read.
+    s+=n;
+    d+=n;
+    while(n-->0)*--d=*--s;
+  }
+  else{
+    while(n-->0)*d++=*s++;
+  }
+  return dst;
+}
+
+
+// Fast data compression library
+// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
+// lar@quicklz.com
+//
+// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
+// released into public must be open source) or under a commercial license if such
+// has been acquired (see http://www.quicklz.com/order.html). The commercial license
+// does not cover derived or ported versions created by third parties under GPL.
+
+// You can edit following user settings. Data must be decompressed with the same
+// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed
+// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially
+// zeroed out (see manual). First #ifndef makes it possible to define settings from
+// the outside like the compiler command line.
+
+// 1.5.0 final
+
+#ifndef QLZ_COMPRESSION_LEVEL
+
+	// 1 gives fastest compression speed. 3 gives fastest decompression speed and best
+	// compression ratio.
+	//#define QLZ_COMPRESSION_LEVEL 1
+	//#define QLZ_COMPRESSION_LEVEL 2
+	//#define QLZ_COMPRESSION_LEVEL 3
+	#define QLZ_COMPRESSION_LEVEL 2
+
+	// If > 0, zero out both states prior to first call to qlz_compress() or qlz_decompress()
+	// and decompress packets in the same order as they were compressed
+	#define QLZ_STREAMING_BUFFER 0
+	//#define QLZ_STREAMING_BUFFER 100000
+	//#define QLZ_STREAMING_BUFFER 1000000
+
+	// Guarantees that decompression of corrupted data cannot crash. Decreases decompression
+	// speed 10-20%. Compression speed not affected.
+	//#define QLZ_MEMORY_SAFE
+#endif
+
+#define QLZ_VERSION_MAJOR 1
+#define QLZ_VERSION_MINOR 5
+#define QLZ_VERSION_REVISION 0
+
+// Verify compression level
+#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3
+#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3
+#endif
+
+typedef unsigned int ui32;
+typedef unsigned short int ui16;
+
+// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values!
+#if QLZ_COMPRESSION_LEVEL == 1
+#define QLZ_POINTERS 1
+#define QLZ_HASH_VALUES 4096
+#elif QLZ_COMPRESSION_LEVEL == 2
+#define QLZ_POINTERS 4
+#define QLZ_HASH_VALUES 2048
+#elif QLZ_COMPRESSION_LEVEL == 3
+#define QLZ_POINTERS 16
+#define QLZ_HASH_VALUES 4096
+#endif
+
+// hash entry
+// Compressor hash-table slot. Level 1 keeps the last fetched value (`cache`)
+// and a single position, stored as a 32-bit offset when QLZ_PTR_64 is defined
+// and streaming is off, otherwise as a pointer. Levels 2 and 3 keep
+// QLZ_POINTERS candidate positions per slot.
+typedef struct
+{
+#if QLZ_COMPRESSION_LEVEL == 1
+	ui32 cache;
+#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
+	unsigned int offset;
+#else
+	const unsigned char *offset;
+#endif
+#else
+	const unsigned char *offset[QLZ_POINTERS];
+#endif
+
+} qlz_hash_compress;
+
+// Decompressor hash-table slot: one position at level 1, QLZ_POINTERS
+// positions at the other levels.
+typedef struct
+{
+#if QLZ_COMPRESSION_LEVEL == 1
+	const unsigned char *offset;
+#else
+	const unsigned char *offset[QLZ_POINTERS];
+#endif
+} qlz_hash_decompress;
+
+
+// states
+// Scratch state for qlz_compress(): optional streaming history buffer, the
+// number of bytes currently held in it, the hash table and one insertion
+// counter per hash slot.
+typedef struct
+{
+	#if QLZ_STREAMING_BUFFER > 0
+		unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
+	#endif
+	size_t stream_counter;
+	qlz_hash_compress hash[QLZ_HASH_VALUES];
+	unsigned char hash_counter[QLZ_HASH_VALUES];
+} qlz_state_compress;
+
+
+// Scratch state for qlz_decompress(). Levels 1 and 2 carry a hash table and
+// per-slot counters; level 3 only needs the optional streaming buffer and its
+// fill counter.
+#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2
+	typedef struct
+	{
+#if QLZ_STREAMING_BUFFER > 0
+		unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
+#endif
+		qlz_hash_decompress hash[QLZ_HASH_VALUES];
+		unsigned char hash_counter[QLZ_HASH_VALUES];
+		size_t stream_counter;
+	} qlz_state_decompress;
+#elif QLZ_COMPRESSION_LEVEL == 3
+	typedef struct
+	{
+#if QLZ_STREAMING_BUFFER > 0
+		unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
+#endif
+// This condition can never hold inside the level-3 branch, so `hash` is
+// never part of the level-3 state.
+#if QLZ_COMPRESSION_LEVEL <= 2
+		qlz_hash_decompress hash[QLZ_HASH_VALUES];
+#endif
+		size_t stream_counter;
+	} qlz_state_decompress;
+#endif
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+// Public functions of QuickLZ
+size_t qlz_size_decompressed(const char *source);
+size_t qlz_size_compressed(const char *source);
+size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
+size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
+int qlz_get_setting(int setting);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
+
--- a/benchmarks/micro/src/md5/md5.c
+++ b/benchmarks/micro/src/md5/md5.c
@ -0,0 +1,159 @@
+/*
+ * Simple MD5 implementation (github.com/pod32g/md5)
+ *
+ */
+
+#include <benchmark.h>
+
+static int N;
+
+// Round constants: k[i] = floor(|sin(i + 1)| * 2^32), with i + 1 in radians.
+const uint32_t k[64] = {
+0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee ,
+0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 ,
+0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be ,
+0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 ,
+0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa ,
+0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 ,
+0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed ,
+0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a ,
+0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c ,
+0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 ,
+0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 ,
+0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 ,
+0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 ,
+0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 ,
+0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 ,
+0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
+
+// r specifies the per-round shift amounts
+static const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+                 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
+                 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+                 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
+
+// leftrotate function definition
+#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
+
+// Store val into bytes[0..3] in little-endian order (least significant first).
+static void to_bytes(uint32_t val, uint8_t *bytes)
+{
+    for (int i = 0; i < 4; i++) {
+        bytes[i] = (uint8_t) (val >> (8 * i));
+    }
+}
+
+// Assemble bytes[0..3] (little-endian) into a 32-bit unsigned value.
+static uint32_t to_int32(const uint8_t *bytes)
+{
+    uint32_t value = 0;
+    for (int i = 3; i >= 0; i--) {
+        value = (value << 8) | (uint32_t) bytes[i];
+    }
+    return value;
+}
+
+// Compute the MD5 digest of msg[0 .. initial_len) into digest[0 .. 16).
+//
+// The message is padded IN PLACE: the 0x80 marker, the zero fill and the
+// 8-byte length field are written directly after the message. The buffer
+// behind `msg` must therefore be writable for new_len + 8 bytes, where new_len
+// is the smallest value >= initial_len + 1 that is 56 modulo 64 - at most
+// initial_len + 72 bytes in total.
+static void md5(uint8_t *msg, size_t initial_len, uint8_t *digest) {
+
+    // These vars will contain the hash
+    uint32_t h0, h1, h2, h3;
+
+    size_t new_len, offset;
+    uint32_t w[16];
+    uint32_t a, b, c, d, i, f, g, temp;
+
+    // Initialize variables - simple count in nibbles:
+    h0 = 0x67452301;
+    h1 = 0xefcdab89;
+    h2 = 0x98badcfe;
+    h3 = 0x10325476;
+
+    //Pre-processing:
+    //append "1" bit to message
+    //append "0" bits until message length in bits ≡ 448 (mod 512)
+    //append length mod (2^64) to message
+
+    // Find the padded length: first value past the marker byte that is
+    // 56 modulo 64.
+    for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
+        ;
+
+    msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
+    for (offset = initial_len + 1; offset < new_len; offset++)
+        msg[offset] = 0; // append "0" bits
+
+    // append the len in bits at the end of the buffer.
+    to_bytes(initial_len*8, msg + new_len);
+    // initial_len>>29 == initial_len*8>>32, but avoids overflow.
+    to_bytes(initial_len>>29, msg + new_len + 4);
+
+    // Process the message in successive 512-bit chunks:
+    //for each 512-bit chunk of message:
+    for(offset=0; offset<new_len; offset += (512/8)) {
+
+        // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
+        for (i = 0; i < 16; i++)
+            w[i] = to_int32(msg + offset + i*4);
+
+        // Initialize hash value for this chunk:
+        a = h0;
+        b = h1;
+        c = h2;
+        d = h3;
+
+        // Main loop:
+        for(i = 0; i<64; i++) {
+
+            // Each group of 16 steps uses its own mixing function f and its
+            // own order g of visiting the message words.
+            if (i < 16) {
+                f = (b & c) | ((~b) & d);
+                g = i;
+            } else if (i < 32) {
+                f = (d & b) | ((~d) & c);
+                g = (5*i + 1) % 16;
+            } else if (i < 48) {
+                f = b ^ c ^ d;
+                g = (3*i + 5) % 16;
+            } else {
+                f = c ^ (b | (~d));
+                g = (7*i) % 16;
+            }
+
+            temp = d;
+            d = c;
+            c = b;
+            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
+            a = temp;
+
+        }
+
+        // Add this chunk's hash to result so far:
+        h0 += a;
+        h1 += b;
+        h2 += c;
+        h3 += d;
+
+    }
+
+    //var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
+    to_bytes(h0, digest);
+    to_bytes(h1, digest + 4);
+    to_bytes(h2, digest + 8);
+    to_bytes(h3, digest + 12);
+}
+
+static uint8_t *str;
+static uint8_t *digest;
+
+// Seed the PRNG, allocate the message buffer and fill its first N bytes with
+// pseudo-random data, then allocate the 16-byte digest buffer.
+void bench_md5_prepare() {
+  // md5() pads the message in place: it appends the 0x80 marker, zero fill up
+  // to the next length that is 56 modulo 64 (at most N + 64) and an 8-byte
+  // length field. The buffer must therefore hold up to N + 72 bytes;
+  // allocating only N lets the padding run past the allocation.
+  enum { MD5_PAD_MAX = 72 };
+
+  N = setting->size;
+  bench_srand(1);
+  str = bench_alloc(N + MD5_PAD_MAX);
+  for (int i = 0; i < N; i ++) {
+    str[i] = bench_rand();
+  }
+  digest = bench_alloc(16);
+}
+
+// Benchmark body: hash the N-byte message. md5() also writes its padding into
+// `str` directly after the message.
+void bench_md5_run() {
+  md5(str, N, digest);
+}
+
+// Returns non-zero when the checksum of the 16 digest bytes equals the
+// setting's reference value.
+int bench_md5_validate() {
+  return checksum(digest, digest + 16) == setting->checksum;
+}
--- a/benchmarks/micro/src/qsort/qsort.c
+++ b/benchmarks/micro/src/qsort/qsort.c
@ -0,0 +1,44 @@
+#include <benchmark.h>
+
+// N: number of elements to sort (setting->size); data: the array being
+// sorted. Both are set in bench_qsort_prepare().
+static int N, *data;
+
+// Build the qsort input: N ints (N = setting->size) obtained from
+// bench_alloc(), each assembled from two successive bench_rand() results
+// after seeding the generator with 1.
+void bench_qsort_prepare() {
+  bench_srand(1);
+
+  N = setting->size;
+  data = bench_alloc(N * sizeof(int));
+
+  int idx = 0;
+  while (idx < N) {
+    int hi = bench_rand();  // drawn first, shifted left by 16
+    int lo = bench_rand();  // drawn second, OR-ed into the result
+    data[idx] = (hi << 16) | lo;
+    idx ++;
+  }
+}
+
+// Exchange the two ints that a and b point to.
+static void swap(int *a, int *b) {
+  int first = *a, second = *b;
+  *a = second;
+  *b = first;
+}
+
+// Sort the half-open range a[l..r) in ascending order, in place.
+// a[l] is the pivot value: smaller elements are gathered directly after
+// position l, the pivot is swapped into its final slot, and the two sides
+// are then sorted recursively.
+static void myqsort(int *a, int l, int r) {
+  if (l >= r) {
+    return;
+  }
+
+  const int pivot_val = a[l];
+  int boundary = l;  // end of the "smaller than pivot" run (l while empty)
+  int scan = l + 1;
+  while (scan < r) {
+    if (a[scan] < pivot_val) {
+      boundary ++;
+      swap(&a[boundary], &a[scan]);
+    }
+    scan ++;
+  }
+  swap(&a[boundary], &a[l]);
+
+  myqsort(a, l, boundary);
+  myqsort(a, boundary + 1, r);
+}
+
+// Run step: sort data[0..N) in place.
+void bench_qsort_run() {
+  myqsort(data, 0, N);
+}
+
+// Returns non-zero when checksum() over data[0..N) equals the expected
+// value stored in setting->checksum.
+int bench_qsort_validate() {
+  return checksum(data, data + N) == setting->checksum;
+}
--- a/benchmarks/micro/src/queen/queen.c
+++ b/benchmarks/micro/src/queen/queen.c
@ -0,0 +1,32 @@
+#include <benchmark.h>
+
+// Mask with the low setting->size bits set (assigned in
+// bench_queen_prepare()); dfs() stops once its row mask equals it.
+static unsigned int FULL;
+
+// Bitmask depth-first search.
+//   row    - bits chosen so far; the search succeeds when row == FULL
+//   ld, rd - blocked-bit masks, shifted left / right by one per level
+// Returns 1 when row == FULL, otherwise the sum of the results obtained by
+// trying every bit of FULL that is not blocked by row, ld or rd.
+static unsigned int dfs(unsigned int row, unsigned int ld, unsigned int rd) {
+  if (row == FULL) {
+    return 1;
+  }
+
+  unsigned int total = 0;
+  unsigned int avail = FULL & ~(row | ld | rd);
+  while (avail != 0) {
+    unsigned int lowest = avail & (~avail + 1);  // isolate the lowest set bit
+    avail -= lowest;
+    total += dfs(row | lowest, (ld | lowest) << 1, (rd | lowest) >> 1);
+  }
+  return total;
+}
+
+// Value returned by dfs(0, 0, 0) in bench_queen_run(); compared against
+// setting->checksum in bench_queen_validate().
+static unsigned int ans;
+
+// Reset the stored answer and set FULL to a mask of the setting->size
+// lowest bits.
+void bench_queen_prepare() {
+  ans = 0;
+  FULL = (1 << setting->size) - 1;
+}
+
+// Run step: start the search from the empty state and store the count.
+void bench_queen_run() {
+  ans = dfs(0, 0, 0);
+}
+
+// Returns non-zero when the stored count equals setting->checksum.
+int bench_queen_validate() {
+  return ans == setting->checksum;
+}
--- a/benchmarks/micro/src/sieve/sieve.c
+++ b/benchmarks/micro/src/sieve/sieve.c
@ -0,0 +1,42 @@
+#include <benchmark.h>
+
+// Upper bound of the sieve (setting->size); set in bench_sieve_prepare().
+static int N;
+
+// Number of set flags in 2..N counted by bench_sieve_run().
+static int ans;
+// Bitmap with one flag per integer, 32 flags per word (see get()/clear()).
+static uint32_t *primes;
+
+// Return flag n of the primes bitmap (1 = set, 0 = cleared).
+static inline int get(int n) {
+  uint32_t word = primes[n >> 5];  // 32 flags per word
+  int bit = n & 31;
+  return (word >> bit) & 1;
+}
+
+// Clear flag n of the primes bitmap.
+static inline void clear(int n) {
+  unsigned long mask = 1ul << (n & 31);
+  primes[n >> 5] &= ~mask;
+}
+
+// Allocate the bitmap (N / 8 + 128 bytes, N = setting->size) and set every
+// flag in words 0..N/32 to 1.
+void bench_sieve_prepare() {
+  N = setting->size;
+  primes = (uint32_t*)bench_alloc(N / 8 + 128);
+
+  const int last_word = N / 32;
+  int w = 0;
+  while (w <= last_word) {
+    primes[w] = 0xffffffff;
+    w ++;
+  }
+}
+
+// Run step: sieve the bitmap and count the flags left set in 2..N.
+void bench_sieve_run() {
+  // Do nothing (ans is left untouched) unless every flag in 1..N is still
+  // set, i.e. the bitmap is in the state bench_sieve_prepare() creates.
+  int n = 1;
+  while (n <= N) {
+    if (!get(n)) {
+      return;
+    }
+    n ++;
+  }
+
+  // Clear every multiple (from 2*base upward) of each still-set base with
+  // base*base <= N.
+  for (int base = 2; base * base <= N; base ++) {
+    if (!get(base)) {
+      continue;
+    }
+    for (int multiple = base + base; multiple <= N; multiple += base) {
+      clear(multiple);
+    }
+  }
+
+  // Count the survivors.
+  ans = 0;
+  int k = 2;
+  while (k <= N) {
+    if (get(k)) {
+      ans ++;
+    }
+    k ++;
+  }
+}
+
+// Returns non-zero when the counted total equals setting->checksum.
+int bench_sieve_validate() {
+  return ans == setting->checksum;
+}
--- a/benchmarks/micro/src/ssort/ssort.cc
+++ b/benchmarks/micro/src/ssort/ssort.cc
@ -0,0 +1,111 @@
+// This is the Skew algorithm's reference implementation.
+
+#include <benchmark.h>
+
+// Length of the input string (setting->size); set in bench_ssort_prepare().
+static int N;
+
+// lexicographic "less than or equal" on pairs: (a1,a2) <= (b1,b2)
+inline bool leq(int a1, int a2,   int b1, int b2) {
+  if (a1 != b1) return a1 < b1;
+  return a2 <= b2;
+}
+// the same ordering on triples: (a1,a2,a3) <= (b1,b2,b3)
+inline bool leq(int a1, int a2, int a3,   int b1, int b2, int b3) {
+  if (a1 != b1) return a1 < b1;
+  return leq(a2,a3, b2,b3);
+}
+// Counting-sort pass: stably copy a[0..n-1] into b[0..n-1], ordered by the
+// key r[a[i]], where every key lies in 0..K.  The K+1 counters are taken
+// from bench_alloc().
+static void radixPass(int* a, int* b, int* r, int n, int K)
+{
+  int* bucket = (int*)bench_alloc(sizeof(int)*(K+1));
+
+  // reset the counters, then tally how often each key occurs
+  for (int key = 0;  key <= K;  key++) bucket[key] = 0;
+  for (int i = 0;  i < n;  i++) {
+    int key = r[a[i]];
+    bucket[key]++;
+  }
+
+  // exclusive prefix sums: bucket[key] becomes the first output slot of key
+  int running = 0;
+  for (int key = 0;  key <= K;  key++) {
+    int count = bucket[key];
+    bucket[key] = running;
+    running += count;
+  }
+
+  // scatter in input order, so equal keys keep their relative order
+  for (int i = 0;  i < n;  i++) {
+    int elem = a[i];
+    int slot = bucket[r[elem]]++;
+    b[slot] = elem;
+  }
+}
+
+// find the suffix array SA of s[0..n-1] in {1..K}^n
+// require s[n]=s[n+1]=s[n+2]=0, n>=2
+//
+// Outline: sort the suffixes that start at positions i%3 != 0 by their
+// leading triples (recursing on the renamed string when triples repeat),
+// derive the order of the i%3 == 0 suffixes from that result, then merge
+// the two sorted sequences into SA.
+// All scratch arrays come from bench_alloc(); nothing is released here.
+void suffixArray(int* s, int* SA, int n, int K) {
+  // n0, n1, n2: how many positions i in [0,n) have i%3 == 0, 1, 2
+  int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
+  // s12 / SA12 carry three trailing zero entries each
+  int* s12  = (int*)bench_alloc(sizeof(int)*(n02+3));  s12[n02]= s12[n02+1]= s12[n02+2]=0;
+  int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
+  int* s0   = (int*)bench_alloc(sizeof(int)*n0);
+  int* SA0  = (int*)bench_alloc(sizeof(int)*n0);
+
+  // generate positions of mod 1 and mod  2 suffixes
+  // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
+  for (int i=0, j=0;  i < n+(n0-n1);  i++) if (i%3 != 0) s12[j++] = i;
+
+  // lsb radix sort the mod 1 and mod 2 triples
+  // (third character first, first character last; each pass is stable)
+  radixPass(s12 , SA12, s+2, n02, K);
+  radixPass(SA12, s12 , s+1, n02, K);
+  radixPass(s12 , SA12, s  , n02, K);
+
+  // find lexicographic names of triples
+  // (c0,c1,c2 hold the previous triple; a new name starts when it changes)
+  int name = 0, c0 = -1, c1 = -1, c2 = -1;
+  for (int i = 0;  i < n02;  i++) {
+    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
+      name++;  c0 = s[SA12[i]];  c1 = s[SA12[i]+1];  c2 = s[SA12[i]+2];
+    }
+    if (SA12[i] % 3 == 1) { s12[SA12[i]/3]      = name; } // left half
+    else                  { s12[SA12[i]/3 + n0] = name; } // right half
+  }
+
+  // recurse if names are not yet unique
+  if (name < n02) {
+    suffixArray(s12, SA12, n02, name);
+    // store unique names in s12 using the suffix array
+    for (int i = 0;  i < n02;  i++) s12[SA12[i]] = i + 1;
+  } else // generate the suffix array of s12 directly
+    for (int i = 0;  i < n02;  i++) SA12[s12[i] - 1] = i;
+
+  // stably sort the mod 0 suffixes from SA12 by their first character
+  for (int i=0, j=0;  i < n02;  i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
+  radixPass(s0, SA0, s, n0, K);
+
+  // merge sorted SA0 suffixes and sorted SA12 suffixes
+  // p indexes SA0, t indexes SA12, k indexes the output SA.
+  // t starts at n0-n1, i.e. past the first SA12 entry exactly when the
+  // dummy suffix was added (n0-n1 is 1 then, 0 otherwise).
+  for (int p=0,  t=n0-n1,  k=0;  k < n;  k++) {
+// GetI() maps the SA12 entry at t back to a position in s.
+// Note: the macro is never #undef'd, so it stays defined after this function.
+#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
+    int i = GetI(); // pos of current offset 12 suffix
+    int j = SA0[p]; // pos of current offset 0  suffix
+    // mod 1 suffix: compare as pairs; mod 2 suffix: compare as triples
+    if (SA12[t] < n0 ?
+        leq(s[i],       s12[SA12[t] + n0], s[j],       s12[j/3]) :
+        leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
+    { // suffix from SA12 is smaller
+      SA[k] = i;  t++;
+      if (t == n02) { // done --- only SA0 suffixes left
+        for (k++;  p < n0;  p++, k++) SA[k] = SA0[p];
+      }
+    } else {
+      SA[k] = j;  p++;
+      if (p == n0)  { // done --- only SA12 suffixes left
+        for (k++;  t < n02;  t++, k++) SA[k] = GetI();
+      }
+    }
+  }
+}
+
+extern "C" {
+
+// s: input symbols; sa: suffix array written by suffixArray().
+// Both are allocated in bench_ssort_prepare().
+static int *s, *sa;
+
+// Prepare the suffix-sort input: N symbols (N = setting->size), each
+// bench_rand() % 26 after seeding the generator with 1, plus the output
+// array sa.  Both arrays have room for N + 10 ints.
+//
+// NOTE(review): the symbols lie in 0..25 although suffixArray() documents
+// its alphabet as {1..K}; this is left as is because the expected checksum
+// is presumably tied to this exact input -- confirm before changing.
+void bench_ssort_prepare() {
+  N = setting->size;
+  bench_srand(1);
+  s = (int*)bench_alloc(sizeof(int)*(N+10));
+  sa = (int*)bench_alloc(sizeof(int)*(N+10));
+
+  for (int i = 0; i < N; i ++) {
+    s[i] = bench_rand() % 26;
+  }
+
+  // suffixArray() requires s[n] = s[n+1] = s[n+2] = 0.  Write the padding
+  // explicitly instead of depending on what bench_alloc() left in memory.
+  s[N] = s[N + 1] = s[N + 2] = 0;
+}
+
+// Run step: build the suffix array of s[0..N) into sa, passing K = 26.
+void bench_ssort_run() {
+  suffixArray(s, sa, N, 26);
+}
+
+// Returns non-zero when checksum() over sa[0..N) equals the expected value
+// stored in setting->checksum.
+int bench_ssort_validate() {
+  return checksum(sa, sa + N) == setting->checksum;
+}
+
+}
+