restructure project

This commit is contained in:
Yanyan Jiang 2020-08-12 05:43:20 +00:00 committed by Zihao Yu
parent a317d8cce1
commit 960dc907e9
87 changed files with 23 additions and 17 deletions

View file

@ -0,0 +1,3 @@
# Name of this application/image.
NAME = micro-bench
# Every C (*.c) and C++ (*.cc) source file found under src/.
SRCS = $(shell find src/ -name "*.c" -o -name "*.cc")
# Pull in the shared build rules located under $(AM_HOME).
include $(AM_HOME)/Makefile

View file

@ -0,0 +1,113 @@
#ifndef __BENCHMARK_H__
#define __BENCHMARK_H__
#include <am.h>
#include <klib.h>
#include <klib-macros.h>
#ifdef __cplusplus
extern "C" {
#endif
// Postfix-style size units: "2 MB" expands to "2 * 1024 * 1024", "1 KB" to "1 * 1024".
#define MB * 1024 * 1024
#define KB * 1024
// Reference machine and its score, printed next to the measured score by main().
#define REF_CPU "i7-7700K @ 4.20GHz"
#define REF_SCORE 100000
// Number of times each benchmark is run; main() keeps the minimum time.
#define REPEAT 1
// One initializer per benchmark and input scale (_S / _M / _L); the fields
// map, in order, onto struct Setting { size, mlim, ref, checksum }.
// size | heap | time | checksum
#define QSORT_S { 100, 1 KB, 0, 0x08467105}
#define QSORT_M { 30000, 128 KB, 0, 0xa3e99fe4}
#define QSORT_L { 100000, 640 KB, 5114, 0xed8cff89}
#define QUEEN_S { 8, 0 KB, 0, 0x0000005c}
#define QUEEN_M { 11, 0 KB, 0, 0x00000a78}
#define QUEEN_L { 12, 0 KB, 4707, 0x00003778}
#define BF_S { 4, 32 KB, 0, 0xa6f0079e}
#define BF_M { 25, 32 KB, 0, 0xa88f8a65}
#define BF_L { 180, 32 KB, 23673, 0x9221e2b3}
#define FIB_S { 2, 1 KB, 0, 0x7cfeddf0}
#define FIB_M { 23, 16 KB, 0, 0x94ad8800}
#define FIB_L { 91, 256 KB, 28318, 0xebdc5f80}
#define SIEVE_S { 100, 1 KB, 0, 0x00000019}
#define SIEVE_M { 200000, 32 KB, 0, 0x00004640}
#define SIEVE_L {10000000, 2 MB, 39361, 0x000a2403}
// For 15pz, "size" selects one of the three fixed boards (0, 1, 2).
#define PZ15_S { 0, 1 KB, 0, 0x00000006}
#define PZ15_M { 1, 256 KB, 0, 0x0000b0df}
#define PZ15_L { 2, 2 MB, 4486, 0x00068b8c}
#define DINIC_S { 10, 8 KB, 0, 0x0000019c}
#define DINIC_M { 80, 512 KB, 0, 0x00004f99}
#define DINIC_L { 128, 1 MB, 10882, 0x0000c248}
#define LZIP_S { 128, 128 KB, 0, 0xe05fc832}
#define LZIP_M { 50000, 1 MB, 0, 0xdc93e90c}
#define LZIP_L { 1048576, 4 MB, 7593, 0x8d62c81f}
#define SSORT_S { 100, 4 KB, 0, 0x4c555e09}
#define SSORT_M { 10000, 512 KB, 0, 0x0db7909b}
#define SSORT_L { 100000, 4 MB, 4504, 0x4f0ab431}
#define MD5_S { 100, 1 KB, 0, 0xf902f28f}
#define MD5_M { 200000, 256 KB, 0, 0xd4f9bc6d}
#define MD5_L {10000000, 16 MB, 17239, 0x27286a42}
// X-macro enumerating every benchmark. `def` is invoked once per benchmark as
//   def(symbol, "short name", small, medium, large, "description")
// where small/medium/large are the Setting initializers defined above.
// The last row deliberately has NO trailing backslash, so whatever line
// follows the list can never be spliced into the macro body.
#define BENCHMARK_LIST(def) \
  def(qsort, "qsort", QSORT_S, QSORT_M, QSORT_L, "Quick sort") \
  def(queen, "queen", QUEEN_S, QUEEN_M, QUEEN_L, "Queen placement") \
  def( bf, "bf", BF_S, BF_M, BF_L, "Brainf**k interpreter") \
  def( fib, "fib", FIB_S, FIB_M, FIB_L, "Fibonacci number") \
  def(sieve, "sieve", SIEVE_S, SIEVE_M, SIEVE_L, "Eratosthenes sieve") \
  def( 15pz, "15pz", PZ15_S, PZ15_M, PZ15_L, "A* 15-puzzle search") \
  def(dinic, "dinic", DINIC_S, DINIC_M, DINIC_L, "Dinic's maxflow algorithm") \
  def( lzip, "lzip", LZIP_S, LZIP_M, LZIP_L, "Lzip compression") \
  def(ssort, "ssort", SSORT_S, SSORT_M, SSORT_L, "Suffix sort") \
  def( md5, "md5", MD5_S, MD5_M, MD5_L, "MD5 digest")

// Each benchmark will run REPEAT times
// Declares the three entry points of one benchmark:
//   bench_<name>_prepare(), bench_<name>_run(), bench_<name>_validate().
// Applied to every row of BENCHMARK_LIST on the line right after the macro.
#define DECL(_name, _sname, _s, _m, _l, _desc) \
void bench_##_name##_prepare(); \
void bench_##_name##_run(); \
int bench_##_name##_validate();
BENCHMARK_LIST(DECL)
// Parameters of one benchmark at one input scale (see the *_S/_M/_L macros).
typedef struct Setting {
  int size;            // problem size handed to the benchmark
  unsigned long mlim;  // heap budget in bytes, enforced by bench_alloc()
  unsigned long ref;   // reference time used when computing the score
  uint32_t checksum;   // expected result, compared by the validate hook
} Setting;
// Descriptor of one benchmark: its three hooks, its names, and one Setting
// per input scale (index 0 = test, 1 = train, 2 = ref, as selected by main()).
typedef struct Benchmark {
  void (*prepare)();   // set-up hook, called before the timer starts
  void (*run)();       // the timed workload
  int (*validate)();   // returns non-zero when the result is correct
  const char *name;
  const char *desc;
  Setting settings[3];
} Benchmark;
// Benchmark currently being executed and the Setting selected for it;
// both are assigned by main() before the benchmark's hooks are called.
extern Benchmark *current;
extern Setting *setting;
// Outcome of a single benchmark run.
typedef struct Result {
  int pass;            // value returned by the benchmark's validate hook
  unsigned long tsc;   // not written by any code visible in this chunk
  unsigned long msec;  // elapsed time of the run in milliseconds
} Result;
// NOTE(review): no definition of prepare()/done() is visible in this chunk;
// the harness uses static bench_prepare()/bench_done() instead — confirm
// whether these two prototypes are still needed.
void prepare(Result *res);
void done(Result *res);
// memory allocation
// bench_alloc() hands out zero-filled memory from a bump allocator that is
// reset before every run; bench_free() is a no-op.
void* bench_alloc(size_t size);
void bench_free(void *ptr);
// random number generator
void bench_srand(uint32_t seed);
uint32_t bench_rand(); // return a random number between 0..32767
// checksum
// Hashes the bytes in [start, end) and returns a 32-bit digest.
uint32_t checksum(void *start, void *end);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,88 @@
#include <benchmark.h>
#include "puzzle.h"
#include "heap.h"
// Board edge length: N = 4 gives the 15-puzzle.
const int N = 4;
// Start boards in row-major order, 0 marks the blank. bench_15pz_run() picks
// one by setting->size (0 = S, 1 = M, 2 = L) and asserts that it is solvable.
static int PUZZLE_S[N*N] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10, 0, 11,
13, 14, 15, 12,
};
static int PUZZLE_M[N*N] = {
1, 2, 3, 4,
5, 6, 7, 8,
12, 0, 14, 13,
11, 15, 10, 9,
};
static int PUZZLE_L[N*N] = {
0, 2, 3, 4,
9, 6, 7, 8,
5, 11, 10, 12,
1, 15, 13, 14,
};
// Result of the last run: -1 when no solution was found within the budget.
static int ans;
extern "C" {
// Nothing to set up ahead of time: the search allocates its state in run().
void bench_15pz_prepare() {}
void bench_15pz_run() {
N_puzzle<N> puzzle;
int MAXN;
switch (setting->size) {
case 0: puzzle = N_puzzle<N>(PUZZLE_S); MAXN = 10; break;
case 1: puzzle = N_puzzle<N>(PUZZLE_M); MAXN = 2048; break;
case 2: puzzle = N_puzzle<N>(PUZZLE_L); MAXN = 16384; break;
default: assert(0);
}
assert(puzzle.solvable());
auto *heap = (Updatable_heap<N_puzzle<N>> *) bench_alloc(sizeof(Updatable_heap<N_puzzle<N>>));
heap->init(MAXN);
heap->push( puzzle, 0 );
int n = 0;
ans = -1;
while( heap->size() != 0 && n != MAXN ) {
N_puzzle<N> top = heap->pop();
++n;
if ( top == N_puzzle<N>::solution() ) {
// We are done
ans = heap->length(top) * n;
return;
}
if ( top.tile_left_possible() ) {
heap->push( top.tile_left(), heap->length( top ) + 1 );
}
if ( top.tile_right_possible() ) {
heap->push( top.tile_right(), heap->length( top ) + 1 );
}
if ( top.tile_up_possible() ) {
heap->push( top.tile_up(), heap->length( top ) + 1 );
}
if ( top.tile_down_possible() ) {
heap->push( top.tile_down(), heap->length( top ) + 1 );
}
}
}
int bench_15pz_validate() {
return (uint32_t)ans == setting->checksum;
}
}

View file

@ -0,0 +1,227 @@
// Author: Douglas Wilhelm Harder
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
// Returns the larger of two values; when neither is greater, `b` is returned.
template <typename T>
T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
// Binary min-heap of search steps keyed by path weight, combined with a
// chained hash table so an element already seen can be found again and have
// its weight lowered. The heap array is 1-indexed (the root lives in heap[1]).
// There is no constructor: state is set up by init(), and storage comes from
// bench_alloc().
template <typename T>
class Updatable_heap {
private:
int M; // capacity passed to init(); bucket index is hash & (M - 1)
class Step;
Step **hash_table; // bucket heads, chained through Step::next
Step **heap; // 1-indexed array of heap entries
int heap_size; // number of entries currently in the heap
int maximum_heap_size; // largest heap_size observed so far
void inline swap( int, int );
void percolate_down();
void percolate_up( int );
Step *pointer( T const & ) const; // hash-table lookup, 0 when absent
public:
void init(int m);
~Updatable_heap();
T pop();
void push( T const &, int );
int size() const;
int maximum_size() const;
int length( T const & ) const;
};
// One recorded element: it is a node of a hash-table chain and, through
// heap_index, is also tied to a slot of the heap array.
template <typename T>
class Updatable_heap<T>::Step {
public:
T element;
Step *next; // next entry in the same hash bucket
int heap_index; // slot in the heap array; kept current by init() and swap()
int path_length; // path length supplied when the step was created
int path_weight; // path length plus element.lower_bound(); the heap key
bool visited; // NOTE(review): only ever set to false in the visible code
Step *previous_step; // NOTE(review): only ever set to 0 in the visible code
void init( T const &, Step *, int, int );
int length() const;
int weight() const;
};
// Sets up an empty heap with capacity parameter m.
//
// The heap array is 1-indexed and push() only asserts heap_size <= M *before*
// incrementing, so the highest slot it may write is heap[M + 1]. The array is
// therefore allocated with M + 2 slots; the previous M-slot allocation let a
// full heap write past its end into the neighbouring allocation.
//
// NOTE: buckets are selected with `hash & (M - 1)`, which only spreads
// entries over all buckets when m is a power of two.
template <typename T>
void Updatable_heap<T>::init(int m) {
  M = m;
  heap = (Step **)bench_alloc(sizeof(Step *) * (M + 2));
  hash_table = (Step **)bench_alloc(sizeof(Step *) * (M + 1));
  heap_size = 0;
  maximum_heap_size = 0;
  // bench_alloc() already returns zeroed memory; the explicit clear keeps
  // init() correct should the allocator ever change.
  for ( int i = 0; i < M; ++i ) {
    hash_table[i] = 0;
  }
}
// Releases every Step reachable from the hash table, then the two arrays.
// The previous version walked the chains but released nothing (its `tmp`
// local was dead). All storage comes from bench_alloc(), so it is handed back
// through bench_free(); with the current bump allocator that call is a no-op,
// but the destructor now stays correct if the allocator learns to free.
template <typename T>
Updatable_heap<T>::~Updatable_heap() {
  for ( int i = 0; i < M; ++i ) {
    Step *ptr = hash_table[i];
    while ( ptr != 0 ) {
      Step *tmp = ptr;
      ptr = ptr->next;   // advance before the node is released
      bench_free( tmp );
    }
  }
  bench_free( hash_table );
  bench_free( heap );
}
// Removes and returns the element with the smallest weight.
// An empty heap yields a default-constructed T.
// NOTE(review): the popped Step stays in the hash table and its `visited`
// flag is not changed here.
template <typename T>
T Updatable_heap<T>::pop() {
  const int count = size();
  if ( count == 0 ) {
    return T();
  }

  T best = heap[1]->element;
  if ( count == 1 ) {
    heap_size = 0;
    return best;
  }

  assert( count > 1 );
  // Move the last entry to the root, shrink, and restore heap order.
  heap[1] = heap[count];
  heap[1]->heap_index = 1;
  --heap_size;
  percolate_down();
  return best;
}
// Exchanges heap slots i and j and refreshes both entries' heap_index.
template <typename T>
void inline Updatable_heap<T>::swap( int i, int j ) {
  Step *const at_i = heap[i];
  Step *const at_j = heap[j];
  heap[i] = at_j;
  heap[j] = at_i;
  heap[i]->heap_index = i;
  heap[j]->heap_index = j;
}
// Sinks the root until it is strictly lighter than both children.
// While a node has two children it swaps with the left child when that one is
// strictly lighter than the right, otherwise with the right. A trailing node
// with only a left child is handled after the loop.
template <typename T>
void Updatable_heap<T>::percolate_down() {
  int n = 1;
  for ( ;; ) {
    const int left = 2*n;
    const int right = left + 1;
    if ( right > size() ) {
      break;
    }

    const int w_node = heap[n]->weight();
    const int w_left = heap[left]->weight();
    const int w_right = heap[right]->weight();
    if ( w_node < w_left && w_node < w_right ) {
      return;
    }

    const int child = ( w_left < w_right ) ? left : right;
    assert( child == left || w_left >= w_right );
    swap( n, child );
    n = child;
  }

  if ( 2*n == size() && heap[2*n]->weight() < heap[n]->weight() ) {
    swap( n, 2*n );
  }
}
// Lifts the entry in slot n while its parent is strictly heavier.
template <typename T>
void Updatable_heap<T>::percolate_up( int n ) {
  for ( int parent = n/2; n != 1; parent = n/2 ) {
    if ( heap[parent]->weight() <= heap[n]->weight() ) {
      return;
    }
    swap( parent, n );
    n = parent;
  }
}
// Offers element pz reached with the given path length.
//  - Unknown element: a new Step is allocated, linked at the head of its hash
//    bucket, appended to the heap and percolated up.
//  - Known, unvisited element: when the new weight is strictly smaller, the
//    stored weight is lowered and the entry percolated up.
// NOTE(review): the update branch changes path_weight only; path_length keeps
// its original value.
template <typename T>
void Updatable_heap<T>::push( T const &pz, int path_length ) {
  Step *known = pointer( pz );

  if ( known != 0 ) {
    if ( known->visited ) {
      return;
    }
    const int candidate = path_length + known->element.lower_bound();
    if ( candidate < known->weight() ) {
      known->path_weight = candidate;
      percolate_up( known->heap_index );
    }
    return;
  }

  assert( heap_size <= M );
  ++heap_size;

  Step *fresh = (Step*)bench_alloc(sizeof(Step));
  const unsigned int bucket = pz.hash() & (M - 1);
  fresh->init( pz, hash_table[bucket], size(), path_length );
  hash_table[bucket] = fresh;

  heap[size()] = fresh;
  percolate_up( size() );
  maximum_heap_size = max( maximum_heap_size, size() );
}
// Number of entries currently stored in the heap.
template <typename T>
int Updatable_heap<T>::size() const {
  const int count = heap_size;
  return count;
}
// Largest heap size recorded by push() since init().
template <typename T>
int Updatable_heap<T>::maximum_size() const {
  const int peak = maximum_heap_size;
  return peak;
}
// Path length recorded for pz, or 2147483647 when pz was never pushed.
template <typename T>
int Updatable_heap<T>::length( T const &pz ) const {
  Step *found = pointer( pz );
  if ( found == 0 ) {
    return 2147483647;
  }
  return found->length();
}
// Looks pz up in its hash bucket; returns the matching Step or 0.
template <typename T>
typename Updatable_heap<T>::Step *Updatable_heap<T>::pointer( T const &pz ) const {
  Step *node = hash_table[pz.hash() & (M - 1)];
  while ( node != 0 ) {
    if ( node->element == pz ) {
      return node;
    }
    node = node->next;
  }
  return 0;
}
/****************************************************
 * ************************************************ *
 * *                    Step                      * *
 * ************************************************ *
 ****************************************************/

// Fills in a freshly allocated Step.
//   pz   - element to store
//   n    - current head of the hash bucket (becomes this step's `next`)
//   hi   - slot the step occupies in the heap array
//   dist - path length; the weight is dist + pz.lower_bound()
template <typename T>
void Updatable_heap<T>::Step::init( T const &pz, Step *n, int hi, int dist ) {
  element = pz;
  path_length = dist;
  path_weight = dist + element.lower_bound();
  heap_index = hi;
  next = n;
  previous_step = 0;
  visited = false;
}
// Path length stored for this step.
template <typename T>
int Updatable_heap<T>::Step::length() const {
  const int stored = path_length;
  return stored;
}
// Heap key of this step (path length plus lower bound).
template <typename T>
int Updatable_heap<T>::Step::weight() const {
  const int key = path_weight;
  return key;
}

View file

@ -0,0 +1,475 @@
// Author: Douglas Wilhelm Harder
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
// Url: https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/N_puzzles/
// An N x N sliding-tile board. Tile 0 is the blank. Besides the board itself
// the object caches the blank's position, the summed Manhattan distance of
// all tiles from their solved positions, and a hash of the board.
template <int N>
class N_puzzle {
private:
bool puzzle_valid; // false for malformed boards and for impossible moves
uint8_t zero_i, zero_j; // row / column of the blank
int8_t manhattan_distance; // kept up to date incrementally by tile_*()
int8_t puzzle[N][N];
int hash_value; // cached by determine_hash()
void determine_hash();
static int abs( int n ) { return ( n < 0 ) ? -n : n; }
public:
N_puzzle(); // random board, drawn with bench_rand()
N_puzzle( int array[N*N] ); // board from row-major values
N_puzzle( N_puzzle const & );
N_puzzle &operator=( N_puzzle const & );
bool solvable() const;
bool valid() const;
int lower_bound() const;
unsigned int hash() const;
// tile_X_possible(): whether the move tile_X() is legal for this board.
bool tile_up_possible() const;
bool tile_down_possible() const;
bool tile_left_possible() const;
bool tile_right_possible() const;
// tile_X(): returns a new board with the move applied; *this is unchanged.
N_puzzle tile_up() const;
N_puzzle tile_down() const;
N_puzzle tile_left() const;
N_puzzle tile_right() const;
// Both comparisons return false when either board is invalid.
bool operator==( N_puzzle const & ) const;
bool operator!=( N_puzzle const & ) const;
N_puzzle static solution();
};
// Builds a random board: tiles 0..N*N-1 are drawn without replacement using
// bench_rand(), one draw per cell in row-major order. The blank position,
// Manhattan distance and hash are computed along the way.
template < int N >
N_puzzle<N>::N_puzzle():
puzzle_valid( true ),
manhattan_distance( 0 ) {
  int pool[N*N];
  for ( int k = 0; k < N*N; ++k ) {
    pool[k] = k;
  }

  int remaining = N*N;
  for ( int row = 0; row < N; ++row ) {
    for ( int col = 0; col < N; ++col ) {
      const int pick = bench_rand() % remaining;
      const int tile = pool[pick];
      puzzle[row][col] = tile;

      if ( tile == 0 ) {
        zero_i = row;
        zero_j = col;
      } else {
        manhattan_distance += abs( ((tile - 1) / N) - row );
        manhattan_distance += abs( ((tile - 1) % N) - col );
      }

      // Drop the drawn tile by overwriting it with the last live pool entry.
      --remaining;
      pool[pick] = pool[remaining];
    }
  }

  determine_hash();
}
// Builds a board from N*N row-major values (0 = blank).
//
// The board is valid only when every value 0..N*N-1 occurs exactly once;
// otherwise puzzle_valid is cleared and no hash is computed.
//
// Values outside [0, N*N) used to index the local `seen` array directly and
// could write out of bounds. They are now simply not recorded: with N*N cells
// and at least one out-of-range value, some in-range value must be missing,
// so the completeness check below rejects the board.
template < int N >
N_puzzle<N>::N_puzzle( int array[N*N] ):
puzzle_valid( true ),
manhattan_distance( 0 ) {
  bool seen[N*N];
  for ( int i = 0; i < N*N; ++i ) {
    seen[i] = false;
  }

  int n = 0;
  for ( int i = 0; i < N; ++i ) {
    for ( int j = 0; j < N; ++j ) {
      const int tile = array[n];
      puzzle[i][j] = tile;
      if ( tile >= 0 && tile < N*N ) {
        seen[tile] = true;
      }

      if ( tile == 0 ) {
        zero_i = i;
        zero_j = j;
      } else {
        manhattan_distance += abs( ((tile - 1) / N) - i );
        manhattan_distance += abs( ((tile - 1) % N) - j );
      }
      ++n;
    }
  }

  for ( int i = 0; i < N*N; ++i ) {
    if ( !seen[i] ) {
      puzzle_valid = false;
      return;
    }
  }

  determine_hash();
}
/*
 * Determine a hash value for the puzzle: a base-1973 polynomial over the
 * cells in row-major order.
 *
 * The polynomial wraps many times for N = 4. It is accumulated in an unsigned
 * integer, where wrap-around is well defined, and stored afterwards; the
 * previous signed accumulation overflowed, which is undefined behaviour.
 * hash() converts the stored value back to unsigned, so callers observe the
 * same bits a wrapping signed computation would have produced.
 */
template < int N >
void N_puzzle<N>::determine_hash() {
  unsigned int acc = 0;
  for ( int i = 0; i < N; ++i ) {
    for ( int j = 0; j < N; ++j ) {
      acc = acc*1973u + static_cast<unsigned int>( puzzle[i][j] );
    }
  }
  hash_value = static_cast<int>( acc );
}
// Copy constructor: duplicates the cached fields and every cell.
template < int N >
N_puzzle<N>::N_puzzle( N_puzzle const &pz ):
puzzle_valid( pz.puzzle_valid ),
zero_i( pz.zero_i ),
zero_j( pz.zero_j ),
manhattan_distance( pz.manhattan_distance ),
hash_value( pz.hash_value ) {
  for ( int row = 0; row < N; ++row ) {
    int col = 0;
    while ( col < N ) {
      puzzle[row][col] = pz.puzzle[row][col];
      ++col;
    }
  }
}
// Copy assignment: overwrites every cell and cached field with rhs's.
template < int N >
N_puzzle<N> &N_puzzle<N>::operator=( N_puzzle const &rhs ) {
  for ( int row = 0; row < N; ++row ) {
    for ( int col = 0; col < N; ++col ) {
      puzzle[row][col] = rhs.puzzle[row][col];
    }
  }
  hash_value = rhs.hash_value;
  manhattan_distance = rhs.manhattan_distance;
  zero_j = rhs.zero_j;
  zero_i = rhs.zero_i;
  puzzle_valid = rhs.puzzle_valid;
  return *this;
}
/*
 * A tile can move up only on a valid board whose blank
 * is not in the last row.
 */
template <int N>
bool N_puzzle<N>::tile_up_possible() const {
  if ( !puzzle_valid ) {
    return false;
  }
  return zero_i != N - 1;
}
/*
 * A tile can move down only on a valid board whose blank
 * is not in the first row.
 */
template <int N>
bool N_puzzle<N>::tile_down_possible() const {
  if ( !puzzle_valid ) {
    return false;
  }
  return zero_i != 0;
}
/*
 * A tile can move left only on a valid board whose blank
 * is not in the last column.
 */
template <int N>
bool N_puzzle<N>::tile_left_possible() const {
  if ( !puzzle_valid ) {
    return false;
  }
  return zero_j != N - 1;
}
/*
 * A tile can move right only on a valid board whose blank
 * is not in the first column.
 */
template <int N>
bool N_puzzle<N>::tile_right_possible() const {
  if ( !puzzle_valid ) {
    return false;
  }
  return zero_j != 0;
}
// Returns the board obtained by sliding the tile below the blank upwards.
// An invalid board is returned unchanged; an impossible move yields a copy
// marked invalid. The Manhattan distance is adjusted by the change in the
// moved tile's row distance and the hash is recomputed.
template <int N>
N_puzzle<N> N_puzzle<N>::tile_up() const {
  if ( !puzzle_valid ) {
    return *this;
  }

  N_puzzle moved( *this );
  if ( zero_i == N - 1 ) {
    moved.puzzle_valid = false;
    return moved;
  }

  const int tile = puzzle[zero_i + 1][zero_j];
  const int home_row = (tile - 1) / N;
  moved.manhattan_distance +=
    abs( home_row - zero_i ) - abs( home_row - (zero_i + 1) );

  moved.puzzle[zero_i][zero_j] = tile;
  ++moved.zero_i;
  moved.puzzle[moved.zero_i][zero_j] = 0;
  moved.determine_hash();
  return moved;
}
// Returns the board obtained by sliding the tile above the blank downwards.
// An invalid board is returned unchanged; an impossible move yields a copy
// marked invalid. The Manhattan distance is adjusted by the change in the
// moved tile's row distance and the hash is recomputed.
template <int N>
N_puzzle<N> N_puzzle<N>::tile_down() const {
  if ( !puzzle_valid ) {
    return *this;
  }

  N_puzzle moved( *this );
  if ( zero_i == 0 ) {
    moved.puzzle_valid = false;
    return moved;
  }

  const int tile = puzzle[zero_i - 1][zero_j];
  const int home_row = (tile - 1) / N;
  moved.manhattan_distance +=
    abs( home_row - zero_i ) - abs( home_row - (zero_i - 1) );

  moved.puzzle[zero_i][zero_j] = tile;
  --moved.zero_i;
  moved.puzzle[moved.zero_i][zero_j] = 0;
  moved.determine_hash();
  return moved;
}
// Returns the board obtained by sliding the tile right of the blank leftwards.
// An invalid board is returned unchanged; an impossible move yields a copy
// marked invalid. The Manhattan distance is adjusted by the change in the
// moved tile's column distance and the hash is recomputed.
template <int N>
N_puzzle<N> N_puzzle<N>::tile_left() const {
  if ( !puzzle_valid ) {
    return *this;
  }

  N_puzzle moved( *this );
  if ( zero_j == N - 1 ) {
    moved.puzzle_valid = false;
    return moved;
  }

  const int tile = puzzle[zero_i][zero_j + 1];
  const int home_col = (tile - 1) % N;
  moved.manhattan_distance +=
    abs( home_col - zero_j ) - abs( home_col - (zero_j + 1) );

  moved.puzzle[zero_i][zero_j] = tile;
  ++moved.zero_j;
  moved.puzzle[zero_i][moved.zero_j] = 0;
  moved.determine_hash();
  return moved;
}
// Returns the board obtained by sliding the tile left of the blank rightwards.
// An invalid board is returned unchanged; an impossible move yields a copy
// marked invalid. The Manhattan distance is adjusted by the change in the
// moved tile's column distance and the hash is recomputed.
template <int N>
N_puzzle<N> N_puzzle<N>::tile_right() const {
  if ( !puzzle_valid ) {
    return *this;
  }

  N_puzzle moved( *this );
  if ( zero_j == 0 ) {
    moved.puzzle_valid = false;
    return moved;
  }

  const int tile = puzzle[zero_i][zero_j - 1];
  const int home_col = (tile - 1) % N;
  moved.manhattan_distance +=
    abs( home_col - zero_j ) - abs( home_col - (zero_j - 1) );

  moved.puzzle[zero_i][zero_j] = tile;
  --moved.zero_j;
  moved.puzzle[zero_i][moved.zero_j] = 0;
  moved.determine_hash();
  return moved;
}
/*
 * Check if the puzzle is solvable: that is, check the parity of the
 * number of inversions plus the Manhattan distance of the blank from
 * the lower-right corner. The blank counts as the largest value (N*N)
 * when inversions are counted.
 *
 * Run time: O(n^2)
 * Memory: O(n)
 */
template <int N>
bool N_puzzle<N>::solvable() const {
  if ( !valid() ) {
    return false;
  }

  // Flatten the board row-major, with the blank replaced by N*N.
  int flat[N*N];
  for ( int row = 0; row < N; ++row ) {
    for ( int col = 0; col < N; ++col ) {
      const int tile = puzzle[row][col];
      flat[N*row + col] = ( tile == 0 ) ? N*N : tile;
    }
  }

  int parity = 0;
  for ( int a = 0; a < N*N; ++a ) {
    for ( int b = a + 1; b < N*N; ++b ) {
      if ( flat[a] > flat[b] ) {
        ++parity;
      }
    }
  }

  parity += 2*N - 2 - zero_i - zero_j;
  return ( parity & 1 ) == 0;
}
// Whether this board is well formed and was produced by legal moves only.
template <int N>
bool N_puzzle<N>::valid() const {
  const bool ok = puzzle_valid;
  return ok;
}
/*
 * Lower bound on the number of moves still needed: the cached Manhattan
 * distance for a valid board, N*N*N for an invalid one.
 *
 * The Hamming-distance code that used to follow the return statement could
 * never execute (and contained a precedence slip, `count % N*N`), so it has
 * been removed. The returned value is unchanged.
 */
template <int N>
int N_puzzle<N>::lower_bound() const {
  return valid() ? manhattan_distance : N*N*N;
}
/*
 * puzzle1 == puzzle2
 *
 * Equal means: both boards are valid, their hashes agree, and every
 * cell matches. An invalid board never compares equal to anything.
 */
template < int N >
bool N_puzzle<N>::operator==( N_puzzle const &rhs ) const {
  if ( !valid() || !rhs.valid() ) {
    return false;
  }
  if ( hash() != rhs.hash() ) {
    return false;
  }

  for ( int row = 0; row < N; ++row ) {
    for ( int col = 0; col < N; ++col ) {
      if ( puzzle[row][col] != rhs.puzzle[row][col] ) {
        return false;
      }
    }
  }
  return true;
}
/*
 * puzzle1 != puzzle2
 *
 * Unequal means: both boards are valid and either their hashes or at
 * least one cell differ. If either board is invalid the result is
 * false, so this is not simply the negation of operator==.
 */
template < int N >
bool N_puzzle<N>::operator!=( N_puzzle const &rhs ) const {
  const bool both_valid = valid() && rhs.valid();
  if ( !both_valid ) {
    return false;
  }
  if ( hash() != rhs.hash() ) {
    return true;
  }

  for ( int row = 0; row < N; ++row ) {
    for ( int col = 0; col < N; ++col ) {
      if ( puzzle[row][col] != rhs.puzzle[row][col] ) {
        return true;
      }
    }
  }
  return false;
}
/*
 * unsigned int hash() const
 *
 * Returns the cached hash of a valid board and 0 for an invalid one.
 */
template < int N >
unsigned int N_puzzle<N>::hash() const {
  if ( !valid() ) {
    return 0;
  }
  return hash_value;
}
/*
 * N_puzzle<N> solution()
 *
 * Returns the solved board: tiles 1 .. N*N-1 in row-major order with
 * the blank in the last cell.
 *
 *         1 2 3           1  2  3  4
 *   3x3:  4 5 6     4x4:  5  6  7  8
 *         7 8             9 10 11 12
 *                        13 14 15
 */
template <int N>
N_puzzle<N> N_puzzle<N>::solution() {
  int goal[N*N];
  int tile = 1;
  for ( int idx = 0; idx < N*N - 1; ++idx ) {
    goal[idx] = tile;
    ++tile;
  }
  goal[N*N - 1] = 0;
  return N_puzzle<N>( goal );
}

View file

@ -0,0 +1,181 @@
#include <am.h>
#include <benchmark.h>
#include <limits.h>
#include <klib-macros.h>
Benchmark *current;
Setting *setting;
static char *hbrk;
// Timer uptime in milliseconds (the timer register reports microseconds).
static uint32_t uptime_ms() {
  return io_read(AM_TIMER_UPTIME).us / 1000;
}
// The benchmark list
// ENTRY turns one BENCHMARK_LIST row into a Benchmark initializer: it wires
// up the three bench_<name>_* hooks, the names, and the settings for the
// small / medium / large inputs (in that order).
#define ENTRY(_name, _sname, _s, _m, _l, _desc) \
{ .prepare = bench_##_name##_prepare, \
.run = bench_##_name##_run, \
.validate = bench_##_name##_validate, \
.name = _sname, \
.desc = _desc, \
.settings = {_s, _m, _l}, },
// One entry per BENCHMARK_LIST row, in list order.
Benchmark benchmarks[] = {
BENCHMARK_LIST(ENTRY)
};
// Running a benchmark
// Starts the measurement: stores the current uptime in res->msec.
static void bench_prepare(Result *res) {
  const uint32_t started_at = uptime_ms();
  res->msec = started_at;
}
// Rewinds the bump allocator to the start of the heap, rounded up to 8 bytes.
static void bench_reset() {
  void *base = (void *)ROUNDUP(heap.start, 8);
  hbrk = base;
}
// Stops the measurement: turns the start stamp in res->msec into elapsed ms.
static void bench_done(Result *res) {
  const uint32_t finished_at = uptime_ms();
  res->msec = finished_at - res->msec;
}
// Decides whether the current setting can run on this machine.
// Returns NULL when it can, otherwise a human-readable reason.
// Only the global `setting` is consulted; `bench` is not read.
static const char *bench_check(Benchmark *bench) {
  const uintptr_t heap_bytes = (uintptr_t)heap.end - (uintptr_t)heap.start;
  return (heap_bytes < setting->mlim) ? "(insufficient memory)" : NULL;
}
// Executes the current benchmark once and fills in *res.
// Only run() is timed; prepare() happens before the clock starts and
// validate() after it stops. The hooks are taken from the global `current`.
static void run_once(Benchmark *b, Result *res) {
  bench_reset();        // rewind the allocator
  current->prepare();   // benchmark-specific set-up (untimed)

  bench_prepare(res);   // start the timer
  current->run();       // timed workload
  bench_done(res);      // stop the timer

  res->pass = current->validate();
}
// Score of one benchmark: (REF_SCORE / 1000) * reference time / measured time.
// A measured time of 0 ms scores 0. `b` and `tsc` are not used.
static unsigned long score(Benchmark *b, unsigned long tsc, unsigned long msec) {
  (void)b;
  (void)tsc;
  const unsigned long scaled_ref = (REF_SCORE / 1000) * setting->ref;
  return (msec == 0) ? 0 : scaled_ref / msec;
}
// Entry point. `args` names the input scale: "test", "train" or "ref"
// (empty or NULL selects "ref"). Every benchmark is run REPEAT times, the
// fastest run is scored, and a summary is printed. Benchmarks that need more
// heap than is available are skipped but still count in the score average.
int main(const char *args) {
  static const char *const scales[] = { "test", "train", "ref" };

  const char *setting_name = args;
  if (args == NULL || strcmp(args, "") == 0) {
    printf("Empty mainargs. Use \"ref\" by default\n");
    setting_name = "ref";
  }

  int setting_id = -1;
  for (int k = 0; k < 3 && setting_id < 0; k ++) {
    if (strcmp(setting_name, scales[k]) == 0) setting_id = k;
  }
  if (setting_id < 0) {
    printf("Invalid mainargs: \"%s\"; "
           "must be in {test, train, ref}\n", setting_name);
    halt(1);
  }

  ioe_init();

  printf("======= Running MicroBench [input *%s*] =======\n", setting_name);

  unsigned long bench_score = 0;
  int pass = 1;
  uint32_t t0 = uptime_ms();

  for (int i = 0; i < LENGTH(benchmarks); i ++) {
    Benchmark *bench = &benchmarks[i];
    current = bench;
    setting = &bench->settings[setting_id];

    const char *msg = bench_check(bench);
    printf("[%s] %s: ", bench->name, bench->desc);
    if (msg != NULL) {
      printf("Ignored %s\n", msg);
      continue;
    }

    // Keep the fastest of REPEAT runs; every run has to validate.
    unsigned long msec = ULONG_MAX;
    int succ = 1;
    for (int rep = 0; rep < REPEAT; rep ++) {
      Result res;
      run_once(bench, &res);
      printf(res.pass ? "*" : "X");
      succ &= res.pass;
      if (res.msec < msec) msec = res.msec;
    }

    printf(succ ? " Passed." : " Failed.");
    pass &= succ;

    unsigned long cur = score(bench, 0, msec);
    printf("\n");
    if (setting_id != 0) {
      printf(" min time: %d ms [%d]\n", (unsigned int)msec, (unsigned int)cur);
    }
    bench_score += cur;
  }

  uint32_t t1 = uptime_ms();
  bench_score /= LENGTH(benchmarks);

  printf("==================================================\n");
  printf("MicroBench %s", pass ? "PASS" : "FAIL");
  if (setting_id == 2) {
    printf(" %d Marks\n", (unsigned int)bench_score);
    printf(" vs. %d Marks (%s)\n", REF_SCORE, REF_CPU);
  } else {
    printf("\n");
  }
  printf("Total time: %d ms\n", t1 - t0);
  return 0;
}
// Libraries

// Bump allocator: returns `size` bytes (rounded up to a multiple of 8) of
// zero-filled memory taken from the heap. Asserts that the break stays inside
// the heap and that total usage stays within the current setting's mlim.
void* bench_alloc(size_t size) {
  const size_t rounded = (size_t)ROUNDUP(size, 8);
  char *block = hbrk;
  hbrk += rounded;
  assert((uintptr_t)heap.start <= (uintptr_t)hbrk && (uintptr_t)hbrk < (uintptr_t)heap.end);

  // Clear the block eight bytes at a time.
  uint64_t *word = (uint64_t *)block;
  while (word != (uint64_t *)hbrk) {
    *word = 0;
    word ++;
  }

  assert((uintptr_t)hbrk - (uintptr_t)heap.start <= setting->mlim);
  return block;
}
// Intentionally does nothing: memory is reclaimed wholesale when
// bench_reset() rewinds the allocator before the next run.
void bench_free(void *ptr) {
  (void)ptr;
}
// State of the linear congruential generator.
static uint32_t seed = 1;

// Re-seeds the generator; only the low 15 bits of _seed are kept.
void bench_srand(uint32_t _seed) {
  const uint32_t low15 = _seed & 0x7fff;
  seed = low15;
}

// Advances the generator (seed = seed * 214013 + 2531011, modulo 2^32) and
// returns bits 16..30 of the new state, i.e. a value in 0..32767.
uint32_t bench_rand() {
  const uint32_t multiplier = (uint32_t)214013L;
  const uint32_t increment = (uint32_t)2531011L;
  seed = seed * multiplier + increment;
  return (seed >> 16) & 0x7fff;
}
// FNV hash
// Hashes the bytes of [start, end) four at a time with FNV-style xor/multiply
// steps, then scrambles the result with a few shift/add/xor rounds.
// The loop condition is `p + 4 < end`, so the final group of up to four bytes
// is never hashed. The expected checksums stored in the settings table were
// produced with exactly this behaviour — do not "fix" it.
uint32_t checksum(void *start, void *end) {
  const uint32_t fnv_prime = 16777619;
  uint32_t acc = 2166136261u;

  uint8_t *group = (uint8_t*)start;
  uint8_t *const limit = (uint8_t*)end;
  while (group + 4 < limit) {
    acc = (acc ^ group[0]) * fnv_prime;
    acc = (acc ^ group[1]) * fnv_prime;
    acc = (acc ^ group[2]) * fnv_prime;
    acc = (acc ^ group[3]) * fnv_prime;
    group += 4;
  }

  // Final scrambling happens on a signed 32-bit value, as in the original.
  int32_t hash = (uint32_t)acc;
  hash += hash << 13;
  hash ^= hash >> 7;
  hash += hash << 3;
  hash ^= hash >> 17;
  hash += hash << 5;
  return hash;
}

View file

@ -0,0 +1,151 @@
/*
Brainfuck-C ( http://github.com/kgabis/brainfuck-c )
Copyright (c) 2012 Krzysztof Gabis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <benchmark.h>
// Number of input characters; taken from setting->size in bench_bf_prepare().
static int ARR_SIZE;
// The Brainf**k program that the benchmark compiles and interprets.
// bench_bf_validate() expects it to emit exactly ARR_SIZE output bytes.
#define CODE ">>+>>>>>,[>+>>,]>+[--[+<<<-]<[<+>-]<[<[->[<<<+>>>>+<-]<<[>>+>[->]<<[<]" \
"<-]>]>>>+<[[-]<[>+<-]<]>[[>>>]+<<<-<[<<[<<<]>>+>[>>>]<-]<<[<<<]>[>>[>>" \
">]<+<<[<<<]>-]]+<<<]+[->>>]>>]>>[.>>>]"
// Opcodes of the compiled program (one per Brainf**k command, plus END).
#define OP_END 0
#define OP_INC_DP 1
#define OP_DEC_DP 2
#define OP_INC_VAL 3
#define OP_DEC_VAL 4
#define OP_OUT 5
#define OP_IN 6
#define OP_JMP_FWD 7
#define OP_JMP_BCK 8
// Return codes of compile_bf().
#define SUCCESS 0
#define FAILURE 1
// Capacities (in elements) of the compiled program, the bracket stack used
// during compilation, and the data tape.
#define PROGRAM_SIZE 4096
#define STACK_SIZE 512
#define DATA_SIZE 4096
// Bracket stack helpers operating on the globals STACK and SP.
#define STACK_PUSH(A) (STACK[SP++] = A)
#define STACK_POP() (STACK[--SP])
#define STACK_EMPTY() (SP == 0)
#define STACK_FULL() (SP == STACK_SIZE)
// One compiled instruction.
// NOTE: the field name `operator` is a C++ keyword, so this file only builds as C.
struct instruction_t {
unsigned short operator; // one of the OP_* codes
unsigned short operand; // for OP_JMP_FWD / OP_JMP_BCK: index of the matching bracket
};
static struct instruction_t *PROGRAM;
static unsigned short *STACK;
static unsigned int SP;
static const char *code;
static char *input;
static int compile_bf() {
unsigned short pc = 0, jmp_pc;
for (; *code; code ++) {
int c = *code;
if (pc >= PROGRAM_SIZE) break;
switch (c) {
case '>': PROGRAM[pc].operator = OP_INC_DP; break;
case '<': PROGRAM[pc].operator = OP_DEC_DP; break;
case '+': PROGRAM[pc].operator = OP_INC_VAL; break;
case '-': PROGRAM[pc].operator = OP_DEC_VAL; break;
case '.': PROGRAM[pc].operator = OP_OUT; break;
case ',': PROGRAM[pc].operator = OP_IN; break;
case '[':
PROGRAM[pc].operator = OP_JMP_FWD;
if (STACK_FULL()) {
return FAILURE;
}
STACK_PUSH(pc);
break;
case ']':
if (STACK_EMPTY()) {
return FAILURE;
}
jmp_pc = STACK_POP();
PROGRAM[pc].operator = OP_JMP_BCK;
PROGRAM[pc].operand = jmp_pc;
PROGRAM[jmp_pc].operand = pc;
break;
default: pc--; break;
}
pc++;
}
if (!STACK_EMPTY() || pc == PROGRAM_SIZE) {
return FAILURE;
}
PROGRAM[pc].operator = OP_END;
return SUCCESS;
}
static unsigned short *data;
static char *output;
static int noutput;
static void execute_bf() {
unsigned int pc = 0, ptr = 0;
while (PROGRAM[pc].operator != OP_END && ptr < DATA_SIZE) {
switch (PROGRAM[pc].operator) {
case OP_INC_DP: ptr++; break;
case OP_DEC_DP: ptr--; break;
case OP_INC_VAL: data[ptr]++; break;
case OP_DEC_VAL: data[ptr]--; break;
case OP_OUT: output[noutput ++] = data[ptr]; break;
case OP_IN: data[ptr] = *(input ++); break;
case OP_JMP_FWD: if(!data[ptr]) { pc = PROGRAM[pc].operand; } break;
case OP_JMP_BCK: if(data[ptr]) { pc = PROGRAM[pc].operand; } break;
default: return;
}
pc++;
}
}
// Allocates the interpreter's buffers and fills `input` with ARR_SIZE random
// alphanumeric characters (generator seeded with 1). The input buffer has one
// extra byte, which bench_alloc() leaves zeroed.
void bench_bf_prepare() {
  static const char alphabet[] =
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

  ARR_SIZE = setting->size;
  SP = 0;
  PROGRAM = bench_alloc(sizeof(PROGRAM[0]) * PROGRAM_SIZE);
  STACK = bench_alloc(sizeof(STACK[0]) * STACK_SIZE);
  data = bench_alloc(sizeof(data[0]) * DATA_SIZE);
  code = CODE;
  input = bench_alloc(ARR_SIZE + 1);
  output = bench_alloc(DATA_SIZE);
  noutput = 0;

  bench_srand(1);
  int filled = 0;
  while (filled < ARR_SIZE) {
    input[filled] = alphabet[bench_rand() % 62];
    filled ++;
  }
}
// Timed section: compile the program, then interpret it.
// The status returned by compile_bf() is not inspected.
void bench_bf_run() {
  (void)compile_bf();
  execute_bf();
}
// Passes when exactly ARR_SIZE bytes were written and their checksum matches.
int bench_bf_validate() {
  const uint32_t digest = checksum(output, output + noutput);
  if (noutput != ARR_SIZE) {
    return 0;
  }
  return digest == setting->checksum;
}

View file

@ -0,0 +1,138 @@
#include <benchmark.h>
static int N;
const int INF = 0x3f3f3f;
// Directed edge of the flow network with its capacity and current flow.
struct Edge {
  int from, to, cap, flow;

  // Leaves all fields uninitialised.
  Edge() {}

  Edge(int from, int to, int cap, int flow)
    : from(from), to(to), cap(cap), flow(flow) {}
};
// Returns the smaller of two values; when neither is smaller, `y` is returned.
template<typename T>
static inline T min(T x, T y) {
  if (x < y) {
    return x;
  }
  return y;
}
// Dinic-style maximum flow on an adjacency-list graph whose storage comes
// from bench_alloc(). Edges are stored in pairs: edge i and its residual
// partner i ^ 1.
struct Dinic {
  int n, m, s, t;        // vertex count, edge count, source, sink
  Edge *edges;
  int *head, *nxt;       // adjacency lists: head[v] -> edge, nxt[e] -> next edge
  int *d, *cur, *queue;  // BFS level, per-vertex start edge for DFS, BFS queue
  bool *vis;

  // Allocates storage for n vertices. The edge capacity assumes the layout
  // built by bench_dinic_prepare(): two groups of (n - 2) / 2 vertices plus
  // a source and a sink.
  void init(int n) {
    const int half = (n - 2) / 2;
    const int maxm = (half * half + half * 2) * 2;

    edges = (Edge *)bench_alloc(sizeof(Edge) * maxm);
    head = (int *)bench_alloc(sizeof(int) * n);
    nxt = (int *)bench_alloc(sizeof(int) * maxm);
    vis = (bool *)bench_alloc(sizeof(bool) * n);
    d = (int *)bench_alloc(sizeof(int) * n);
    cur = (int *)bench_alloc(sizeof(int) * n);
    queue = (int *)bench_alloc(sizeof(int) * n);

    this->n = n;
    m = 0;
    for (int v = 0; v < n; v ++) {
      head[v] = -1;
    }
  }

  // Adds edge u -> v with capacity c together with its zero-capacity
  // reverse edge. Zero-capacity requests are ignored.
  void AddEdge(int u, int v, int c) {
    if (c == 0) return;

    edges[m] = Edge(u, v, c, 0);
    nxt[m] = head[u];
    head[u] = m;
    m ++;

    edges[m] = Edge(v, u, 0, 0);
    nxt[m] = head[v];
    head[v] = m;
    m ++;
  }

  // Labels every vertex reachable from s through non-saturated edges with
  // its BFS level in d[]. Returns whether t was reached.
  bool BFS() {
    for (int v = 0; v < n; v ++) vis[v] = false;

    int front = 0, back = 0;
    queue[back ++] = s;
    d[s] = 0;
    vis[s] = true;

    while (front != back) {
      const int x = queue[front ++];
      for (int i = head[x]; i != -1; i = nxt[i]) {
        const Edge &e = edges[i];
        if (vis[e.to] || e.cap <= e.flow) continue;
        vis[e.to] = true;
        d[e.to] = d[x] + 1;
        queue[back ++] = e.to;
      }
    }
    return vis[t];
  }

  // Pushes up to `a` units of flow from x towards t along edges that go
  // exactly one BFS level deeper. Returns the amount pushed.
  // NOTE(review): cur[x] is read but never advanced here.
  int DFS(int x, int a) {
    if (x == t || a == 0) return a;

    int pushed = 0;
    for (int i = cur[x]; i != -1; i = nxt[i]) {
      Edge &e = edges[i];
      if (d[x] + 1 != d[e.to]) continue;

      const int f = DFS(e.to, min(a, e.cap - e.flow));
      if (f <= 0) continue;

      e.flow += f;
      edges[i ^ 1].flow -= f;
      pushed += f;
      a -= f;
      if (a == 0) break;
    }
    return pushed;
  }

  // Computes the maximum flow from s to t: one DFS from the source per
  // successful BFS, until t is no longer reachable.
  int Maxflow(int s, int t) {
    this->s = s;
    this->t = t;

    int total = 0;
    while (BFS()) {
      for (int v = 0; v < n; v ++) {
        cur[v] = head[v];
      }
      total += DFS(s, INF);
    }
    return total;
  }
};
extern "C" {
static Dinic *G;
static int ans;
// Builds a random network (generator seeded with 1): vertices 0..N-1 on the
// left, N..2N-1 on the right, source 2N, sink 2N+1. Left-right capacities are
// drawn from 0..9, source and sink capacities from 0..999.
void bench_dinic_prepare() {
  N = setting->size;
  bench_srand(1);

  const int s = 2 * N;
  const int t = 2 * N + 1;
  G = (Dinic*)bench_alloc(sizeof(Dinic));
  G->init(2 * N + 2);

  for (int left = 0; left < N; left ++) {
    for (int right = 0; right < N; right ++) {
      G->AddEdge(left, N + right, bench_rand() % 10);
    }
  }

  for (int v = 0; v < N; v ++) {
    G->AddEdge(s, v, bench_rand() % 1000);
    G->AddEdge(N + v, t, bench_rand() % 1000);
  }
}
// Timed section: maximum flow from the source (2N) to the sink (2N + 1).
void bench_dinic_run() {
  const int source = 2 * N;
  const int sink = 2 * N + 1;
  ans = G->Maxflow(source, sink);
}
int bench_dinic_validate() {
return (uint32_t)ans == setting->checksum;
}
}

View file

@ -0,0 +1,64 @@
#include <benchmark.h>
// f(n) = (f(n-1) + f(n-2) + .. f(n-m)) mod 2^32
#define N 2147483603
// Matrix dimension: all matrices are M x M, stored row-major.
static int M;

// Stores `data` at row i, column j of matrix m.
static void put(uint32_t *m, int i, int j, uint32_t data) {
  uint32_t *cell = m + (i * M + j);
  *cell = data;
}
// Reads the value at row i, column j of matrix m.
static uint32_t get(uint32_t *m, int i, int j) {
  const uint32_t *cell = m + (i * M + j);
  return *cell;
}
// c = a * b for M x M matrices, with arithmetic wrapping modulo 2^32.
// Each product term is added straight into c, so c must not alias a or b.
static inline void mult(uint32_t *c, uint32_t *a, uint32_t *b) {
  for (int row = 0; row < M; row ++) {
    for (int col = 0; col < M; col ++) {
      put(c, row, col, 0);
      int k = 0;
      while (k < M) {
        const uint32_t term = get(a, row, k) * get(b, k, col);
        put(c, row, col, get(c, row, col) + term);
        k ++;
      }
    }
  }
}
// Copies matrix b into matrix a, element by element.
static inline void assign(uint32_t *a, uint32_t *b) {
  for (int row = 0; row < M; row ++) {
    int col = 0;
    while (col < M) {
      put(a, row, col, get(b, row, col));
      col ++;
    }
  }
}
// Work matrices, all M x M: A and T hold the transition matrix, ans the
// running result, tmp is scratch space for products.
static uint32_t *A, *ans, *T, *tmp;

// Reads the matrix size from the setting and allocates the four matrices.
void bench_fib_prepare() {
  M = setting->size;
  const int bytes = sizeof(uint32_t) * M * M;
  A = bench_alloc(bytes);
  T = bench_alloc(bytes);
  ans = bench_alloc(bytes);
  tmp = bench_alloc(bytes);
}
// Timed section: raises the transition matrix to the N-th power by repeated
// squaring. The matrix has ones on the superdiagonal and in its last row;
// `ans` starts as the identity and ends up holding the power.
void bench_fib_run() {
  for (int row = 0; row < M; row ++) {
    for (int col = 0; col < M; col ++) {
      const uint32_t entry = (row == M - 1 || col == row + 1);
      put(A, row, col, entry);
      put(T, row, col, entry);
      put(ans, row, col, row == col);
    }
  }

  int exponent = N;
  while (exponent > 0) {
    if (exponent & 1) {
      mult(tmp, ans, T);
      assign(ans, tmp);
    }
    mult(tmp, T, T);
    assign(T, tmp);
    exponent >>= 1;
  }
}
int bench_fib_validate() {
return get(ans, M-1, M-1) == setting->checksum;
}

View file

@ -0,0 +1,29 @@
#include "quicklz.h"
#include <benchmark.h>
static int SIZE;
static qlz_state_compress *state;
static char *blk;
static char *compress;
static int len;
// Allocates the compressor state, a SIZE-byte input block and an output
// buffer of SIZE + 400 bytes, then fills the input with random lowercase
// letters (generator seeded with 1).
void bench_lzip_prepare() {
  SIZE = setting->size;
  bench_srand(1);

  state = bench_alloc(sizeof(qlz_state_compress));
  blk = bench_alloc(SIZE);
  compress = bench_alloc(SIZE + 400);

  int pos = 0;
  while (pos < SIZE) {
    blk[pos] = 'a' + bench_rand() % 26;
    pos ++;
  }
}
// Timed section: compresses the input block and records the output length.
void bench_lzip_run() {
  const int produced = qlz_compress(blk, compress, SIZE, state);
  len = produced;
}
int bench_lzip_validate() {
return checksum(compress, compress + len) == setting->checksum;
}

View file

@ -0,0 +1,761 @@
// Fast data compression library
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
// lar@quicklz.com
//
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
// released into public must be open source) or under a commercial license if such
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
// does not cover derived or ported versions created by third parties under GPL.
// 1.5.0 final
#include "quicklz.h"
#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0
#error quicklz.c and quicklz.h have different versions
#endif
// Tuning constants of the compressor; their users lie outside this chunk.
#define MINOFFSET 2
#define UNCONDITIONAL_MATCHLEN 6
#define UNCOMPRESSED_END 4
// CWORD_LEN: presumably the byte width of a control word — confirm at its use sites.
#define CWORD_LEN 4
// With level 1, 64-bit pointers and no streaming buffer, offsets are taken
// relative to `source` and narrowed to 32 bits via CAST; otherwise both
// macros are neutral.
#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
#define OFFSET_BASE source
#define CAST (ui32)(size_t)
#else
#define OFFSET_BASE 0
#define CAST
#endif
// Reports a compile-time property of the library:
//   0 compression level      1 sizeof(qlz_state_compress)
//   2 sizeof(qlz_state_decompress)   3 streaming buffer size
//   6 1 if built with QLZ_MEMORY_SAFE, else 0
//   7 / 8 / 9 version major / minor / revision
// Any other index yields -1.
int qlz_get_setting(int setting)
{
#ifdef QLZ_MEMORY_SAFE
    const int memory_safe = 1;
#else
    const int memory_safe = 0;
#endif

    if (setting == 0) return QLZ_COMPRESSION_LEVEL;
    if (setting == 1) return sizeof(qlz_state_compress);
    if (setting == 2) return sizeof(qlz_state_decompress);
    if (setting == 3) return QLZ_STREAMING_BUFFER;
    if (setting == 6) return memory_safe;
    if (setting == 7) return QLZ_VERSION_MAJOR;
    if (setting == 8) return QLZ_VERSION_MINOR;
    if (setting == 9) return QLZ_VERSION_REVISION;
    return -1;
}
#if QLZ_COMPRESSION_LEVEL == 1
// Returns 1 when every byte src[1..n] equals src[0], otherwise 0.
// With n == 0 nothing is read and the result is 1.
static int same(const unsigned char *src, size_t n)
{
  size_t idx = n;
  while (idx != 0)
  {
    if (src[idx] != src[0])
      return 0;
    idx--;
  }
  return 1;
}
#endif
// Clears the compressor's hash table: at level 1 every slot's offset is set to
// 0, at the other levels every slot's usage counter is set to 0.
static void reset_table_compress(qlz_state_compress *state)
{
  int slot = QLZ_HASH_VALUES;
  while (slot-- > 0)
  {
#if QLZ_COMPRESSION_LEVEL == 1
    state->hash[slot].offset = 0;
#else
    state->hash_counter[slot] = 0;
#endif
  }
}
// Clears the decompressor's per-slot usage counters. Only level 2 has anything
// to reset; at the other levels this is a no-op.
static void reset_table_decompress(qlz_state_decompress *state)
{
#if QLZ_COMPRESSION_LEVEL == 2
  int slot;
  for (slot = 0; slot != QLZ_HASH_VALUES; ++slot)
    state->hash_counter[slot] = 0;
#else
  (void)state;
#endif
}
// Folds a fetched value into a hash-table index in [0, QLZ_HASH_VALUES).
// Level 2 mixes in the value shifted by 9 and 13 bits, the other levels use a
// single 12-bit shift.
static __inline ui32 hash_func(ui32 i)
{
  const ui32 mask = QLZ_HASH_VALUES - 1;
#if QLZ_COMPRESSION_LEVEL == 2
  const ui32 mixed = i ^ (i >> 9) ^ (i >> 13);
#else
  const ui32 mixed = i ^ (i >> 12);
#endif
  return mixed & mask;
}
// Reads `bytes` bytes (1..4) starting at `src` and assembles them into a value
// in little-endian order (src[0] is the least significant byte). Works for any
// alignment because it only performs byte loads.
// Returns 0 when `bytes` is outside 1..4.
static __inline ui32 fast_read(void const *src, ui32 bytes)
{
  const unsigned char *p = (const unsigned char *)src;
  ui32 ret = 0;
  if (bytes >= 1 && bytes <= 4) {
    for (ui32 i = 0; i < bytes; i ++) {
      // Widen to ui32 before shifting: shifting the promoted (signed) int left
      // by 24 would overflow for byte values >= 0x80.
      ret |= (ui32)p[i] << (i * 8);
    }
  }
  return ret;
}
// Hash-table index for the three bytes starting at `src`.
static __inline ui32 hashat(const unsigned char *src)
{
  return hash_func(fast_read(src, 3));
}
// Stores the low `bytes` bytes of `f` at `dst` in little-endian order
// (dst[0] receives the least significant byte), mirroring fast_read().
// The bytes are produced by shifting rather than by copying the in-memory
// representation of `f`, so the output does not depend on host byte order.
// `bytes` must not exceed 4 (callers in this file pass 2, 3 or 4).
static __inline void fast_write(ui32 f, void *dst, size_t bytes)
{
  unsigned char *out = (unsigned char *)dst;
  for (size_t i = 0; i != bytes; i ++) {
    out[i] = (unsigned char)(f >> (i * 8));
  }
}
// Reads the decompressed-size field from a packet header.
// Bit 1 of the first byte selects 4-byte size fields (set) or 1-byte size
// fields (clear); the decompressed size is the second field, located right
// after the compressed-size field.
size_t qlz_size_decompressed(const char *source)
{
  const ui32 width = (((*source) & 2) == 2) ? 4 : 1;
  const ui32 mask = 0xffffffff >> ((4 - width)*8);
  const ui32 value = fast_read(source + 1 + width, width);
  return value & mask;
}
// Reads the compressed-size field from a packet header.
// Bit 1 of the first byte selects 4-byte size fields (set) or 1-byte size
// fields (clear); the compressed size is the first field, directly after the
// flags byte.
size_t qlz_size_compressed(const char *source)
{
  const ui32 width = (((*source) & 2) == 2) ? 4 : 1;
  const ui32 mask = 0xffffffff >> ((4 - width)*8);
  const ui32 value = fast_read(source + 1, width);
  return value & mask;
}
// Size of the packet header in bytes: one flags byte plus two size fields.
// The fields are 4 bytes each when bit 1 of the flags byte is set (9 bytes in
// total) and 1 byte each otherwise (3 bytes in total).
size_t qlz_size_header(const char *source)
{
  if (((*source) & 2) == 2)
    return 9;
  return 3;
}
// Stub: the copy routine is not implemented in this build. Any call trips the
// assertion, so none of the arguments are used.
static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n)
{
  (void)dst;
  (void)src;
  (void)n;
  assert(0); // unaligned memory access
}
// Records position `s` in the decompressor's hash table under the hash of the
// three bytes found there.
// Level 1 keeps a single position per slot; level 2 keeps QLZ_POINTERS
// positions per slot and overwrites them round-robin using the slot's counter.
// At level 3 this is a no-op.
static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s)
{
#if QLZ_COMPRESSION_LEVEL == 1
  const ui32 slot = hashat(s);
  state->hash[slot].offset = s;
  state->hash_counter[slot] = 1;
#elif QLZ_COMPRESSION_LEVEL == 2
  const ui32 slot = hashat(s);
  const unsigned char used = state->hash_counter[slot];
  state->hash[slot].offset[used & (QLZ_POINTERS - 1)] = s;
  state->hash_counter[slot] = (unsigned char)(used + 1);
#endif
  (void)state;
  (void)s;
}
#if QLZ_COMPRESSION_LEVEL <= 2
// Advances *lh one byte at a time until it reaches `max`, hashing each new
// position (the position is incremented before it is hashed).
static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max)
{
  for (; *lh < max; )
  {
    *lh += 1;
    update_hash(state, *lh);
  }
}
#endif
// Core compressor: encodes `size` bytes from `source` into `destination`.
//
// The output is a sequence of CWORD_LEN-byte control words, each followed by the
// items it describes. Control-word bits are consumed from bit 0 upwards: a set
// bit means the item is a match reference, a clear bit means it is a literal
// byte. While a control word is being filled, `cword_val` carries a sentinel
// bit that starts at bit 31 and is shifted right once per item; when the
// sentinel reaches bit 0 the word is flushed to `cword_ptr` and a new one is
// reserved at `dst`.
//
// `state` holds the hash table used to locate earlier occurrences of the
// current three bytes; its layout depends on QLZ_COMPRESSION_LEVEL.
//
// Returns the number of bytes written to `destination` (never less than 9), or
// 0 when the ratio check in the main loop decides the data should be stored
// uncompressed by the caller.
static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state)
{
const unsigned char *last_byte = source + size - 1;
const unsigned char *src = source;
unsigned char *cword_ptr = destination;
unsigned char *dst = destination + CWORD_LEN;
ui32 cword_val = 1U << 31;
// Last position at which the main loop still looks for matches; the bytes after
// it are emitted as literals by the tail loop below.
const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
ui32 fetch = 0;
unsigned int lits = 0;
(void) lits;
if(src <= last_matchstart)
fetch = fast_read(src, 3);
while(src <= last_matchstart)
{
// Sentinel reached bit 0: the current control word is full, flush it.
if ((cword_val & 1) == 1)
{
// store uncompressed if compression ratio is too low
if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5))
return 0;
fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
cword_ptr = dst;
dst += CWORD_LEN;
cword_val = 1U << 31;
fetch = fast_read(src, 3);
}
#if QLZ_COMPRESSION_LEVEL == 1
{
const unsigned char *o;
ui32 hash, cached;
hash = hash_func(fetch);
// cached == 0 means the slot last saw the same three bytes as `fetch`.
cached = fetch ^ state->hash[hash].cache;
state->hash[hash].cache = fetch;
o = state->hash[hash].offset + OFFSET_BASE;
state->hash[hash].offset = CAST(src - OFFSET_BASE);
if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
{
if (*(o + 3) != *(src + 3))
{
// Exactly three bytes match: 2-byte item, length nibble = 3 - 2.
hash <<= 4;
cword_val = (cword_val >> 1) | (1U << 31);
fast_write((3 - 2) | hash, dst, 2);
src += 3;
dst += 2;
}
else
{
const unsigned char *old_src = src;
size_t matchlen;
hash <<= 4;
cword_val = (cword_val >> 1) | (1U << 31);
src += 4;
if(*(o + (src - old_src)) == *src)
{
src++;
if(*(o + (src - old_src)) == *src)
{
size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1;
size_t remaining = q > 255 ? 255 : q;
src++;
while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining)
src++;
}
}
matchlen = src - old_src;
// Lengths below 18 fit in the low nibble of a 2-byte item; longer matches use
// a 3-byte item with the length in the third byte.
if (matchlen < 18)
{
fast_write((ui32)(matchlen - 2) | hash, dst, 2);
dst += 2;
}
else
{
fast_write((ui32)(matchlen << 16) | hash, dst, 3);
dst += 3;
}
}
fetch = fast_read(src, 3);
lits = 0;
}
else
{
// Literal: copy one byte and slide the 3-byte fetch window forward by one.
lits++;
*dst = *src;
src++;
dst++;
cword_val = (cword_val >> 1);
fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16);
}
}
#elif QLZ_COMPRESSION_LEVEL >= 2
{
const unsigned char *o, *offset2;
ui32 hash, matchlen, k, m, best_k = 0;
unsigned char c;
size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1);
(void)best_k;
//hash = hashat(src);
fetch = fast_read(src, 3);
hash = hash_func(fetch);
c = state->hash_counter[hash];
// Candidate 0 of the slot is examined first; the loop below tries the others.
offset2 = state->hash[hash].offset[0];
if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0)
{
matchlen = 3;
if(*(offset2 + matchlen) == *(src + matchlen))
{
matchlen = 4;
while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining)
matchlen++;
}
}
else
matchlen = 0;
for(k = 1; k < QLZ_POINTERS && c > k; k++)
{
o = state->hash[hash].offset[k];
#if QLZ_COMPRESSION_LEVEL == 3
if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#elif QLZ_COMPRESSION_LEVEL == 2
if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#endif
{
m = 3;
while(*(o + m) == *(src + m) && m < remaining)
m++;
#if QLZ_COMPRESSION_LEVEL == 3
if ((m > matchlen) || (m == matchlen && o > offset2))
#elif QLZ_COMPRESSION_LEVEL == 2
if (m > matchlen)
#endif
{
offset2 = o;
matchlen = m;
best_k = k;
}
}
}
o = offset2;
// Remember the current position in the slot, overwriting round-robin.
state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
c++;
state->hash_counter[hash] = c;
#if QLZ_COMPRESSION_LEVEL == 3
if(matchlen > 2 && src - o < 131071)
{
ui32 u;
size_t offset = src - o;
for(u = 1; u < matchlen; u++)
{
hash = hashat(src + u);
c = state->hash_counter[hash]++;
state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u;
}
cword_val = (cword_val >> 1) | (1U << 31);
src += matchlen;
// Five item encodings, chosen by match length and offset; the low bits of the
// first byte tell the decoder which one was used.
if(matchlen == 3 && offset <= 63)
{
*dst = (unsigned char)(offset << 2);
dst++;
}
else if (matchlen == 3 && offset <= 16383)
{
ui32 f = (ui32)((offset << 2) | 1);
fast_write(f, dst, 2);
dst += 2;
}
else if (matchlen <= 18 && offset <= 1023)
{
ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2;
fast_write(f, dst, 2);
dst += 2;
}
else if(matchlen <= 33)
{
ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3;
fast_write(f, dst, 3);
dst += 3;
}
else
{
ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3;
fast_write(f, dst, 4);
dst += 4;
}
}
else
{
*dst = *src;
src++;
dst++;
cword_val = (cword_val >> 1);
}
#elif QLZ_COMPRESSION_LEVEL == 2
if(matchlen > 2)
{
cword_val = (cword_val >> 1) | (1U << 31);
src += matchlen;
// Item layout: bits 0-1 = candidate index, bits 2-4 = length - 2 (0 selects the
// 3-byte form with the length in the third byte), bits 5+ = hash.
if (matchlen < 10)
{
ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5);
fast_write(f, dst, 2);
dst += 2;
}
else
{
ui32 f = best_k | (matchlen << 16) | (hash << 5);
fast_write(f, dst, 3);
dst += 3;
}
}
else
{
*dst = *src;
src++;
dst++;
cword_val = (cword_val >> 1);
}
#endif
}
#endif
}
// Tail: emit the remaining bytes as literals, still hashing positions that have
// at least three following bytes (levels 1 and 2 only).
while (src <= last_byte)
{
if ((cword_val & 1) == 1)
{
fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
cword_ptr = dst;
dst += CWORD_LEN;
cword_val = 1U << 31;
}
#if QLZ_COMPRESSION_LEVEL < 3
if (src <= last_byte - 3)
{
#if QLZ_COMPRESSION_LEVEL == 1
ui32 hash, fetch;
fetch = fast_read(src, 3);
hash = hash_func(fetch);
state->hash[hash].offset = CAST(src - OFFSET_BASE);
state->hash[hash].cache = fetch;
#elif QLZ_COMPRESSION_LEVEL == 2
ui32 hash;
unsigned char c;
hash = hashat(src);
c = state->hash_counter[hash];
state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
c++;
state->hash_counter[hash] = c;
#endif
}
#endif
*dst = *src;
src++;
dst++;
cword_val = (cword_val >> 1);
}
// Shift the partially filled control word down until the sentinel sits at bit
// 0, then store it in the same form as a full one.
while((cword_val & 1) != 1)
cword_val = (cword_val >> 1);
fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
// min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument
return dst - destination < 9 ? 9 : dst - destination;
}
// Core decompressor: decodes the packet at `source` (header included) into
// `destination`, producing `size` bytes.
//
// Control words are read CWORD_LEN bytes at a time; each bit, taken from bit 0
// upwards, says whether the next item is a match reference (1) or literal data
// (0). `history` is the lowest address a match may refer to and is only used
// by the QLZ_MEMORY_SAFE checks.
//
// Returns `size` on completion, or 0 when a QLZ_MEMORY_SAFE bounds check fails.
//
// NOTE: both the match path and the fast literal path call memcpy_up(), which
// in this file is a stub that asserts unconditionally, so those paths abort
// when reached.
static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history)
{
const unsigned char *src = source + qlz_size_header((const char *)source);
unsigned char *dst = destination;
const unsigned char *last_destination_byte = destination + size - 1;
// A control word equal to 1 means "exhausted": only the sentinel bit is left.
ui32 cword_val = 1;
const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
unsigned char *last_hashed = destination - 1;
const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1;
// bitlut[x] is the number of trailing zero bits in the 4-bit value x (4 for
// x == 0), i.e. how many consecutive literals the next control bits announce.
static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
(void) last_source_byte;
(void) last_hashed;
(void) state;
(void) history;
for(;;)
{
ui32 fetch;
if (cword_val == 1)
{
#ifdef QLZ_MEMORY_SAFE
if(src + CWORD_LEN - 1 > last_source_byte)
return 0;
#endif
cword_val = fast_read(src, CWORD_LEN);
src += CWORD_LEN;
}
#ifdef QLZ_MEMORY_SAFE
if(src + 4 - 1 > last_source_byte)
return 0;
#endif
fetch = fast_read(src, 4);
if ((cword_val & 1) == 1)
{
// Match item: recover the source position (`offset2`) and length (`matchlen`)
// according to the item layout of the active compression level.
ui32 matchlen;
const unsigned char *offset2;
#if QLZ_COMPRESSION_LEVEL == 1
ui32 hash;
cword_val = cword_val >> 1;
hash = (fetch >> 4) & 0xfff;
offset2 = (const unsigned char *)(size_t)state->hash[hash].offset;
if((fetch & 0xf) != 0)
{
matchlen = (fetch & 0xf) + 2;
src += 2;
}
else
{
matchlen = *(src + 2);
src += 3;
}
#elif QLZ_COMPRESSION_LEVEL == 2
ui32 hash;
unsigned char c;
cword_val = cword_val >> 1;
hash = (fetch >> 5) & 0x7ff;
c = (unsigned char)(fetch & 0x3);
offset2 = state->hash[hash].offset[c];
if((fetch & (28)) != 0)
{
matchlen = ((fetch >> 2) & 0x7) + 2;
src += 2;
}
else
{
matchlen = *(src + 2);
src += 3;
}
#elif QLZ_COMPRESSION_LEVEL == 3
ui32 offset;
cword_val = cword_val >> 1;
if ((fetch & 3) == 0)
{
offset = (fetch & 0xff) >> 2;
matchlen = 3;
src++;
}
else if ((fetch & 2) == 0)
{
offset = (fetch & 0xffff) >> 2;
matchlen = 3;
src += 2;
}
else if ((fetch & 1) == 0)
{
offset = (fetch & 0xffff) >> 6;
matchlen = ((fetch >> 2) & 15) + 3;
src += 2;
}
else if ((fetch & 127) != 3)
{
offset = (fetch >> 7) & 0x1ffff;
matchlen = ((fetch >> 2) & 0x1f) + 2;
src += 3;
}
else
{
offset = (fetch >> 15);
matchlen = ((fetch >> 7) & 255) + 3;
src += 4;
}
offset2 = dst - offset;
#endif
#ifdef QLZ_MEMORY_SAFE
if(offset2 < history || offset2 > dst - MINOFFSET - 1)
return 0;
if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1))
return 0;
#endif
memcpy_up(dst, offset2, matchlen);
dst += matchlen;
#if QLZ_COMPRESSION_LEVEL <= 2
update_hash_upto(state, &last_hashed, dst - matchlen);
last_hashed = dst - 1;
#endif
}
else
{
if (dst < last_matchstart)
{
// Fast literal path: copy 4 bytes but advance only by the number of literals
// the control word actually announces.
unsigned int n = bitlut[cword_val & 0xf];
memcpy_up(dst, src, 4);
cword_val = cword_val >> n;
dst += n;
src += n;
#if QLZ_COMPRESSION_LEVEL <= 2
update_hash_upto(state, &last_hashed, dst - 3);
#endif
}
else
{
// Tail: everything from here to the end of the output is literal bytes.
// Control words encountered on the way are skipped, not interpreted.
while(dst <= last_destination_byte)
{
if (cword_val == 1)
{
src += CWORD_LEN;
cword_val = 1U << 31;
}
#ifdef QLZ_MEMORY_SAFE
if(src >= last_source_byte + 1)
return 0;
#endif
*dst = *src;
dst++;
src++;
cword_val = cword_val >> 1;
}
#if QLZ_COMPRESSION_LEVEL <= 2
update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant
#endif
return size;
}
}
}
}
// Compresses `size` bytes from `source` into `destination` as one packet
// (header + payload) and returns the total packet length in bytes.
//
// Returns 0 without writing anything when `size` is 0 or larger than
// 0xffffffff - 400.
//
// The header is 3 bytes for inputs shorter than 216 bytes and 9 bytes
// otherwise: one flags byte followed by the packet length and the original
// length (1 byte each, or 4 bytes each). When the core compressor gives up
// (returns 0), the input is copied verbatim after the header and the
// "compressed" flag is left clear.
//
// With QLZ_STREAMING_BUFFER > 0 the input is first appended to the state's
// stream buffer unless it would not fit, in which case the hash table is reset
// and the packet is compressed stand-alone.
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state)
{
size_t r;
ui32 compressed;
size_t base;
if(size == 0 || size > 0xffffffff - 400)
return 0;
// `base` is the header length; the payload starts at destination + base.
if(size < 216)
base = 3;
else
base = 9;
#if QLZ_STREAMING_BUFFER > 0
if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER)
#endif
{
reset_table_compress(state);
r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state);
#if QLZ_STREAMING_BUFFER > 0
reset_table_compress(state);
#endif
// r == base means the core returned 0: store the data uncompressed.
if(r == base)
{
bench_memcpy(destination + base, source, size);
r = size + base;
compressed = 0;
}
else
{
compressed = 1;
}
state->stream_counter = 0;
}
#if QLZ_STREAMING_BUFFER > 0
else
{
unsigned char *src = state->stream_buffer + state->stream_counter;
bench_memcpy(src, source, size);
r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state);
if(r == base)
{
bench_memcpy(destination + base, src, size);
r = size + base;
compressed = 0;
reset_table_compress(state);
}
else
{
compressed = 1;
}
state->stream_counter += size;
}
#endif
if(base == 3)
{
*destination = (unsigned char)(0 | compressed);
*(destination + 1) = (unsigned char)r;
*(destination + 2) = (unsigned char)size;
}
else
{
*destination = (unsigned char)(2 | compressed);
fast_write((ui32)r, destination + 1, 4);
fast_write((ui32)size, destination + 5, 4);
}
// Remaining flag bits: compression level in bits 2-3, streaming-buffer code in
// bits 4-5, bit 6 always set (see the bit map below).
*destination |= (QLZ_COMPRESSION_LEVEL << 2);
*destination |= (1 << 6);
*destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4);
// 76543210
// 01SSLLHC
return r;
}
// Decompresses the packet at `source` into `destination` and returns the
// number of bytes produced.
//
// The expected output length is read from the packet header. If bit 0 of the
// flags byte is set the payload is decoded by qlz_decompress_core() (whose
// result, possibly 0, becomes the return value); otherwise the payload is a
// verbatim copy of the data and is simply copied out.
//
// With QLZ_STREAMING_BUFFER > 0, packets that fit are decoded into the state's
// stream buffer first and then copied to `destination`.
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state)
{
size_t dsiz = qlz_size_decompressed(source);
#if QLZ_STREAMING_BUFFER > 0
if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER)
#endif
{
if((*source & 1) == 1)
{
reset_table_decompress(state);
dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination);
}
else
{
// Stored (uncompressed) packet: the payload follows the header unchanged.
bench_memcpy(destination, source + qlz_size_header(source), dsiz);
}
state->stream_counter = 0;
reset_table_decompress(state);
}
#if QLZ_STREAMING_BUFFER > 0
else
{
unsigned char *dst = state->stream_buffer + state->stream_counter;
if((*source & 1) == 1)
{
dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer);
}
else
{
bench_memcpy(dst, source + qlz_size_header(source), dsiz);
reset_table_decompress(state);
}
bench_memcpy(destination, dst, dsiz);
state->stream_counter += dsiz;
}
#endif
return dsiz;
}

View file

@ -0,0 +1,164 @@
#ifndef QLZ_HEADER
#define QLZ_HEADER
#include <am.h>
#include <klib.h>
// Copies `n` bytes from `src` to `dst` and returns `dst`.
// Overlapping ranges are handled: when `dst` starts inside [src, src + n) the
// bytes are copied from the end backwards, otherwise front to back.
// Both pointers must be non-null (checked with assert).
// All pointer arithmetic is done on byte pointers; arithmetic on void* is a
// compiler extension and is rejected by C++ compilers.
static inline void* bench_memcpy(void* dst, const void* src, size_t n){
  assert(dst&&src);
  const unsigned char* s = (const unsigned char*)src;
  unsigned char* d = (unsigned char*)dst;
  if (s < d && s + n > d) {
    // dst overlaps the tail of src: copy backwards so no byte is overwritten
    // before it has been read
    s += n;
    d += n;
    while (n-- > 0) *--d = *--s;
  }
  else {
    while (n-- > 0) *d++ = *s++;
  }
  return dst;
}
// Fast data compression library
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
// lar@quicklz.com
//
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
// released into public must be open source) or under a commercial license if such
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
// does not cover derived or ported versions created by third parties under GPL.
// You can edit the following user settings. Data must be decompressed with the same
// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed
// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially
// zeroed out (see manual). First #ifndef makes it possible to define settings from
// the outside like the compiler command line.
// 1.5.0 final
#ifndef QLZ_COMPRESSION_LEVEL
// 1 gives fastest compression speed. 3 gives fastest decompression speed and best
// compression ratio.
//#define QLZ_COMPRESSION_LEVEL 1
//#define QLZ_COMPRESSION_LEVEL 2
//#define QLZ_COMPRESSION_LEVEL 3
#define QLZ_COMPRESSION_LEVEL 2
// If > 0, zero out both states prior to first call to qlz_compress() or qlz_decompress()
// and decompress packets in the same order as they were compressed
#define QLZ_STREAMING_BUFFER 0
//#define QLZ_STREAMING_BUFFER 100000
//#define QLZ_STREAMING_BUFFER 1000000
// Guarantees that decompression of corrupted data cannot crash. Decreases decompression
// speed 10-20%. Compression speed not affected.
//#define QLZ_MEMORY_SAFE
#endif
#define QLZ_VERSION_MAJOR 1
#define QLZ_VERSION_MINOR 5
#define QLZ_VERSION_REVISION 0
// Verify compression level
#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3
#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3
#endif
typedef unsigned int ui32;
typedef unsigned short int ui16;
// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values!
#if QLZ_COMPRESSION_LEVEL == 1
#define QLZ_POINTERS 1
#define QLZ_HASH_VALUES 4096
#elif QLZ_COMPRESSION_LEVEL == 2
#define QLZ_POINTERS 4
#define QLZ_HASH_VALUES 2048
#elif QLZ_COMPRESSION_LEVEL == 3
#define QLZ_POINTERS 16
#define QLZ_HASH_VALUES 4096
#endif
// hash entry
// One slot of the compressor's hash table.
typedef struct
{
#if QLZ_COMPRESSION_LEVEL == 1
// Level 1: the value last fetched for this slot, plus one remembered position.
ui32 cache;
#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
// Position kept as a 32-bit value instead of a pointer (the compressor
// converts through OFFSET_BASE / CAST in quicklz.c).
unsigned int offset;
#else
const unsigned char *offset;
#endif
#else
// Levels 2 and 3: up to QLZ_POINTERS remembered positions per slot.
const unsigned char *offset[QLZ_POINTERS];
#endif
} qlz_hash_compress;
// One slot of the decompressor's hash table: a single remembered position at
// level 1, QLZ_POINTERS positions otherwise.
typedef struct
{
#if QLZ_COMPRESSION_LEVEL == 1
const unsigned char *offset;
#else
const unsigned char *offset[QLZ_POINTERS];
#endif
} qlz_hash_decompress;
// states
// Scratch state passed to qlz_compress().
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
// Input of earlier packets, kept only in streaming mode.
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
// Number of bytes currently held in stream_buffer; qlz_compress() resets it to
// 0 whenever it compresses a packet stand-alone.
size_t stream_counter;
qlz_hash_compress hash[QLZ_HASH_VALUES];
// Per-slot count of positions recorded so far (used by levels 2 and 3).
unsigned char hash_counter[QLZ_HASH_VALUES];
} qlz_state_compress;
// Scratch state passed to qlz_decompress(). Levels 1 and 2 carry a hash table
// and per-slot counters; level 3 needs neither.
#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
qlz_hash_decompress hash[QLZ_HASH_VALUES];
unsigned char hash_counter[QLZ_HASH_VALUES];
size_t stream_counter;
} qlz_state_decompress;
#elif QLZ_COMPRESSION_LEVEL == 3
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
// This condition can never hold inside the level-3 branch, so `hash` is never
// part of the level-3 struct.
#if QLZ_COMPRESSION_LEVEL <= 2
qlz_hash_decompress hash[QLZ_HASH_VALUES];
#endif
size_t stream_counter;
} qlz_state_decompress;
#endif
#if defined (__cplusplus)
extern "C" {
#endif
// Public functions of QuickLZ
size_t qlz_size_decompressed(const char *source);
size_t qlz_size_compressed(const char *source);
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
int qlz_get_setting(int setting);
#if defined (__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,159 @@
/*
* Simple MD5 implementation (github.com/pod32g/md5)
*
*/
#include <benchmark.h>
static int N;
// Constants: k[i] = floor(abs(sin(i + 1)) * 2^32), with the argument of sin in radians.
const uint32_t k[64] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee ,
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 ,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be ,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 ,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa ,
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 ,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed ,
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a ,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c ,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 ,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 ,
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 ,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 ,
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 ,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 ,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
// r specifies the per-round shift amounts
static const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
// Stores `val` into bytes[0..3] in little-endian order (least significant
// byte first).
static void to_bytes(uint32_t val, uint8_t *bytes)
{
  for (int idx = 0; idx < 4; idx++) {
    bytes[idx] = (uint8_t) (val >> (8 * idx));
  }
}
// Assembles bytes[0..3] into a 32-bit value, treating bytes[0] as the least
// significant byte (little-endian).
static uint32_t to_int32(const uint8_t *bytes)
{
  uint32_t value = 0;
  for (int idx = 3; idx >= 0; idx--) {
    value = (value << 8) | (uint32_t) bytes[idx];
  }
  return value;
}
// Computes the MD5 digest of msg[0..initial_len) and writes the 16-byte result
// to `digest`.
//
// NOTE: the padding is written in place, directly after the message. This
// function stores to msg[initial_len] through msg[new_len + 7], where new_len
// is the smallest value greater than initial_len with new_len % 64 == 56. The
// buffer behind `msg` must therefore be writable for up to initial_len + 72
// bytes, and its contents past initial_len are overwritten.
static void md5(uint8_t *msg, size_t initial_len, uint8_t *digest) {
// These vars will contain the hash
uint32_t h0, h1, h2, h3;
size_t new_len, offset;
uint32_t w[16];
uint32_t a, b, c, d, i, f, g, temp;
// Initialize variables - simple count in nibbles:
h0 = 0x67452301;
h1 = 0xefcdab89;
h2 = 0x98badcfe;
h3 = 0x10325476;
//Pre-processing:
//append "1" bit to message
//append "0" bits until message length in bits ≡ 448 (mod 512)
//append length mod (2^64) to message
// new_len = padded length in bytes, excluding the final 8 length bytes
for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
;
msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
for (offset = initial_len + 1; offset < new_len; offset++)
msg[offset] = 0; // append "0" bits
// append the len in bits at the end of the buffer.
to_bytes(initial_len*8, msg + new_len);
// initial_len>>29 == initial_len*8>>32, but avoids overflow.
to_bytes(initial_len>>29, msg + new_len + 4);
// Process the message in successive 512-bit chunks:
//for each 512-bit chunk of message:
for(offset=0; offset<new_len; offset += (512/8)) {
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
for (i = 0; i < 16; i++)
w[i] = to_int32(msg + offset + i*4);
// Initialize hash value for this chunk:
a = h0;
b = h1;
c = h2;
d = h3;
// Main loop:
// four rounds of 16 steps; each round has its own mixing function `f` and its
// own order `g` in which the message words are consumed
for(i = 0; i<64; i++) {
if (i < 16) {
f = (b & c) | ((~b) & d);
g = i;
} else if (i < 32) {
f = (d & b) | ((~d) & c);
g = (5*i + 1) % 16;
} else if (i < 48) {
f = b ^ c ^ d;
g = (3*i + 5) % 16;
} else {
f = c ^ (b | (~d));
g = (7*i) % 16;
}
// rotate the working registers: (a, b, c, d) <- (d, new b, b, c)
temp = d;
d = c;
c = b;
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
a = temp;
}
// Add this chunk's hash to result so far:
h0 += a;
h1 += b;
h2 += c;
h3 += d;
}
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
to_bytes(h0, digest);
to_bytes(h1, digest + 4);
to_bytes(h2, digest + 8);
to_bytes(h3, digest + 12);
}
static uint8_t *str;
static uint8_t *digest;
// Prepare hook for the md5 benchmark.
// Allocates the message buffer and the 16-byte digest buffer, and fills the
// first N bytes of the message with values from bench_rand() (seeded with 1).
//
// md5() pads the message in place: after the N message bytes it writes up to
// 64 bytes of padding followed by an 8-byte length field. The buffer is
// therefore allocated with 72 spare bytes so those writes stay inside it.
void bench_md5_prepare() {
  N = setting->size;
  bench_srand(1);
  str = bench_alloc(N + 64 + 8);
  for (int i = 0; i < N; i ++) {
    str[i] = bench_rand();
  }
  digest = bench_alloc(16);
}
void bench_md5_run() {
md5(str, N, digest);
}
// Validation hook for the md5 benchmark.
// Returns 1 when the checksum over the 16 digest bytes equals the expected
// checksum of the active setting, 0 otherwise.
int bench_md5_validate() {
  return (checksum(digest, digest + 16) == setting->checksum) ? 1 : 0;
}

View file

@ -0,0 +1,44 @@
#include <benchmark.h>
static int N, *data;
// Prepare hook for the qsort benchmark.
// Allocates N ints and fills each one from two consecutive bench_rand()
// values: the first shifted left by 16 bits, OR-ed with the second.
void bench_qsort_prepare() {
  bench_srand(1);
  N = setting->size;
  data = bench_alloc(N * sizeof(int));
  int filled = 0;
  while (filled < N) {
    const int hi = bench_rand();
    const int lo = bench_rand();
    data[filled] = (hi << 16) | lo;
    filled ++;
  }
}
// Exchanges the two ints that `a` and `b` point to. Passing the same address
// twice leaves the value unchanged.
static void swap(int *a, int *b) {
  const int first = *a;
  const int second = *b;
  *a = second;
  *b = first;
}
// Recursive quicksort of the half-open range a[l..r) into ascending order.
// The first element is the pivot value; elements smaller than it are swapped
// to the front, the pivot is then placed between the two groups and both
// sides are sorted recursively.
static void myqsort(int *a, int l, int r) {
  if (l >= r) {
    return;
  }
  const int p = a[l];
  int pivot = l;  // index of the last element known to be smaller than p
  for (int j = l + 1; j < r; j ++) {
    if (a[j] < p) {
      pivot ++;
      swap(&a[pivot], &a[j]);
    }
  }
  swap(&a[pivot], &a[l]);
  myqsort(a, l, pivot);
  myqsort(a, pivot + 1, r);
}
// Run hook for the qsort benchmark: sorts the whole prepared array.
void bench_qsort_run() {
  const int first = 0;
  const int end = N;  // one past the last element
  myqsort(data, first, end);
}
// Validation hook for the qsort benchmark.
// Returns 1 when the checksum over the N array elements equals the expected
// checksum of the active setting, 0 otherwise.
int bench_qsort_validate() {
  return (checksum(data, data + N) == setting->checksum) ? 1 : 0;
}

View file

@ -0,0 +1,32 @@
#include <benchmark.h>
static unsigned int FULL;
// Counts the completions of a partial placement using bit masks.
//   row - columns already occupied
//   ld  - mask shifted left by one at every level (one diagonal direction)
//   rd  - mask shifted right by one at every level (the other direction)
// A placement is complete once `row` equals FULL. Otherwise every column that
// is free in all three masks is tried in turn, lowest set bit first.
static unsigned int dfs(unsigned int row, unsigned int ld, unsigned int rd) {
  if (row == FULL) {
    return 1;
  }
  unsigned int count = 0;
  unsigned int free_cols = FULL & ~(row | ld | rd);
  // free_cols &= free_cols - 1 clears the lowest set bit after it was tried
  for (; free_cols != 0; free_cols &= free_cols - 1) {
    const unsigned int p = free_cols & (0u - free_cols);  // isolate lowest set bit
    count += dfs(row | p, (ld | p) << 1, (rd | p) >> 1);
  }
  return count;
}
static unsigned int ans;
// Prepare hook for the queen benchmark: builds the mask with the low
// `setting->size` bits set and clears the result.
void bench_queen_prepare() {
  FULL = (1 << setting->size) - 1;
  ans = 0;
}
// Run hook for the queen benchmark: counts all complete placements starting
// from an empty board and stores the count in `ans`.
void bench_queen_run() {
  const unsigned int solutions = dfs(0, 0, 0);
  ans = solutions;
}
// Validation hook for the queen benchmark.
// Returns 1 when the counted result equals the expected checksum of the
// active setting, 0 otherwise.
int bench_queen_validate() {
  return (ans == setting->checksum) ? 1 : 0;
}

View file

@ -0,0 +1,42 @@
#include <benchmark.h>
static int N;
static int ans;
static uint32_t *primes;
// Returns bit `n` of the `primes` bitmap (0 or 1). Bits are packed 32 per word.
static inline int get(int n) {
  const uint32_t word = primes[n >> 5];
  return (word >> (n & 31)) & 1;
}
// Clears bit `n` of the `primes` bitmap.
static inline void clear(int n) {
  const unsigned long bit = 1ul << (n & 31);
  primes[n >> 5] &= ~bit;
}
// Prepare hook for the sieve benchmark: allocates the bitmap (N / 8 + 128
// bytes) and sets every bit in words 0 through N / 32.
void bench_sieve_prepare() {
  N = setting->size;
  primes = (uint32_t*)bench_alloc(N / 8 + 128);
  const int last_word = N / 32;
  for (int w = 0; w <= last_word; w ++) {
    primes[w] = 0xffffffff;
  }
}
// Run hook for the sieve benchmark.
// First checks that every bit 1..N is still set and returns without touching
// `ans` if one is not. Then clears the bits of all proper multiples of every
// surviving i with i * i <= N, and finally stores in `ans` how many bits in
// 2..N remain set.
void bench_sieve_run() {
  int probe = 1;
  while (probe <= N) {
    if (!get(probe)) return;
    probe ++;
  }
  for (int p = 2; p * p <= N; p ++) {
    if (!get(p)) continue;
    for (int multiple = p + p; multiple <= N; multiple += p) {
      clear(multiple);
    }
  }
  int remaining = 0;
  for (int c = 2; c <= N; c ++) {
    if (get(c)) remaining ++;
  }
  ans = remaining;
}
// Validation hook for the sieve benchmark.
// Returns 1 when the counted result equals the expected checksum of the
// active setting, 0 otherwise.
int bench_sieve_validate() {
  return (ans == setting->checksum) ? 1 : 0;
}

View file

@ -0,0 +1,111 @@
// This is the Skew algorithm's reference implementation.
#include <benchmark.h>
static int N;
// Lexicographic "less than or equal" for pairs: (a1, a2) <= (b1, b2).
inline bool leq(int a1, int a2, int b1, int b2) {
  if (a1 != b1) return a1 < b1;
  return a2 <= b2;
}
// Lexicographic "less than or equal" for triples: (a1, a2, a3) <= (b1, b2, b3).
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
  if (a1 != b1) return a1 < b1;
  return leq(a2, a3, b2, b3);
}
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
// One stable counting-sort pass: writes the n entries of a[] into b[], ordered
// by the key r[a[i]], where keys lie in 0..K.
// The K + 1 counters come from bench_alloc() on every call.
static void radixPass(int* a, int* b, int* r, int n, int K)
{
  int* c = (int*)bench_alloc(sizeof(int)*(K+1));
  // reset counters
  for (int key = 0; key <= K; key++) c[key] = 0;
  // count occurrences of each key
  for (int i = 0; i < n; i++) c[r[a[i]]]++;
  // turn the counts into exclusive prefix sums (first output slot per key)
  int sum = 0;
  for (int key = 0; key <= K; key++) {
    const int count = c[key];
    c[key] = sum;
    sum += count;
  }
  // place every entry at the next free slot of its key
  for (int i = 0; i < n; i++) {
    const int item = a[i];
    b[c[r[item]]++] = item;
  }
}
// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n]=s[n+1]=s[n+2]=0, n>=2
// Builds the suffix array of s[0..n-1] into SA[0..n-1].
//   s  - input symbols; three extra entries s[n], s[n+1], s[n+2] are read and
//        are expected to be 0 (see the requirement stated above)
//   SA - output, receives the start positions of the suffixes in sorted order
//   n  - number of symbols
//   K  - largest symbol value (radix passes use keys 0..K)
// Steps: sort the suffixes starting at positions i with i % 3 != 0 by their
// first three symbols (recursing when those triples are not all distinct),
// sort the i % 3 == 0 suffixes using that result, then merge the two orders.
// All scratch arrays come from bench_alloc().
void suffixArray(int* s, int* SA, int n, int K) {
// n0, n1, n2: number of positions i in [0, n) with i % 3 == 0, 1, 2
int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
int* s12 = (int*)bench_alloc(sizeof(int)*(n02+3)); s12[n02]= s12[n02+1]= s12[n02+2]=0;
int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
int* s0 = (int*)bench_alloc(sizeof(int)*n0);
int* SA0 = (int*)bench_alloc(sizeof(int)*n0);
// generate positions of mod 1 and mod 2 suffixes
// the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
// lsb radix sort the mod 1 and mod 2 triples
radixPass(s12 , SA12, s+2, n02, K);
radixPass(SA12, s12 , s+1, n02, K);
radixPass(s12 , SA12, s , n02, K);
// find lexicographic names of triples
int name = 0, c0 = -1, c1 = -1, c2 = -1;
for (int i = 0; i < n02; i++) {
if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
}
if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
else { s12[SA12[i]/3 + n0] = name; } // right half
}
// recurse if names are not yet unique
if (name < n02) {
suffixArray(s12, SA12, n02, name);
// store unique names in s12 using the suffix array
for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
} else // generate the suffix array of s12 directly
for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
// stably sort the mod 0 suffixes from SA12 by their first character
for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
radixPass(s0, SA0, s, n0, K);
// merge sorted SA0 suffixes and sorted SA12 suffixes
// p indexes SA0, t indexes SA12 (starting past the dummy suffix, if any),
// k indexes the output
for (int p=0, t=n0-n1, k=0; k < n; k++) {
// GetI() maps the entry SA12[t] back to a position in s
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
int i = GetI(); // pos of current offset 12 suffix
int j = SA0[p]; // pos of current offset 0 suffix
// mod 1 suffixes are compared as pairs, mod 2 suffixes as triples
if (SA12[t] < n0 ?
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
{ // suffix from SA12 is smaller
SA[k] = i; t++;
if (t == n02) { // done --- only SA0 suffixes left
for (k++; p < n0; p++, k++) SA[k] = SA0[p];
}
} else {
SA[k] = j; p++;
if (p == n0) { // done --- only SA12 suffixes left
for (k++; t < n02; t++, k++) SA[k] = GetI();
}
}
}
}
extern "C" {
static int *s, *sa;
// Prepare hook for the ssort benchmark.
// Allocates the symbol array and the suffix-array output (N + 10 ints each)
// and fills the first N symbols with bench_rand() % 26 (seeded with 1).
// NOTE(review): suffixArray() reads s[N..N+2] and expects zeros there; this
// presumably relies on bench_alloc() returning zeroed memory - confirm.
void bench_ssort_prepare() {
  N = setting->size;
  bench_srand(1);
  s = (int*)bench_alloc(sizeof(int)*(N+10));
  sa = (int*)bench_alloc(sizeof(int)*(N+10));
  int pos = 0;
  while (pos < N) {
    s[pos] = bench_rand() % 26;
    pos ++;
  }
}
// Run hook for the ssort benchmark: builds the suffix array of the N prepared
// symbols into `sa`, passing 26 as the largest symbol value.
void bench_ssort_run() {
  const int max_symbol = 26;
  suffixArray(s, sa, N, max_symbol);
}
// Validation hook for the ssort benchmark.
// Returns 1 when the checksum over the N suffix-array entries equals the
// expected checksum of the active setting, 0 otherwise.
int bench_ssort_validate() {
  return (checksum(sa, sa + N) == setting->checksum) ? 1 : 0;
}
}