rename projects
This commit is contained in:
parent
bdcb3ba65a
commit
304daa3501
28 changed files with 3 additions and 3 deletions
3
micro-bench/Makefile
Normal file
3
micro-bench/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME = micro-bench
|
||||
SRCS = $(shell find -L ./src/ -name "*.c" -o -name "*.cc")
|
||||
include $(AM_HOME)/Makefile
|
113
micro-bench/include/benchmark.h
Normal file
113
micro-bench/include/benchmark.h
Normal file
|
@ -0,0 +1,113 @@
|
|||
#ifndef __BENCHMARK_H__
|
||||
#define __BENCHMARK_H__
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MB * 1024 * 1024
|
||||
#define KB * 1024
|
||||
|
||||
#define REF_CPU "i7-7700K @ 4.20GHz"
|
||||
#define REF_SCORE 100000
|
||||
|
||||
#define REPEAT 1
|
||||
|
||||
// size | heap | time | checksum
|
||||
#define QSORT_S { 100, 1 KB, 0, 0x08467105}
|
||||
#define QSORT_M { 30000, 128 KB, 0, 0xa3e99fe4}
|
||||
#define QSORT_L { 100000, 640 KB, 5114, 0xed8cff89}
|
||||
#define QUEEN_S { 8, 0 KB, 0, 0x0000005c}
|
||||
#define QUEEN_M { 11, 0 KB, 0, 0x00000a78}
|
||||
#define QUEEN_L { 12, 0 KB, 4707, 0x00003778}
|
||||
#define BF_S { 4, 32 KB, 0, 0xa6f0079e}
|
||||
#define BF_M { 25, 32 KB, 0, 0xa88f8a65}
|
||||
#define BF_L { 180, 32 KB, 23673, 0x9221e2b3}
|
||||
#define FIB_S { 2, 1 KB, 0, 0x7cfeddf0}
|
||||
#define FIB_M { 23, 16 KB, 0, 0x94ad8800}
|
||||
#define FIB_L { 91, 256 KB, 28318, 0xebdc5f80}
|
||||
#define SIEVE_S { 100, 1 KB, 0, 0x00000019}
|
||||
#define SIEVE_M { 200000, 32 KB, 0, 0x00004640}
|
||||
#define SIEVE_L {10000000, 2 MB, 39361, 0x000a2403}
|
||||
#define PZ15_S { 0, 1 KB, 0, 0x00000006}
|
||||
#define PZ15_M { 1, 256 KB, 0, 0x0000b0df}
|
||||
#define PZ15_L { 2, 2 MB, 4486, 0x00068b8c}
|
||||
#define DINIC_S { 10, 8 KB, 0, 0x0000019c}
|
||||
#define DINIC_M { 80, 512 KB, 0, 0x00004f99}
|
||||
#define DINIC_L { 128, 1 MB, 10882, 0x0000c248}
|
||||
#define LZIP_S { 128, 128 KB, 0, 0xe05fc832}
|
||||
#define LZIP_M { 50000, 1 MB, 0, 0xdc93e90c}
|
||||
#define LZIP_L { 1048576, 4 MB, 7593, 0x8d62c81f}
|
||||
#define SSORT_S { 100, 4 KB, 0, 0x4c555e09}
|
||||
#define SSORT_M { 10000, 512 KB, 0, 0x0db7909b}
|
||||
#define SSORT_L { 100000, 4 MB, 4504, 0x4f0ab431}
|
||||
#define MD5_S { 100, 1 KB, 0, 0xf902f28f}
|
||||
#define MD5_M { 200000, 256 KB, 0, 0xd4f9bc6d}
|
||||
#define MD5_L {10000000, 16 MB, 17239, 0x27286a42}
|
||||
|
||||
#define BENCHMARK_LIST(def) \
|
||||
def(qsort, "qsort", QSORT_S, QSORT_M, QSORT_L, "Quick sort") \
|
||||
def(queen, "queen", QUEEN_S, QUEEN_M, QUEEN_L, "Queen placement") \
|
||||
def( bf, "bf", BF_S, BF_M, BF_L, "Brainf**k interpreter") \
|
||||
def( fib, "fib", FIB_S, FIB_M, FIB_L, "Fibonacci number") \
|
||||
def(sieve, "sieve", SIEVE_S, SIEVE_M, SIEVE_L, "Eratosthenes sieve") \
|
||||
def( 15pz, "15pz", PZ15_S, PZ15_M, PZ15_L, "A* 15-puzzle search") \
|
||||
def(dinic, "dinic", DINIC_S, DINIC_M, DINIC_L, "Dinic's maxflow algorithm") \
|
||||
def( lzip, "lzip", LZIP_S, LZIP_M, LZIP_L, "Lzip compression") \
|
||||
def(ssort, "ssort", SSORT_S, SSORT_M, SSORT_L, "Suffix sort") \
|
||||
def( md5, "md5", MD5_S, MD5_M, MD5_L, "MD5 digest") \
|
||||
|
||||
// Each benchmark will run REPEAT times
|
||||
|
||||
#define DECL(_name, _sname, _s, _m, _l, _desc) \
|
||||
void bench_##_name##_prepare(); \
|
||||
void bench_##_name##_run(); \
|
||||
int bench_##_name##_validate();
|
||||
|
||||
BENCHMARK_LIST(DECL)
|
||||
|
||||
// One input configuration of a benchmark. The fields follow the
// "size | heap | time | checksum" columns of the *_S / *_M / *_L tables.
typedef struct Setting {
  int size;            // problem-size parameter; each benchmark interprets it itself
  unsigned long mlim;  // heap budget in bytes, compared against heap usage in bench.c
  unsigned long ref;   // reference time fed to the score formula (presumably ms -- TODO confirm)
  uint32_t checksum;   // expected result that the benchmark's validate() compares against
} Setting;
|
||||
|
||||
typedef struct Benchmark {
|
||||
void (*prepare)();
|
||||
void (*run)();
|
||||
int (*validate)();
|
||||
const char *name, *desc;
|
||||
Setting settings[3];
|
||||
} Benchmark;
|
||||
|
||||
extern Benchmark *current;
|
||||
extern Setting *setting;
|
||||
|
||||
// Outcome of one benchmark run.
typedef struct Result {
  int pass;            // value returned by the benchmark's validate()
  unsigned long tsc;   // not written by the code visible here
  unsigned long msec;  // start timestamp while running, elapsed milliseconds afterwards
} Result;
|
||||
|
||||
void prepare(Result *res);
|
||||
void done(Result *res);
|
||||
|
||||
// memory allocation
|
||||
void* bench_alloc(size_t size);
|
||||
void bench_free(void *ptr);
|
||||
|
||||
// random number generator
|
||||
void bench_srand(uint32_t seed);
|
||||
uint32_t bench_rand(); // return a random number between 0..32767
|
||||
|
||||
// checksum
|
||||
uint32_t checksum(void *start, void *end);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
88
micro-bench/src/15pz/15pz.cc
Normal file
88
micro-bench/src/15pz/15pz.cc
Normal file
|
@ -0,0 +1,88 @@
|
|||
#include <benchmark.h>
|
||||
#include "puzzle.h"
|
||||
#include "heap.h"
|
||||
|
||||
const int N = 4;
|
||||
|
||||
static int PUZZLE_S[N*N] = {
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8,
|
||||
9, 10, 0, 11,
|
||||
13, 14, 15, 12,
|
||||
};
|
||||
|
||||
static int PUZZLE_M[N*N] = {
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8,
|
||||
12, 0, 14, 13,
|
||||
11, 15, 10, 9,
|
||||
};
|
||||
|
||||
static int PUZZLE_L[N*N] = {
|
||||
0, 2, 3, 4,
|
||||
9, 6, 7, 8,
|
||||
5, 11, 10, 12,
|
||||
1, 15, 13, 14,
|
||||
};
|
||||
|
||||
static int ans;
|
||||
|
||||
extern "C" {
|
||||
|
||||
// The 15-puzzle benchmark needs no setup: its inputs are the static
// PUZZLE_* boards and all allocation happens inside the run function.
void bench_15pz_prepare() {
  /* intentionally empty */
}
|
||||
|
||||
void bench_15pz_run() {
|
||||
N_puzzle<N> puzzle;
|
||||
int MAXN;
|
||||
|
||||
switch (setting->size) {
|
||||
case 0: puzzle = N_puzzle<N>(PUZZLE_S); MAXN = 10; break;
|
||||
case 1: puzzle = N_puzzle<N>(PUZZLE_M); MAXN = 2048; break;
|
||||
case 2: puzzle = N_puzzle<N>(PUZZLE_L); MAXN = 16384; break;
|
||||
default: assert(0);
|
||||
}
|
||||
assert(puzzle.solvable());
|
||||
|
||||
auto *heap = (Updatable_heap<N_puzzle<N>> *) bench_alloc(sizeof(Updatable_heap<N_puzzle<N>>));
|
||||
heap->init(MAXN);
|
||||
heap->push( puzzle, 0 );
|
||||
|
||||
int n = 0;
|
||||
ans = -1;
|
||||
|
||||
while( heap->size() != 0 && n != MAXN ) {
|
||||
N_puzzle<N> top = heap->pop();
|
||||
++n;
|
||||
|
||||
if ( top == N_puzzle<N>::solution() ) {
|
||||
// We are done
|
||||
ans = heap->length(top) * n;
|
||||
return;
|
||||
}
|
||||
|
||||
if ( top.tile_left_possible() ) {
|
||||
heap->push( top.tile_left(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_right_possible() ) {
|
||||
heap->push( top.tile_right(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_up_possible() ) {
|
||||
heap->push( top.tile_up(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_down_possible() ) {
|
||||
heap->push( top.tile_down(), heap->length( top ) + 1 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int bench_15pz_validate() {
|
||||
return (uint32_t)ans == setting->checksum;
|
||||
}
|
||||
|
||||
}
|
||||
|
227
micro-bench/src/15pz/heap.h
Normal file
227
micro-bench/src/15pz/heap.h
Normal file
|
@ -0,0 +1,227 @@
|
|||
// Author: Douglas Wilhelm Harder
|
||||
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
|
||||
|
||||
// Returns the larger of a and b; b is returned when they compare equal.
template <typename T>
T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
|
||||
|
||||
// Min-heap of search steps ordered by path weight, paired with a chained
// hash table so that an element already stored can be found again and its
// weight lowered in place. Storage comes from bench_alloc(); call init()
// before any other member.
template <typename T>
class Updatable_heap {
  private:
    int M;                  // capacity passed to init(); also used to mask hash values
    class Step;
    Step **hash_table;      // bucket heads of the chained hash table
    Step **heap;            // binary heap of Step pointers, used 1-based
    int heap_size;          // number of steps currently in the heap
    int maximum_heap_size;  // largest heap_size observed so far

    inline void swap( int, int );
    void percolate_down();
    void percolate_up( int );
    Step *pointer( T const & ) const;

  public:
    void init(int m);
    ~Updatable_heap();
    T pop();
    void push( T const &, int );
    int size() const;
    int maximum_size() const;
    int length( T const & ) const;
};
|
||||
|
||||
template <typename T>
|
||||
class Updatable_heap<T>::Step {
|
||||
public:
|
||||
T element;
|
||||
Step *next;
|
||||
int heap_index;
|
||||
int path_length;
|
||||
int path_weight;
|
||||
bool visited;
|
||||
Step *previous_step;
|
||||
|
||||
void init( T const &, Step *, int, int );
|
||||
int length() const;
|
||||
int weight() const;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::init(int m) {
|
||||
M = m;
|
||||
heap = (Step **)bench_alloc(sizeof(void *) * M);
|
||||
hash_table = (Step **)bench_alloc(sizeof(void *) * (M + 1));
|
||||
|
||||
heap_size = 0;
|
||||
maximum_heap_size = 0;
|
||||
for ( int i = 0; i < M; ++i ) {
|
||||
hash_table[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Updatable_heap<T>::~Updatable_heap() {
|
||||
for ( int i = 0; i < M; ++i ) {
|
||||
Step *ptr = hash_table[i];
|
||||
|
||||
while ( ptr != 0 ) {
|
||||
Step *tmp = ptr;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T Updatable_heap<T>::pop() {
|
||||
if ( size() == 0 ) {
|
||||
return T();
|
||||
}
|
||||
|
||||
T top = heap[1]->element;
|
||||
|
||||
if ( size() == 1 ) {
|
||||
heap_size = 0;
|
||||
} else {
|
||||
assert( size() > 1 );
|
||||
|
||||
heap[1] = heap[size()];
|
||||
heap[1]->heap_index = 1;
|
||||
|
||||
--heap_size;
|
||||
percolate_down();
|
||||
}
|
||||
|
||||
return top;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void inline Updatable_heap<T>::swap( int i, int j ) {
|
||||
Step *tmp = heap[j];
|
||||
heap[j] = heap[i];
|
||||
heap[i] = tmp;
|
||||
|
||||
heap[i]->heap_index = i;
|
||||
heap[j]->heap_index = j;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::percolate_down() {
|
||||
int n = 1;
|
||||
|
||||
while ( 2*n + 1 <= size() ) {
|
||||
if ( heap[n]->weight() < heap[2*n]->weight() && heap[n]->weight() < heap[2*n + 1]->weight() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( heap[2*n]->weight() < heap[2*n + 1]->weight() ) {
|
||||
swap( n, 2*n );
|
||||
n = 2*n;
|
||||
} else {
|
||||
assert( heap[2*n]->weight() >= heap[2*n + 1]->weight() );
|
||||
|
||||
swap( n, 2*n + 1 );
|
||||
n = 2*n + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( 2*n == size() && heap[2*n]->weight() < heap[n]->weight() ) {
|
||||
swap( n, 2*n );
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::percolate_up( int n ) {
|
||||
while ( n != 1 ) {
|
||||
int parent = n/2;
|
||||
|
||||
if ( heap[parent]->weight() > heap[n]->weight() ) {
|
||||
swap( parent, n );
|
||||
n = parent;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::push( T const &pz, int path_length ) {
|
||||
Step *ptr = pointer( pz );
|
||||
|
||||
if ( ptr == 0 ) {
|
||||
assert( heap_size <= M );
|
||||
++heap_size;
|
||||
|
||||
Step *ptr = (Step*)bench_alloc(sizeof(Step));
|
||||
ptr->init( pz, hash_table[pz.hash() & (M - 1)], size(), path_length );
|
||||
hash_table[pz.hash() & (M - 1)] = ptr;
|
||||
heap[size()] = ptr;
|
||||
|
||||
percolate_up( size() );
|
||||
|
||||
maximum_heap_size = max( maximum_heap_size, size() );
|
||||
} else {
|
||||
if ( !ptr->visited ) {
|
||||
if ( path_length + ptr->element.lower_bound() < ptr->weight() ) {
|
||||
ptr->path_weight = path_length + ptr->element.lower_bound();
|
||||
percolate_up( ptr->heap_index );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::size() const {
|
||||
return heap_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::maximum_size() const {
|
||||
return maximum_heap_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::length( T const &pz ) const {
|
||||
Step *ptr = pointer( pz );
|
||||
|
||||
return ( ptr == 0 ) ? 2147483647 : ptr->length();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename Updatable_heap<T>::Step *Updatable_heap<T>::pointer( T const &pz ) const {
|
||||
for ( Step *ptr = hash_table[pz.hash() & (M - 1)]; ptr != 0; ptr = ptr->next ) {
|
||||
if ( ptr->element == pz ) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************
|
||||
* ************************************************ *
|
||||
* * Iterator * *
|
||||
* ************************************************ *
|
||||
****************************************************/
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::Step::init( T const &pz, Step *n, int hi, int dist ) {
|
||||
element = pz;
|
||||
next = n;
|
||||
heap_index = hi;
|
||||
path_length = dist;
|
||||
path_weight = dist + element.lower_bound();
|
||||
visited = false;
|
||||
previous_step = 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::Step::length() const {
|
||||
return path_length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::Step::weight() const {
|
||||
return path_weight;
|
||||
}
|
||||
|
475
micro-bench/src/15pz/puzzle.h
Normal file
475
micro-bench/src/15pz/puzzle.h
Normal file
|
@ -0,0 +1,475 @@
|
|||
// Author: Douglas Wilhelm Harder
|
||||
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
|
||||
// Url: https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/N_puzzles/
|
||||
|
||||
// An N x N sliding-tile board (tile 0 is the blank) that caches the blank
// position, the Manhattan distance to the solved board, and a hash value.
template <int N>
class N_puzzle {
  private:
    bool puzzle_valid;          // false for malformed boards and illegal moves
    uint8_t zero_i;             // row of the blank
    uint8_t zero_j;             // column of the blank
    int8_t manhattan_distance;  // summed tile distances to their goal cells
    int8_t puzzle[N][N];        // tile values, row-major
    int hash_value;             // cached result of determine_hash()

    void determine_hash();

    static int abs( int n ) {
      if ( n < 0 ) {
        return -n;
      }
      return n;
    }

  public:
    N_puzzle();
    N_puzzle( int array[N*N] );
    N_puzzle( N_puzzle const & );
    N_puzzle &operator=( N_puzzle const & );

    bool solvable() const;
    bool valid() const;
    int lower_bound() const;
    unsigned int hash() const;

    bool tile_up_possible() const;
    bool tile_down_possible() const;
    bool tile_left_possible() const;
    bool tile_right_possible() const;

    N_puzzle tile_up() const;
    N_puzzle tile_down() const;
    N_puzzle tile_left() const;
    N_puzzle tile_right() const;

    bool operator==( N_puzzle const & ) const;
    bool operator!=( N_puzzle const & ) const;

    static N_puzzle solution();
};
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle():
|
||||
puzzle_valid( true ),
|
||||
manhattan_distance( 0 ) {
|
||||
int array[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
array[i] = i;
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
int k = bench_rand() % (N*N - n);
|
||||
puzzle[i][j] = array[k];
|
||||
|
||||
if ( array[k] == 0 ) {
|
||||
zero_i = i;
|
||||
zero_j = j;
|
||||
} else {
|
||||
manhattan_distance += abs( ((array[k] - 1) / N) - i );
|
||||
manhattan_distance += abs( ((array[k] - 1) % N) - j );
|
||||
}
|
||||
|
||||
++n;
|
||||
array[k] = array[N*N - n];
|
||||
}
|
||||
}
|
||||
|
||||
determine_hash();
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle( int array[N*N] ):
|
||||
puzzle_valid( true ),
|
||||
manhattan_distance( 0 ) {
|
||||
bool check[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
check[i] = false;
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = array[n];
|
||||
check[array[n]] = true;
|
||||
|
||||
if ( array[n] == 0 ) {
|
||||
zero_i = i;
|
||||
zero_j = j;
|
||||
} else {
|
||||
manhattan_distance += abs( ((array[n] - 1) / N) - i );
|
||||
manhattan_distance += abs( ((array[n] - 1) % N) - j );
|
||||
}
|
||||
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
if ( !check[i] ) {
|
||||
puzzle_valid = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
determine_hash();
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine a hash value for the puzzle.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
void N_puzzle<N>::determine_hash() {
|
||||
hash_value = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
hash_value = hash_value*1973 + puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle( N_puzzle const &pz ):
|
||||
puzzle_valid( pz.puzzle_valid ),
|
||||
zero_i( pz.zero_i ),
|
||||
zero_j( pz.zero_j ),
|
||||
manhattan_distance( pz.manhattan_distance ),
|
||||
hash_value( pz.hash_value ) {
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = pz.puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N> &N_puzzle<N>::operator=( N_puzzle const &rhs ) {
|
||||
puzzle_valid = rhs.puzzle_valid;
|
||||
zero_i = rhs.zero_i;
|
||||
zero_j = rhs.zero_j;
|
||||
manhattan_distance = rhs.manhattan_distance;
|
||||
hash_value = rhs.hash_value;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = rhs.puzzle[i][j];
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Moving a tile up is possible as long as
|
||||
* the blank is not in the last row.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_up_possible() const {
|
||||
return puzzle_valid && (zero_i != N - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile down is possible as long as
|
||||
* the blank is not in the first row.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_down_possible() const {
|
||||
return puzzle_valid && (zero_i != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile left is possible as long as
|
||||
* the blank is not in the last column.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_left_possible() const {
|
||||
return puzzle_valid && (zero_j != N - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile right is possible as long as
|
||||
* the blank is not in the first column.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_right_possible() const {
|
||||
return puzzle_valid && (zero_j != 0);
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_up() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_i == N - 1 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - zero_i ) -
|
||||
abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - (zero_i + 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i + 1][zero_j];
|
||||
++result.zero_i;
|
||||
result.puzzle[result.zero_i][zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_down() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_i == 0 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - zero_i ) -
|
||||
abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - (zero_i - 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i - 1][zero_j];
|
||||
--result.zero_i;
|
||||
result.puzzle[result.zero_i][zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_left() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_j == N - 1 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - zero_j ) -
|
||||
abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - (zero_j + 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j + 1];
|
||||
++result.zero_j;
|
||||
result.puzzle[zero_i][result.zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_right() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_j == 0 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - zero_j ) -
|
||||
abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - (zero_j - 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j - 1];
|
||||
--result.zero_j;
|
||||
result.puzzle[zero_i][result.zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the puzzle is solvable: that is, check the
|
||||
 * number of inversions plus the Manhattan distance of
|
||||
 * the blank from the lower-right corner.
|
||||
*
|
||||
* Run time: O(n^2)
|
||||
* Memory: O(n)
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::solvable() const {
|
||||
if ( !valid() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int entries[N*N];
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] == 0 ) {
|
||||
entries[N*i + j] = N*N;
|
||||
} else {
|
||||
entries[N*i + j] = puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int parity = 0;
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
for ( int j = i + 1; j < N*N; ++j ) {
|
||||
if ( entries[i] > entries[j] ) {
|
||||
++parity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parity += 2*N - 2 - zero_i - zero_j;
|
||||
|
||||
return ( (parity & 1) == 0 );
|
||||
}
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::valid() const {
|
||||
return puzzle_valid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return either the Manhattan, Hamming, or discrete distance
|
||||
* between the puzzle and the solution.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
int N_puzzle<N>::lower_bound() const {
|
||||
// The Manhattan distance
|
||||
return valid() ? manhattan_distance : N*N*N;
|
||||
|
||||
int result = 0;
|
||||
int count = 1;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != (count % N*N) ) {
|
||||
++result;
|
||||
}
|
||||
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
// The Hamming distance, or
|
||||
return result;
|
||||
|
||||
// The discrete distance: converts the A* search to Dijkstra's algorithm
|
||||
// return ( result == 0 ) ? 0 : 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* puzzle1 == puzzle2
|
||||
*
|
||||
* Two puzzles are considered to be equal if their entries
|
||||
* are equal:
|
||||
* If either puzzle is not valid, return false.
|
||||
* If the hash values are different, they are different; return false.
|
||||
* Otherwise, check all entries to see if they are the same.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
bool N_puzzle<N>::operator==( N_puzzle const &rhs ) const {
|
||||
if ( !valid() || !rhs.valid() || hash() != rhs.hash() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != rhs.puzzle[i][j] ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* puzzle1 != puzzle2
|
||||
*
|
||||
* Two puzzles are considered to be unequal if any of the entries
|
||||
* different:
|
||||
* If either puzzle is not valid, return false.
|
||||
* If the hash values are different, they are different; return true.
|
||||
* Otherwise, check all entries to see if they are the same.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
bool N_puzzle<N>::operator!=( N_puzzle const &rhs ) const {
|
||||
if ( !valid() || !rhs.valid() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( hash() != rhs.hash() ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != rhs.puzzle[i][j] ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* unsigned int hash() const
|
||||
*
|
||||
* Returns the pre-calculated hash value.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
unsigned int N_puzzle<N>::hash() const {
|
||||
return valid() ? hash_value : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* N_puzzle<N> solution()
|
||||
*
|
||||
* Returns the correct solution to the N puzzle:
|
||||
*
|
||||
* 1 2 3 1 2 3 4
|
||||
* 3x3: 4 5 6 4x4: 5 6 7 8
|
||||
* 7 8 9 10 11 12
|
||||
* 13 14 15
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::solution() {
|
||||
int array[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N - 1; ++i ) {
|
||||
array[i] = i + 1;
|
||||
}
|
||||
|
||||
array[N*N - 1] = 0;
|
||||
|
||||
return N_puzzle<N>( array );
|
||||
}
|
||||
|
181
micro-bench/src/bench.c
Normal file
181
micro-bench/src/bench.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
#include <am.h>
|
||||
#include <benchmark.h>
|
||||
#include <limits.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
Benchmark *current;
|
||||
Setting *setting;
|
||||
|
||||
static char *hbrk;
|
||||
|
||||
// Uptime in milliseconds, derived from the microsecond uptime timer.
static uint32_t uptime_ms() {
  return io_read(AM_TIMER_UPTIME).us / 1000;
}
|
||||
|
||||
// The benchmark list
|
||||
|
||||
#define ENTRY(_name, _sname, _s, _m, _l, _desc) \
|
||||
{ .prepare = bench_##_name##_prepare, \
|
||||
.run = bench_##_name##_run, \
|
||||
.validate = bench_##_name##_validate, \
|
||||
.name = _sname, \
|
||||
.desc = _desc, \
|
||||
.settings = {_s, _m, _l}, },
|
||||
|
||||
Benchmark benchmarks[] = {
|
||||
BENCHMARK_LIST(ENTRY)
|
||||
};
|
||||
|
||||
// Running a benchmark
|
||||
// Starts the measurement: stores the current uptime in res->msec so that
// bench_done() can turn it into an elapsed time.
static void bench_prepare(Result *res) {
  const uint32_t started = uptime_ms();
  res->msec = started;
}
|
||||
|
||||
static void bench_reset() {
|
||||
hbrk = (void *)ROUNDUP(heap.start, 8);
|
||||
}
|
||||
|
||||
// Ends the measurement: replaces the start timestamp in res->msec with the
// elapsed time in milliseconds.
static void bench_done(Result *res) {
  const uint32_t finished = uptime_ms();
  res->msec = finished - res->msec;
}
|
||||
|
||||
// Decides whether the active setting can run: returns NULL when the heap
// is at least setting->mlim bytes large, otherwise the reason for skipping.
// The bench argument is not consulted; the global `setting` is.
static const char *bench_check(Benchmark *bench) {
  const uintptr_t heap_bytes = (uintptr_t)heap.end - (uintptr_t)heap.start;
  return (heap_bytes < setting->mlim) ? "(insufficient memory)" : NULL;
}
|
||||
|
||||
// Executes the benchmark selected by the global `current` exactly once and
// fills *res. Only the run() call sits between the two timer readings.
static void run_once(Benchmark *b, Result *res) {
  bench_reset();                      // rewind the benchmark allocator
  current->prepare();                 // benchmark-specific setup, not timed
  bench_prepare(res);                 // take the start timestamp
  current->run();                     // timed workload
  bench_done(res);                    // convert to elapsed milliseconds
  res->pass = current->validate();    // compare against the expected checksum
}
|
||||
|
||||
// Score of one benchmark: the active setting's reference time scaled by
// REF_SCORE / 1000 and divided by the measured time. A measured time of 0
// scores 0. The b and tsc arguments are not used.
static unsigned long score(Benchmark *b, unsigned long tsc, unsigned long msec) {
  if (msec == 0) {
    return 0;
  }

  const unsigned long scaled_ref = (REF_SCORE / 1000) * setting->ref;
  return scaled_ref / msec;
}
|
||||
|
||||
int main(const char *args) {
|
||||
const char *setting_name = args;
|
||||
if (args == NULL || strcmp(args, "") == 0) {
|
||||
printf("Empty mainargs. Use \"ref\" by default\n");
|
||||
setting_name = "ref";
|
||||
}
|
||||
int setting_id = -1;
|
||||
|
||||
if (strcmp(setting_name, "test" ) == 0) setting_id = 0;
|
||||
else if (strcmp(setting_name, "train") == 0) setting_id = 1;
|
||||
else if (strcmp(setting_name, "ref" ) == 0) setting_id = 2;
|
||||
else {
|
||||
printf("Invalid mainargs: \"%s\"; "
|
||||
"must be in {test, train, ref}\n", setting_name);
|
||||
halt(1);
|
||||
}
|
||||
|
||||
ioe_init();
|
||||
|
||||
printf("======= Running MicroBench [input *%s*] =======\n", setting_name);
|
||||
|
||||
unsigned long bench_score = 0;
|
||||
int pass = 1;
|
||||
uint32_t t0 = uptime_ms();
|
||||
|
||||
for (int i = 0; i < LENGTH(benchmarks); i ++) {
|
||||
Benchmark *bench = &benchmarks[i];
|
||||
current = bench;
|
||||
setting = &bench->settings[setting_id];
|
||||
const char *msg = bench_check(bench);
|
||||
printf("[%s] %s: ", bench->name, bench->desc);
|
||||
if (msg != NULL) {
|
||||
printf("Ignored %s\n", msg);
|
||||
} else {
|
||||
unsigned long msec = ULONG_MAX;
|
||||
int succ = 1;
|
||||
for (int i = 0; i < REPEAT; i ++) {
|
||||
Result res;
|
||||
run_once(bench, &res);
|
||||
printf(res.pass ? "*" : "X");
|
||||
succ &= res.pass;
|
||||
if (res.msec < msec) msec = res.msec;
|
||||
}
|
||||
|
||||
if (succ) printf(" Passed.");
|
||||
else printf(" Failed.");
|
||||
|
||||
pass &= succ;
|
||||
|
||||
unsigned long cur = score(bench, 0, msec);
|
||||
|
||||
printf("\n");
|
||||
if (setting_id != 0) {
|
||||
printf(" min time: %d ms [%d]\n", (unsigned int)msec, (unsigned int)cur);
|
||||
}
|
||||
|
||||
bench_score += cur;
|
||||
}
|
||||
}
|
||||
uint32_t t1 = uptime_ms();
|
||||
|
||||
bench_score /= LENGTH(benchmarks);
|
||||
|
||||
printf("==================================================\n");
|
||||
printf("MicroBench %s", pass ? "PASS" : "FAIL");
|
||||
if (setting_id == 2) {
|
||||
printf(" %d Marks\n", (unsigned int)bench_score);
|
||||
printf(" vs. %d Marks (%s)\n", REF_SCORE, REF_CPU);
|
||||
} else {
|
||||
printf("\n");
|
||||
}
|
||||
printf("Total time: %d ms\n", t1 - t0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Libraries
|
||||
|
||||
void* bench_alloc(size_t size) {
|
||||
size = (size_t)ROUNDUP(size, 8);
|
||||
char *old = hbrk;
|
||||
hbrk += size;
|
||||
assert((uintptr_t)heap.start <= (uintptr_t)hbrk && (uintptr_t)hbrk < (uintptr_t)heap.end);
|
||||
for (uint64_t *p = (uint64_t *)old; p != (uint64_t *)hbrk; p ++) {
|
||||
*p = 0;
|
||||
}
|
||||
assert((uintptr_t)hbrk - (uintptr_t)heap.start <= setting->mlim);
|
||||
return old;
|
||||
}
|
||||
|
||||
// Counterpart of bench_alloc(). It does nothing: memory is reclaimed in
// bulk when the allocator is reset before each run.
void bench_free(void *ptr) {
  /* intentionally empty */
}
|
||||
|
||||
// State of the linear congruential generator behind bench_rand().
static uint32_t seed = 1;

// Re-seeds the generator; only the low 15 bits of _seed are kept.
void bench_srand(uint32_t _seed) {
  seed = _seed & 0x7fff;
}

// Advances the generator one step and returns a value in 0..32767 taken
// from bits 16..30 of the new state.
uint32_t bench_rand() {
  const uint32_t multiplier = 214013u;
  const uint32_t increment = 2531011u;

  seed = seed * multiplier + increment;
  return (seed >> 16) & 0x7fff;
}
|
||||
|
||||
// FNV hash
|
||||
// Hashes the bytes in [start, end) in groups of four with an FNV-style
// xor-multiply step, then applies a final shift/add/xor mixing pass.
// A group is consumed only while `p + 4 < end` holds, so the trailing
// bytes (up to four) are not hashed.
uint32_t checksum(void *start, void *end) {
  const uint32_t prime = 16777619;
  uint32_t h1 = 2166136261u;

  const uint8_t *limit = (uint8_t*)end;
  uint8_t *p = (uint8_t*)start;

  while (p + 4 < limit) {
    for (uint8_t *stop = p + 4; p != stop; p ++) {
      h1 = (h1 ^ *p) * prime;
    }
  }

  // NOTE(review): the mixing below operates on a signed 32-bit value; the
  // reference checksums presumably depend on exactly this arithmetic, so it
  // is kept as is -- confirm before changing the type.
  int32_t hash = (uint32_t)h1;
  hash += hash << 13;
  hash ^= hash >> 7;
  hash += hash << 3;
  hash ^= hash >> 17;
  hash += hash << 5;
  return hash;
}
|
151
micro-bench/src/bf/bf.c
Normal file
151
micro-bench/src/bf/bf.c
Normal file
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
Brainfuck-C ( http://github.com/kgabis/brainfuck-c )
|
||||
Copyright (c) 2012 Krzysztof Gabis
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
static int ARR_SIZE;
|
||||
|
||||
#define CODE ">>+>>>>>,[>+>>,]>+[--[+<<<-]<[<+>-]<[<[->[<<<+>>>>+<-]<<[>>+>[->]<<[<]" \
|
||||
"<-]>]>>>+<[[-]<[>+<-]<]>[[>>>]+<<<-<[<<[<<<]>>+>[>>>]<-]<<[<<<]>[>>[>>" \
|
||||
">]<+<<[<<<]>-]]+<<<]+[->>>]>>]>>[.>>>]"
|
||||
|
||||
#define OP_END 0
|
||||
#define OP_INC_DP 1
|
||||
#define OP_DEC_DP 2
|
||||
#define OP_INC_VAL 3
|
||||
#define OP_DEC_VAL 4
|
||||
#define OP_OUT 5
|
||||
#define OP_IN 6
|
||||
#define OP_JMP_FWD 7
|
||||
#define OP_JMP_BCK 8
|
||||
|
||||
#define SUCCESS 0
|
||||
#define FAILURE 1
|
||||
|
||||
#define PROGRAM_SIZE 4096
|
||||
#define STACK_SIZE 512
|
||||
#define DATA_SIZE 4096
|
||||
|
||||
/* Bracket-stack helpers over the file-scope STACK / SP pair.
 * PUSH and POP do no bounds checking: test STACK_FULL() / STACK_EMPTY() first. */
#define STACK_PUSH(A)   (STACK[SP++] = (A))
#define STACK_POP()     (STACK[--SP])
#define STACK_EMPTY()   (SP == 0)
#define STACK_FULL()    (SP == STACK_SIZE)
|
||||
|
||||
/* One compiled Brainfuck instruction as stored in PROGRAM[]. */
struct instruction_t {
  /* One of the OP_* opcodes. */
  unsigned short operator;
  /* For OP_JMP_FWD / OP_JMP_BCK: PROGRAM index of the matching bracket.
   * compile_bf does not write this field for any other opcode. */
  unsigned short operand;
};
|
||||
|
||||
static struct instruction_t *PROGRAM;
|
||||
static unsigned short *STACK;
|
||||
static unsigned int SP;
|
||||
static const char *code;
|
||||
static char *input;
|
||||
|
||||
static int compile_bf() {
|
||||
unsigned short pc = 0, jmp_pc;
|
||||
for (; *code; code ++) {
|
||||
int c = *code;
|
||||
if (pc >= PROGRAM_SIZE) break;
|
||||
switch (c) {
|
||||
case '>': PROGRAM[pc].operator = OP_INC_DP; break;
|
||||
case '<': PROGRAM[pc].operator = OP_DEC_DP; break;
|
||||
case '+': PROGRAM[pc].operator = OP_INC_VAL; break;
|
||||
case '-': PROGRAM[pc].operator = OP_DEC_VAL; break;
|
||||
case '.': PROGRAM[pc].operator = OP_OUT; break;
|
||||
case ',': PROGRAM[pc].operator = OP_IN; break;
|
||||
case '[':
|
||||
PROGRAM[pc].operator = OP_JMP_FWD;
|
||||
if (STACK_FULL()) {
|
||||
return FAILURE;
|
||||
}
|
||||
STACK_PUSH(pc);
|
||||
break;
|
||||
case ']':
|
||||
if (STACK_EMPTY()) {
|
||||
return FAILURE;
|
||||
}
|
||||
jmp_pc = STACK_POP();
|
||||
PROGRAM[pc].operator = OP_JMP_BCK;
|
||||
PROGRAM[pc].operand = jmp_pc;
|
||||
PROGRAM[jmp_pc].operand = pc;
|
||||
break;
|
||||
default: pc--; break;
|
||||
}
|
||||
pc++;
|
||||
}
|
||||
if (!STACK_EMPTY() || pc == PROGRAM_SIZE) {
|
||||
return FAILURE;
|
||||
}
|
||||
PROGRAM[pc].operator = OP_END;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned short *data;
|
||||
static char *output;
|
||||
static int noutput;
|
||||
|
||||
static void execute_bf() {
|
||||
unsigned int pc = 0, ptr = 0;
|
||||
while (PROGRAM[pc].operator != OP_END && ptr < DATA_SIZE) {
|
||||
switch (PROGRAM[pc].operator) {
|
||||
case OP_INC_DP: ptr++; break;
|
||||
case OP_DEC_DP: ptr--; break;
|
||||
case OP_INC_VAL: data[ptr]++; break;
|
||||
case OP_DEC_VAL: data[ptr]--; break;
|
||||
case OP_OUT: output[noutput ++] = data[ptr]; break;
|
||||
case OP_IN: data[ptr] = *(input ++); break;
|
||||
case OP_JMP_FWD: if(!data[ptr]) { pc = PROGRAM[pc].operand; } break;
|
||||
case OP_JMP_BCK: if(data[ptr]) { pc = PROGRAM[pc].operand; } break;
|
||||
default: return;
|
||||
}
|
||||
pc++;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_bf_prepare() {
|
||||
ARR_SIZE = setting->size;
|
||||
SP = 0;
|
||||
PROGRAM = bench_alloc(sizeof(PROGRAM[0]) * PROGRAM_SIZE);
|
||||
STACK = bench_alloc(sizeof(STACK[0]) * STACK_SIZE);
|
||||
data = bench_alloc(sizeof(data[0]) * DATA_SIZE);
|
||||
code = CODE;
|
||||
input = bench_alloc(ARR_SIZE + 1);
|
||||
output = bench_alloc(DATA_SIZE);
|
||||
noutput = 0;
|
||||
|
||||
bench_srand(1);
|
||||
for (int i = 0; i < ARR_SIZE; i ++) {
|
||||
input[i] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"[bench_rand() % 62];
|
||||
}
|
||||
}
|
||||
|
||||
void bench_bf_run() {
|
||||
compile_bf();
|
||||
execute_bf();
|
||||
}
|
||||
|
||||
int bench_bf_validate() {
|
||||
uint32_t cs = checksum(output, output + noutput);
|
||||
return noutput == ARR_SIZE && cs == setting->checksum;
|
||||
}
|
138
micro-bench/src/dinic/dinic.cc
Normal file
138
micro-bench/src/dinic/dinic.cc
Normal file
|
@ -0,0 +1,138 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
const int INF = 0x3f3f3f;
|
||||
|
||||
// Directed edge of the flow network: `flow` units are currently routed over
// an edge of capacity `cap` going from `from` to `to`.
struct Edge {
  int from, to, cap, flow;

  // Leaves every field uninitialised (used for raw array storage).
  Edge() {}

  Edge(int from, int to, int cap, int flow)
    : from(from), to(to), cap(cap), flow(flow) {}
};
|
||||
|
||||
// Smaller of two values; returns y when neither compares less than the other.
template<typename T>
static inline T min(T x, T y) {
  if (x < y) {
    return x;
  }
  return y;
}
|
||||
|
||||
struct Dinic {
|
||||
int n, m, s, t;
|
||||
Edge *edges;
|
||||
int *head, *nxt, *d, *cur, *queue;
|
||||
bool *vis;
|
||||
|
||||
void init(int n) {
|
||||
int nold = (n - 2) / 2;
|
||||
int maxm = (nold * nold + nold * 2) * 2;
|
||||
|
||||
edges = (Edge *)bench_alloc(sizeof(Edge) * maxm);
|
||||
head = (int *)bench_alloc(sizeof(int) * n);
|
||||
nxt = (int *)bench_alloc(sizeof(int) * maxm);
|
||||
vis = (bool *)bench_alloc(sizeof(bool) * n);
|
||||
d = (int *)bench_alloc(sizeof(int) * n);
|
||||
cur = (int *)bench_alloc(sizeof(int) * n);
|
||||
queue = (int *)bench_alloc(sizeof(int) * n);
|
||||
|
||||
this->n = n;
|
||||
for (int i = 0; i < n; i ++) {
|
||||
head[i] = -1;
|
||||
}
|
||||
m = 0;
|
||||
}
|
||||
|
||||
void AddEdge(int u, int v, int c) {
|
||||
if (c == 0) return;
|
||||
edges[m] = Edge(u, v, c, 0);
|
||||
nxt[m] = head[u];
|
||||
head[u] = m++;
|
||||
edges[m] = Edge(v, u, 0, 0);
|
||||
nxt[m] = head[v];
|
||||
head[v] = m++;
|
||||
}
|
||||
|
||||
bool BFS() {
|
||||
for (int i = 0; i < n; i ++) vis[i] = 0;
|
||||
int qf = 0, qr = 0;
|
||||
queue[qr ++] = s;
|
||||
d[s] = 0;
|
||||
vis[s] = 1;
|
||||
while (qf != qr) {
|
||||
int x = queue[qf ++];
|
||||
for (int i = head[x]; i != -1; i = nxt[i]) {
|
||||
Edge& e = edges[i];
|
||||
if (!vis[e.to] && e.cap > e.flow) {
|
||||
vis[e.to] = 1;
|
||||
d[e.to] = d[x] + 1;
|
||||
queue[qr ++] = e.to;
|
||||
}
|
||||
}
|
||||
}
|
||||
return vis[t];
|
||||
}
|
||||
|
||||
int DFS(int x, int a) {
|
||||
if (x == t || a == 0) return a;
|
||||
int flow = 0, f;
|
||||
for (int i = cur[x]; i != -1; i = nxt[i]) {
|
||||
Edge& e = edges[i];
|
||||
if (d[x] + 1 == d[e.to] && (f = DFS(e.to, min(a, e.cap-e.flow))) > 0) {
|
||||
e.flow += f;
|
||||
edges[i^1].flow -= f;
|
||||
flow += f;
|
||||
a -= f;
|
||||
if (a == 0) break;
|
||||
}
|
||||
}
|
||||
return flow;
|
||||
}
|
||||
|
||||
int Maxflow(int s, int t) {
|
||||
this -> s = s; this -> t = t;
|
||||
int flow = 0;
|
||||
while (BFS()) {
|
||||
for (int i = 0; i < n; i++)
|
||||
cur[i] = head[i];
|
||||
flow += DFS(s, INF);
|
||||
}
|
||||
return flow;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
static Dinic *G;
|
||||
static int ans;
|
||||
|
||||
void bench_dinic_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
int s = 2 * N, t = 2 * N + 1;
|
||||
G = (Dinic*)bench_alloc(sizeof(Dinic));
|
||||
G->init(2 * N + 2);
|
||||
for (int i = 0; i < N; i ++)
|
||||
for (int j = 0; j < N; j ++) {
|
||||
G->AddEdge(i, N + j, bench_rand() % 10);
|
||||
}
|
||||
|
||||
for (int i = 0; i < N; i ++) {
|
||||
G->AddEdge(s, i, bench_rand() % 1000);
|
||||
G->AddEdge(N + i, t, bench_rand() % 1000);
|
||||
}
|
||||
}
|
||||
|
||||
// Timed section: max flow from the source (2N) to the sink (2N+1).
void bench_dinic_run() {
  const int source = 2 * N;
  const int sink = 2 * N + 1;
  ans = G->Maxflow(source, sink);
}
|
||||
|
||||
int bench_dinic_validate() {
|
||||
return (uint32_t)ans == setting->checksum;
|
||||
}
|
||||
}
|
||||
|
||||
|
64
micro-bench/src/fib/fib.c
Normal file
64
micro-bench/src/fib/fib.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
// f(n) = (f(n-1) + f(n-2) + .. f(n-m)) mod 2^32
|
||||
|
||||
#define N 2147483603
|
||||
static int M;
|
||||
|
||||
static void put(uint32_t *m, int i, int j, uint32_t data) {
|
||||
m[i * M + j] = data;
|
||||
}
|
||||
|
||||
static uint32_t get(uint32_t *m, int i, int j) {
|
||||
return m[i * M + j];
|
||||
}
|
||||
|
||||
static inline void mult(uint32_t *c, uint32_t *a, uint32_t *b) {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++) {
|
||||
put(c, i, j, 0);
|
||||
for (int k = 0; k < M; k ++) {
|
||||
put(c, i, j, get(c, i, j) + get(a, i, k) * get(b, k, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void assign(uint32_t *a, uint32_t *b) {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++)
|
||||
put(a, i, j, get(b, i, j));
|
||||
}
|
||||
|
||||
static uint32_t *A, *ans, *T, *tmp;
|
||||
|
||||
void bench_fib_prepare() {
|
||||
M = setting->size;
|
||||
int sz = sizeof(uint32_t) * M * M;
|
||||
A = bench_alloc(sz);
|
||||
T = bench_alloc(sz);
|
||||
ans = bench_alloc(sz);
|
||||
tmp = bench_alloc(sz);
|
||||
}
|
||||
|
||||
void bench_fib_run() {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++) {
|
||||
uint32_t x = (i == M - 1 || j == i + 1);
|
||||
put(A, i, j, x);
|
||||
put(T, i, j, x);
|
||||
put(ans, i, j, i == j);
|
||||
}
|
||||
|
||||
for (int n = N; n > 0; n >>= 1) {
|
||||
if (n & 1) {
|
||||
mult(tmp, ans, T);
|
||||
assign(ans, tmp);
|
||||
}
|
||||
mult(tmp, T, T);
|
||||
assign(T, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
int bench_fib_validate() {
|
||||
return get(ans, M-1, M-1) == setting->checksum;
|
||||
}
|
29
micro-bench/src/lzip/lzip.c
Normal file
29
micro-bench/src/lzip/lzip.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
#include "quicklz.h"
|
||||
#include <benchmark.h>
|
||||
|
||||
static int SIZE;
|
||||
|
||||
static qlz_state_compress *state;
|
||||
static char *blk;
|
||||
static char *compress;
|
||||
static int len;
|
||||
|
||||
void bench_lzip_prepare() {
|
||||
SIZE = setting->size;
|
||||
bench_srand(1);
|
||||
state = bench_alloc(sizeof(qlz_state_compress));
|
||||
blk = bench_alloc(SIZE);
|
||||
compress = bench_alloc(SIZE + 400);
|
||||
for (int i = 0; i < SIZE; i ++) {
|
||||
blk[i] = 'a' + bench_rand() % 26;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_lzip_run() {
|
||||
len = qlz_compress(blk, compress, SIZE, state);
|
||||
}
|
||||
|
||||
int bench_lzip_validate() {
|
||||
return checksum(compress, compress + len) == setting->checksum;
|
||||
}
|
||||
|
761
micro-bench/src/lzip/quicklz.c
Normal file
761
micro-bench/src/lzip/quicklz.c
Normal file
|
@ -0,0 +1,761 @@
|
|||
// Fast data compression library
|
||||
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||
// lar@quicklz.com
|
||||
//
|
||||
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
|
||||
// released into public must be open source) or under a commercial license if such
|
||||
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
|
||||
// does not cover derived or ported versions created by third parties under GPL.
|
||||
|
||||
// 1.5.0 final
|
||||
|
||||
#include "quicklz.h"
|
||||
|
||||
#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0
|
||||
#error quicklz.c and quicklz.h have different versions
|
||||
#endif
|
||||
|
||||
#define MINOFFSET 2
|
||||
#define UNCONDITIONAL_MATCHLEN 6
|
||||
#define UNCOMPRESSED_END 4
|
||||
#define CWORD_LEN 4
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
|
||||
#define OFFSET_BASE source
|
||||
#define CAST (ui32)(size_t)
|
||||
#else
|
||||
#define OFFSET_BASE 0
|
||||
#define CAST
|
||||
#endif
|
||||
|
||||
int qlz_get_setting(int setting)
|
||||
{
|
||||
switch (setting)
|
||||
{
|
||||
case 0: return QLZ_COMPRESSION_LEVEL;
|
||||
case 1: return sizeof(qlz_state_compress);
|
||||
case 2: return sizeof(qlz_state_decompress);
|
||||
case 3: return QLZ_STREAMING_BUFFER;
|
||||
#ifdef QLZ_MEMORY_SAFE
|
||||
case 6: return 1;
|
||||
#else
|
||||
case 6: return 0;
|
||||
#endif
|
||||
case 7: return QLZ_VERSION_MAJOR;
|
||||
case 8: return QLZ_VERSION_MINOR;
|
||||
case 9: return QLZ_VERSION_REVISION;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
// Returns 1 when src[1..n] all equal src[0] (trivially 1 for n == 0), else 0.
static int same(const unsigned char *src, size_t n)
{
    for (; n > 0; n--) {
        if (src[n] != src[0])
            return 0;
    }
    return 1;
}
#endif
|
||||
|
||||
// Clear the compressor's hash table: at level 1 every bucket's offset is
// zeroed, at other levels every bucket's use counter is zeroed.
static void reset_table_compress(qlz_state_compress *state)
{
    int slot = 0;
    while (slot < QLZ_HASH_VALUES) {
#if QLZ_COMPRESSION_LEVEL == 1
        state->hash[slot].offset = 0;
#else
        state->hash_counter[slot] = 0;
#endif
        slot++;
    }
}
|
||||
|
||||
// Clear the decompressor's per-bucket use counters. Only level 2 has any
// state to clear here; at other levels this is a no-op.
static void reset_table_decompress(qlz_state_decompress *state)
{
#if QLZ_COMPRESSION_LEVEL == 2
    for (int slot = 0; slot < QLZ_HASH_VALUES; slot++)
        state->hash_counter[slot] = 0;
#else
    (void)state;
#endif
}
|
||||
|
||||
// Fold a fetched value into a hash-table index in [0, QLZ_HASH_VALUES).
static __inline ui32 hash_func(ui32 i)
{
#if QLZ_COMPRESSION_LEVEL == 2
    const ui32 mixed = (i >> 9) ^ (i >> 13) ^ i;
#else
    const ui32 mixed = (i >> 12) ^ i;
#endif
    return mixed & (QLZ_HASH_VALUES - 1);
}
|
||||
|
||||
// Assemble `bytes` (1..4) bytes at src into a little-endian ui32, one byte at
// a time so unaligned addresses are safe. Any other byte count returns 0.
static __inline ui32 fast_read(void const *src, ui32 bytes)
{
    const unsigned char *p = (const unsigned char *)src;
    ui32 ret = 0;

    if (bytes >= 1 && bytes <= 4) {
        for (ui32 i = 0; i < bytes; i++) {
            // Widen before shifting: shifting the promoted (signed) int by 24
            // is undefined for byte values >= 0x80.
            ret |= (ui32)p[i] << (i * 8);
        }
    }
    return ret;
}
|
||||
|
||||
// Hash-table index for the three bytes starting at src.
static __inline ui32 hashat(const unsigned char *src)
{
    return hash_func(fast_read(src, 3));
}
|
||||
|
||||
// Store the low `bytes` bytes of f at dst, least-significant byte first, one
// byte at a time so unaligned addresses are safe.
// The byte order is spelled out, not copied from f's in-memory layout, so
// the output matches fast_read's little-endian decoding on any host. Bytes
// past the fourth are written as 0 instead of reading beyond f.
static __inline void fast_write(ui32 f, void *dst, size_t bytes)
{
    unsigned char *out = (unsigned char *)dst;

    for (size_t i = 0; i != bytes; i++) {
        out[i] = (unsigned char)(f & 0xff);
        f >>= 8;
    }
}
|
||||
|
||||
|
||||
size_t qlz_size_decompressed(const char *source)
|
||||
{
|
||||
ui32 n, r;
|
||||
n = (((*source) & 2) == 2) ? 4 : 1;
|
||||
r = fast_read(source + 1 + n, n);
|
||||
r = r & (0xffffffff >> ((4 - n)*8));
|
||||
return r;
|
||||
}
|
||||
|
||||
size_t qlz_size_compressed(const char *source)
|
||||
{
|
||||
ui32 n, r;
|
||||
n = (((*source) & 2) == 2) ? 4 : 1;
|
||||
r = fast_read(source + 1, n);
|
||||
r = r & (0xffffffff >> ((4 - n)*8));
|
||||
return r;
|
||||
}
|
||||
|
||||
// Header length in bytes: one flag byte plus two size fields, which are
// 4 bytes each when flag bit 1 is set (9 total) and 1 byte each otherwise (3).
size_t qlz_size_header(const char *source)
{
    const int long_fields = ((*source) & 2) == 2;
    if (long_fields)
        return 9;
    return 3;
}
|
||||
|
||||
|
||||
// Stub: this build provides no implementation and aborts whenever the
// function is reached.
static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n)
{
    (void)dst;
    (void)src;
    (void)n;
    assert(0); // unaligned memory access
}
|
||||
|
||||
// Record position s in the decompressor's hash table under the hash of its
// first three bytes. Level 1 keeps one position per bucket; level 2 stores
// it round-robin in the bucket's QLZ_POINTERS slots. Level 3 does nothing.
static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s)
{
#if QLZ_COMPRESSION_LEVEL == 1
    const ui32 bucket = hashat(s);
    state->hash[bucket].offset = s;
    state->hash_counter[bucket] = 1;
#elif QLZ_COMPRESSION_LEVEL == 2
    const ui32 bucket = hashat(s);
    const unsigned char used = state->hash_counter[bucket];
    state->hash[bucket].offset[used & (QLZ_POINTERS - 1)] = s;
    state->hash_counter[bucket] = (unsigned char)(used + 1);
#else
    (void)state;
    (void)s;
#endif
}
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max)
|
||||
{
|
||||
while(*lh < max)
|
||||
{
|
||||
(*lh)++;
|
||||
update_hash(state, *lh);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compress `size` bytes at `source` into `destination` (payload only; the
// packet header is written by the caller).
//
// The output is a sequence of 4-byte control words, each followed by the
// items it describes. cword_val starts with only bit 31 set as a sentinel and
// is shifted right once per item, with bit 31 set for a match and clear for a
// literal. When the sentinel reaches bit 0 the word is flushed to cword_ptr
// and a new 4-byte slot is reserved.
//
// Returns the number of bytes written (never less than 9), or 0 when a
// control word fills after more than half the input has been consumed and
// the output is not at least 1/32 smaller than the input consumed so far.
static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state)
{
    const unsigned char *last_byte = source + size - 1;
    const unsigned char *src = source;
    unsigned char *cword_ptr = destination;
    unsigned char *dst = destination + CWORD_LEN;
    ui32 cword_val = 1U << 31;
    // Matches are only started at or before this position; the rest of the
    // input is emitted as literals by the second loop below.
    const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
    ui32 fetch = 0;
    unsigned int lits = 0;

    (void) lits;

    if(src <= last_matchstart)
        fetch = fast_read(src, 3);

    while(src <= last_matchstart)
    {
        // Sentinel reached bit 0: the current control word is full.
        if ((cword_val & 1) == 1)
        {
            // store uncompressed if compression ratio is too low
            if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5))
                return 0;

            fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);

            cword_ptr = dst;
            dst += CWORD_LEN;
            cword_val = 1U << 31;
            fetch = fast_read(src, 3);
        }
#if QLZ_COMPRESSION_LEVEL == 1
        {
            const unsigned char *o;
            ui32 hash, cached;

            hash = hash_func(fetch);
            // cached == 0 means the bucket last saw the same three bytes.
            cached = fetch ^ state->hash[hash].cache;
            state->hash[hash].cache = fetch;

            o = state->hash[hash].offset + OFFSET_BASE;
            state->hash[hash].offset = CAST(src - OFFSET_BASE);

            if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
            {
                if (*(o + 3) != *(src + 3))
                {
                    // Exactly three bytes match: 2-byte item, length code 3 - 2.
                    hash <<= 4;
                    cword_val = (cword_val >> 1) | (1U << 31);
                    fast_write((3 - 2) | hash, dst, 2);
                    src += 3;
                    dst += 2;
                }
                else
                {
                    const unsigned char *old_src = src;
                    size_t matchlen;
                    hash <<= 4;

                    cword_val = (cword_val >> 1) | (1U << 31);
                    src += 4;

                    if(*(o + (src - old_src)) == *src)
                    {
                        src++;
                        if(*(o + (src - old_src)) == *src)
                        {
                            size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1;
                            size_t remaining = q > 255 ? 255 : q;
                            src++;
                            while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining)
                                src++;
                        }
                    }

                    matchlen = src - old_src;
                    // Lengths below 18 go in the low nibble of a 2-byte item;
                    // longer ones take a third byte holding the length.
                    if (matchlen < 18)
                    {
                        fast_write((ui32)(matchlen - 2) | hash, dst, 2);
                        dst += 2;
                    }
                    else
                    {
                        fast_write((ui32)(matchlen << 16) | hash, dst, 3);
                        dst += 3;
                    }
                }
                fetch = fast_read(src, 3);
                lits = 0;
            }
            else
            {
                // Literal: copy one byte and slide the 3-byte fetch window.
                lits++;
                *dst = *src;
                src++;
                dst++;
                cword_val = (cword_val >> 1);
                fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16);
            }
        }
#elif QLZ_COMPRESSION_LEVEL >= 2
        {
            const unsigned char *o, *offset2;
            ui32 hash, matchlen, k, m, best_k = 0;
            unsigned char c;
            // Longest match allowed from here: capped at 255 and at the
            // distance to the uncompressed tail.
            size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1);
            (void)best_k;


            //hash = hashat(src);
            fetch = fast_read(src, 3);
            hash = hash_func(fetch);

            // c counts how many positions have been stored in this bucket.
            c = state->hash_counter[hash];

            // Candidate 0 sets the baseline match length.
            offset2 = state->hash[hash].offset[0];
            if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0)
            {
                matchlen = 3;
                if(*(offset2 + matchlen) == *(src + matchlen))
                {
                    matchlen = 4;
                    while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining)
                        matchlen++;
                }
            }
            else
                matchlen = 0;
            // Remaining candidates: keep whichever gives the longest match.
            for(k = 1; k < QLZ_POINTERS && c > k; k++)
            {
                o = state->hash[hash].offset[k];
#if QLZ_COMPRESSION_LEVEL == 3
                if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#elif QLZ_COMPRESSION_LEVEL == 2
                if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#endif
                {
                    m = 3;
                    while(*(o + m) == *(src + m) && m < remaining)
                        m++;
#if QLZ_COMPRESSION_LEVEL == 3
                    if ((m > matchlen) || (m == matchlen && o > offset2))
#elif QLZ_COMPRESSION_LEVEL == 2
                    if (m > matchlen)
#endif
                    {
                        offset2 = o;
                        matchlen = m;
                        best_k = k;
                    }
                }
            }
            o = offset2;
            // Remember the current position in the bucket, round-robin.
            state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
            c++;
            state->hash_counter[hash] = c;

#if QLZ_COMPRESSION_LEVEL == 3
            if(matchlen > 2 && src - o < 131071)
            {
                ui32 u;
                size_t offset = src - o;

                // Hash every position covered by the match as well.
                for(u = 1; u < matchlen; u++)
                {
                    hash = hashat(src + u);
                    c = state->hash_counter[hash]++;
                    state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u;
                }

                cword_val = (cword_val >> 1) | (1U << 31);
                src += matchlen;

                // Pick the shortest item (1 to 4 bytes) that can hold this
                // offset/length pair.
                if(matchlen == 3 && offset <= 63)
                {
                    *dst = (unsigned char)(offset << 2);
                    dst++;
                }
                else if (matchlen == 3 && offset <= 16383)
                {
                    ui32 f = (ui32)((offset << 2) | 1);
                    fast_write(f, dst, 2);
                    dst += 2;
                }
                else if (matchlen <= 18 && offset <= 1023)
                {
                    ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2;
                    fast_write(f, dst, 2);
                    dst += 2;
                }

                else if(matchlen <= 33)
                {
                    ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3;
                    fast_write(f, dst, 3);
                    dst += 3;
                }
                else
                {
                    ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3;
                    fast_write(f, dst, 4);
                    dst += 4;
                }
            }
            else
            {
                *dst = *src;
                src++;
                dst++;
                cword_val = (cword_val >> 1);
            }
#elif QLZ_COMPRESSION_LEVEL == 2

            if(matchlen > 2)
            {
                cword_val = (cword_val >> 1) | (1U << 31);
                src += matchlen;

                // Item layout: bits 0-1 candidate slot, bits 2-4 length - 2
                // (0 means the length is in a third byte), hash from bit 5 up.
                if (matchlen < 10)
                {
                    ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5);
                    fast_write(f, dst, 2);
                    dst += 2;
                }
                else
                {
                    ui32 f = best_k | (matchlen << 16) | (hash << 5);
                    fast_write(f, dst, 3);
                    dst += 3;
                }
            }
            else
            {
                *dst = *src;
                src++;
                dst++;
                cword_val = (cword_val >> 1);
            }
#endif
        }
#endif
    }
    // Tail: everything after last_matchstart is emitted as literals. At
    // levels 1 and 2 positions are still hashed while at least three more
    // bytes follow.
    while (src <= last_byte)
    {
        if ((cword_val & 1) == 1)
        {
            fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
            cword_ptr = dst;
            dst += CWORD_LEN;
            cword_val = 1U << 31;
        }
#if QLZ_COMPRESSION_LEVEL < 3
        if (src <= last_byte - 3)
        {
#if QLZ_COMPRESSION_LEVEL == 1
            ui32 hash, fetch;
            fetch = fast_read(src, 3);
            hash = hash_func(fetch);
            state->hash[hash].offset = CAST(src - OFFSET_BASE);
            state->hash[hash].cache = fetch;
#elif QLZ_COMPRESSION_LEVEL == 2
            ui32 hash;
            unsigned char c;
            hash = hashat(src);
            c = state->hash_counter[hash];
            state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
            c++;
            state->hash_counter[hash] = c;
#endif
        }
#endif
        *dst = *src;
        src++;
        dst++;
        cword_val = (cword_val >> 1);
    }

    // Shift the final, partly filled control word until the sentinel reaches
    // bit 0, then flush it.
    while((cword_val & 1) != 1)
        cword_val = (cword_val >> 1);

    fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);

    // min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument
    return dst - destination < 9 ? 9 : dst - destination;
}
|
||||
|
||||
// Decode a compressed packet at `source` (header included) into `size` bytes
// at `destination`. `history` is the lowest address a match may refer back to
// and is only checked when QLZ_MEMORY_SAFE is defined.
//
// Returns `size` on completion, or 0 when a QLZ_MEMORY_SAFE bounds check fails.
//
// Note: both the match copy and the bulk literal copy go through memcpy_up,
// which in this file is a stub that asserts, so those paths abort when reached.
static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history)
{
    const unsigned char *src = source + qlz_size_header((const char *)source);
    unsigned char *dst = destination;
    const unsigned char *last_destination_byte = destination + size - 1;
    // 1 means "control word exhausted": forces a load on the first iteration.
    ui32 cword_val = 1;
    const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
    unsigned char *last_hashed = destination - 1;
    const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1;
    // Indexed by the low 4 control bits: number of trailing zero bits, i.e.
    // consecutive literals (4 when all four bits are clear).
    static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};

    (void) last_source_byte;
    (void) last_hashed;
    (void) state;
    (void) history;

    for(;;)
    {
        ui32 fetch;

        if (cword_val == 1)
        {
#ifdef QLZ_MEMORY_SAFE
            if(src + CWORD_LEN - 1 > last_source_byte)
                return 0;
#endif
            cword_val = fast_read(src, CWORD_LEN);
            src += CWORD_LEN;
        }

#ifdef QLZ_MEMORY_SAFE
        if(src + 4 - 1 > last_source_byte)
            return 0;
#endif

        fetch = fast_read(src, 4);

        // Low control bit set: the next item is a match.
        if ((cword_val & 1) == 1)
        {
            ui32 matchlen;
            const unsigned char *offset2;

#if QLZ_COMPRESSION_LEVEL == 1
            ui32 hash;
            cword_val = cword_val >> 1;
            hash = (fetch >> 4) & 0xfff;
            offset2 = (const unsigned char *)(size_t)state->hash[hash].offset;

            // Non-zero low nibble: short form, length = nibble + 2.
            // Otherwise the length is in the third byte.
            if((fetch & 0xf) != 0)
            {
                matchlen = (fetch & 0xf) + 2;
                src += 2;
            }
            else
            {
                matchlen = *(src + 2);
                src += 3;
            }

#elif QLZ_COMPRESSION_LEVEL == 2
            ui32 hash;
            unsigned char c;
            cword_val = cword_val >> 1;
            hash = (fetch >> 5) & 0x7ff;
            c = (unsigned char)(fetch & 0x3);
            offset2 = state->hash[hash].offset[c];

            // Bits 2-4 non-zero: short form, length = field + 2.
            // Otherwise the length is in the third byte.
            if((fetch & (28)) != 0)
            {
                matchlen = ((fetch >> 2) & 0x7) + 2;
                src += 2;
            }
            else
            {
                matchlen = *(src + 2);
                src += 3;
            }

#elif QLZ_COMPRESSION_LEVEL == 3
            ui32 offset;
            cword_val = cword_val >> 1;
            // The low bits of the item select one of five encodings, 1 to 4
            // bytes long.
            if ((fetch & 3) == 0)
            {
                offset = (fetch & 0xff) >> 2;
                matchlen = 3;
                src++;
            }
            else if ((fetch & 2) == 0)
            {
                offset = (fetch & 0xffff) >> 2;
                matchlen = 3;
                src += 2;
            }
            else if ((fetch & 1) == 0)
            {
                offset = (fetch & 0xffff) >> 6;
                matchlen = ((fetch >> 2) & 15) + 3;
                src += 2;
            }
            else if ((fetch & 127) != 3)
            {
                offset = (fetch >> 7) & 0x1ffff;
                matchlen = ((fetch >> 2) & 0x1f) + 2;
                src += 3;
            }
            else
            {
                offset = (fetch >> 15);
                matchlen = ((fetch >> 7) & 255) + 3;
                src += 4;
            }

            offset2 = dst - offset;
#endif

#ifdef QLZ_MEMORY_SAFE
            if(offset2 < history || offset2 > dst - MINOFFSET - 1)
                return 0;

            if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1))
                return 0;
#endif

            memcpy_up(dst, offset2, matchlen);
            dst += matchlen;

#if QLZ_COMPRESSION_LEVEL <= 2
            // Hash up to the start of the match, then mark the whole match as
            // hashed.
            update_hash_upto(state, &last_hashed, dst - matchlen);
            last_hashed = dst - 1;
#endif
        }
        else
        {
            if (dst < last_matchstart)
            {
                // Copy 4 bytes at once, but advance only by the number of
                // literals the control bits announce.
                unsigned int n = bitlut[cword_val & 0xf];
                memcpy_up(dst, src, 4);
                cword_val = cword_val >> n;
                dst += n;
                src += n;
#if QLZ_COMPRESSION_LEVEL <= 2
                update_hash_upto(state, &last_hashed, dst - 3);
#endif
            }
            else
            {
                // Tail: the rest is literals; control words are skipped
                // without being read.
                while(dst <= last_destination_byte)
                {
                    if (cword_val == 1)
                    {
                        src += CWORD_LEN;
                        cword_val = 1U << 31;
                    }
#ifdef QLZ_MEMORY_SAFE
                    if(src >= last_source_byte + 1)
                        return 0;
#endif
                    *dst = *src;
                    dst++;
                    src++;
                    cword_val = cword_val >> 1;
                }

#if QLZ_COMPRESSION_LEVEL <= 2
                update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant
#endif
                return size;
            }

        }
    }
}
|
||||
|
||||
// Compress `size` bytes from `source` into a self-describing packet at
// `destination` and return the packet length in bytes.
//
// Returns 0 without writing anything when size is 0 or exceeds
// 0xffffffff - 400. Inputs under 216 bytes get a 3-byte header (flags,
// 1-byte packet size, 1-byte input size); larger ones a 9-byte header (flags,
// 4-byte packet size, 4-byte input size). When the core gives up (returns 0),
// the input is stored verbatim after the header and the "compressed" flag is
// left clear.
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state)
{
    size_t r;
    ui32 compressed;
    size_t base;   // header length: 3 or 9

    if(size == 0 || size > 0xffffffff - 400)
        return 0;

    if(size < 216)
        base = 3;
    else
        base = 9;

#if QLZ_STREAMING_BUFFER > 0
    if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER)
#endif
    {
        // Non-streaming path (always taken when QLZ_STREAMING_BUFFER is 0):
        // start from an empty hash table.
        reset_table_compress(state);
        r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state);
#if QLZ_STREAMING_BUFFER > 0
        reset_table_compress(state);
#endif
        // r == base means the core returned 0: fall back to a raw copy.
        if(r == base)
        {
            bench_memcpy(destination + base, source, size);
            r = size + base;
            compressed = 0;
        }
        else
        {
            compressed = 1;
        }
        state->stream_counter = 0;
    }
#if QLZ_STREAMING_BUFFER > 0
    else
    {
        // Streaming path: append the input to the history buffer and
        // compress it from there, so matches can reach earlier packets.
        unsigned char *src = state->stream_buffer + state->stream_counter;

        bench_memcpy(src, source, size);
        r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state);

        if(r == base)
        {
            bench_memcpy(destination + base, src, size);
            r = size + base;
            compressed = 0;
            reset_table_compress(state);
        }
        else
        {
            compressed = 1;
        }
        state->stream_counter += size;
    }
#endif
    if(base == 3)
    {
        *destination = (unsigned char)(0 | compressed);
        *(destination + 1) = (unsigned char)r;
        *(destination + 2) = (unsigned char)size;
    }
    else
    {
        *destination = (unsigned char)(2 | compressed);
        fast_write((ui32)r, destination + 1, 4);
        fast_write((ui32)size, destination + 5, 4);
    }

    // Remaining flag bits: level in bits 2-3, streaming-buffer code in bits
    // 4-5, bit 6 always set.
    *destination |= (QLZ_COMPRESSION_LEVEL << 2);
    *destination |= (1 << 6);
    *destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4);

// 76543210
// 01SSLLHC

    return r;
}
|
||||
|
||||
// Decode the packet at `source` into `destination` and return the number of
// bytes produced. The decompressed size is read from the packet header.
// Packets whose flag bit 0 is set go through qlz_decompress_core; all others
// are copied verbatim from just past the header.
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state)
{
    size_t dsiz = qlz_size_decompressed(source);

#if QLZ_STREAMING_BUFFER > 0
    if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER)
#endif
    {
        // Non-streaming path (always taken when QLZ_STREAMING_BUFFER is 0).
        if((*source & 1) == 1)
        {
            reset_table_decompress(state);
            dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination);
        }
        else
        {
            bench_memcpy(destination, source + qlz_size_header(source), dsiz);
        }
        state->stream_counter = 0;
        reset_table_decompress(state);
    }
#if QLZ_STREAMING_BUFFER > 0
    else
    {
        // Streaming path: decode into the history buffer, then copy the
        // result out to the caller.
        unsigned char *dst = state->stream_buffer + state->stream_counter;
        if((*source & 1) == 1)
        {
            dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer);
        }
        else
        {
            bench_memcpy(dst, source + qlz_size_header(source), dsiz);
            reset_table_decompress(state);
        }
        bench_memcpy(destination, dst, dsiz);
        state->stream_counter += dsiz;
    }
#endif
    return dsiz;
}
|
||||
|
164
micro-bench/src/lzip/quicklz.h
Normal file
164
micro-bench/src/lzip/quicklz.h
Normal file
|
@ -0,0 +1,164 @@
|
|||
#ifndef QLZ_HEADER
|
||||
#define QLZ_HEADER
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
|
||||
// Copy n bytes from src to dst and return dst. Overlap is handled: when dst
// starts inside [src, src + n) the copy runs backwards so source bytes are
// read before they are overwritten. Both pointers must be non-NULL.
static inline void* bench_memcpy(void* dst, const void* src, size_t n){
  assert(dst&&src);
  // Work through char pointers: arithmetic on void* is a compiler extension.
  const char* s = (const char*)src;
  char* d = (char*)dst;
  if (s < d && s + n > d) {
    s += n;
    d += n;
    while (n-- > 0) *--d = *--s;
  }
  else {
    while (n-- > 0) *d++ = *s++;
  }
  return dst;
}
|
||||
|
||||
|
||||
// Fast data compression library
|
||||
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||
// lar@quicklz.com
|
||||
//
|
||||
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
|
||||
// released into public must be open source) or under a commercial license if such
|
||||
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
|
||||
// does not cover derived or ported versions created by third parties under GPL.
|
||||
|
||||
// You can edit following user settings. Data must be decompressed with the same
|
||||
// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed
|
||||
// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially
|
||||
// zeroed out (see manual). First #ifndef makes it possible to define settings from
|
||||
// the outside like the compiler command line.
|
||||
|
||||
// 1.5.0 final
|
||||
|
||||
#ifndef QLZ_COMPRESSION_LEVEL
|
||||
|
||||
// 1 gives fastest compression speed. 3 gives fastest decompression speed and best
|
||||
// compression ratio.
|
||||
//#define QLZ_COMPRESSION_LEVEL 1
|
||||
//#define QLZ_COMPRESSION_LEVEL 2
|
||||
//#define QLZ_COMPRESSION_LEVEL 3
|
||||
#define QLZ_COMPRESSION_LEVEL 2
|
||||
|
||||
// If > 0, zero out both states prior to first call to qlz_compress() or qlz_decompress()
|
||||
// and decompress packets in the same order as they were compressed
|
||||
#define QLZ_STREAMING_BUFFER 0
|
||||
//#define QLZ_STREAMING_BUFFER 100000
|
||||
//#define QLZ_STREAMING_BUFFER 1000000
|
||||
|
||||
// Guarantees that decompression of corrupted data cannot crash. Decreases decompression
|
||||
// speed 10-20%. Compression speed not affected.
|
||||
//#define QLZ_MEMORY_SAFE
|
||||
#endif
|
||||
|
||||
#define QLZ_VERSION_MAJOR 1
|
||||
#define QLZ_VERSION_MINOR 5
|
||||
#define QLZ_VERSION_REVISION 0
|
||||
|
||||
// Verify compression level
|
||||
#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3
|
||||
#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3
|
||||
#endif
|
||||
|
||||
typedef unsigned int ui32;
|
||||
typedef unsigned short int ui16;
|
||||
|
||||
// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values!
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
#define QLZ_POINTERS 1
|
||||
#define QLZ_HASH_VALUES 4096
|
||||
#elif QLZ_COMPRESSION_LEVEL == 2
|
||||
#define QLZ_POINTERS 4
|
||||
#define QLZ_HASH_VALUES 2048
|
||||
#elif QLZ_COMPRESSION_LEVEL == 3
|
||||
#define QLZ_POINTERS 16
|
||||
#define QLZ_HASH_VALUES 4096
|
||||
#endif
|
||||
|
||||
// hash entry
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
ui32 cache;
|
||||
#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
|
||||
unsigned int offset;
|
||||
#else
|
||||
const unsigned char *offset;
|
||||
#endif
|
||||
#else
|
||||
const unsigned char *offset[QLZ_POINTERS];
|
||||
#endif
|
||||
|
||||
} qlz_hash_compress;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
const unsigned char *offset;
|
||||
#else
|
||||
const unsigned char *offset[QLZ_POINTERS];
|
||||
#endif
|
||||
} qlz_hash_decompress;
|
||||
|
||||
|
||||
// states
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_STREAMING_BUFFER > 0
|
||||
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
|
||||
#endif
|
||||
size_t stream_counter;
|
||||
qlz_hash_compress hash[QLZ_HASH_VALUES];
|
||||
unsigned char hash_counter[QLZ_HASH_VALUES];
|
||||
} qlz_state_compress;
|
||||
|
||||
|
||||
// Scratch state handed to qlz_decompress().
//
// Member order (and therefore layout) is the same as before for every valid
// QLZ_COMPRESSION_LEVEL: the hash table and its counters exist only at levels
// 1 and 2; level 3 keeps just the optional stream buffer and the counter.
// The former level-3 definition carried an "#if QLZ_COMPRESSION_LEVEL <= 2"
// section that could never be active there, so both definitions are folded
// into one.
typedef struct
{
#if QLZ_STREAMING_BUFFER > 0
	// Only present in streaming builds.
	unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
#endif
#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2
	qlz_hash_decompress hash[QLZ_HASH_VALUES];
	unsigned char hash_counter[QLZ_HASH_VALUES];
#endif
	size_t stream_counter;
} qlz_state_decompress;
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Public functions of QuickLZ
|
||||
size_t qlz_size_decompressed(const char *source);
|
||||
size_t qlz_size_compressed(const char *source);
|
||||
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
|
||||
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
|
||||
int qlz_get_setting(int setting);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
159
micro-bench/src/md5/md5.c
Normal file
159
micro-bench/src/md5/md5.c
Normal file
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Simple MD5 implementation (github.com/pod32g/md5)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
// Per-step additive constants of MD5 (RFC 1321):
// k[i] is the integer part of abs(sin(i + 1)) * 2^32, with i + 1 in radians.
// The table is private to this file, so it gets internal linkage like r[];
// a global symbol named "k" could collide with other objects linked into the
// same image.
static const uint32_t k[64] = {
  0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
  0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
  0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
  0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
  0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
  0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
  0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
  0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
  0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
  0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
  0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
  0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
  0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
  0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
  0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
  0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
|
||||
|
||||
// Left-rotation amount for each of the 64 MD5 steps; every row below covers
// 16 consecutive steps and repeats one group of four amounts.
static const uint32_t r[] = {
  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  // steps  0..15
  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  // steps 16..31
  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  // steps 32..47
  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21   // steps 48..63
};
|
||||
|
||||
// leftrotate function definition
|
||||
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
|
||||
|
||||
// Store val into bytes[0..3] in little-endian order (least significant
// byte first).
static void to_bytes(uint32_t val, uint8_t *bytes)
{
  for (int idx = 0; idx < 4; idx++) {
    bytes[idx] = (uint8_t) (val >> (8 * idx));
  }
}
|
||||
|
||||
// Assemble a 32-bit value from bytes[0..3], bytes[0] being the least
// significant byte (little-endian).
static uint32_t to_int32(const uint8_t *bytes)
{
  uint32_t word = 0;
  for (int idx = 3; idx >= 0; idx--) {
    word = (word << 8) | (uint32_t) bytes[idx];
  }
  return word;
}
|
||||
|
||||
static void md5(uint8_t *msg, size_t initial_len, uint8_t *digest) {
|
||||
|
||||
// These vars will contain the hash
|
||||
uint32_t h0, h1, h2, h3;
|
||||
|
||||
size_t new_len, offset;
|
||||
uint32_t w[16];
|
||||
uint32_t a, b, c, d, i, f, g, temp;
|
||||
|
||||
// Initialize variables - simple count in nibbles:
|
||||
h0 = 0x67452301;
|
||||
h1 = 0xefcdab89;
|
||||
h2 = 0x98badcfe;
|
||||
h3 = 0x10325476;
|
||||
|
||||
//Pre-processing:
|
||||
//append "1" bit to message
|
||||
//append "0" bits until message length in bits ≡ 448 (mod 512)
|
||||
//append length mod (2^64) to message
|
||||
|
||||
for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
|
||||
;
|
||||
|
||||
msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
|
||||
for (offset = initial_len + 1; offset < new_len; offset++)
|
||||
msg[offset] = 0; // append "0" bits
|
||||
|
||||
// append the len in bits at the end of the buffer.
|
||||
to_bytes(initial_len*8, msg + new_len);
|
||||
// initial_len>>29 == initial_len*8>>32, but avoids overflow.
|
||||
to_bytes(initial_len>>29, msg + new_len + 4);
|
||||
|
||||
// Process the message in successive 512-bit chunks:
|
||||
//for each 512-bit chunk of message:
|
||||
for(offset=0; offset<new_len; offset += (512/8)) {
|
||||
|
||||
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
|
||||
for (i = 0; i < 16; i++)
|
||||
w[i] = to_int32(msg + offset + i*4);
|
||||
|
||||
// Initialize hash value for this chunk:
|
||||
a = h0;
|
||||
b = h1;
|
||||
c = h2;
|
||||
d = h3;
|
||||
|
||||
// Main loop:
|
||||
for(i = 0; i<64; i++) {
|
||||
|
||||
if (i < 16) {
|
||||
f = (b & c) | ((~b) & d);
|
||||
g = i;
|
||||
} else if (i < 32) {
|
||||
f = (d & b) | ((~d) & c);
|
||||
g = (5*i + 1) % 16;
|
||||
} else if (i < 48) {
|
||||
f = b ^ c ^ d;
|
||||
g = (3*i + 5) % 16;
|
||||
} else {
|
||||
f = c ^ (b | (~d));
|
||||
g = (7*i) % 16;
|
||||
}
|
||||
|
||||
temp = d;
|
||||
d = c;
|
||||
c = b;
|
||||
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
|
||||
a = temp;
|
||||
|
||||
}
|
||||
|
||||
// Add this chunk's hash to result so far:
|
||||
h0 += a;
|
||||
h1 += b;
|
||||
h2 += c;
|
||||
h3 += d;
|
||||
|
||||
}
|
||||
|
||||
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
|
||||
to_bytes(h0, digest);
|
||||
to_bytes(h1, digest + 4);
|
||||
to_bytes(h2, digest + 8);
|
||||
to_bytes(h3, digest + 12);
|
||||
}
|
||||
|
||||
static uint8_t *str;
|
||||
static uint8_t *digest;
|
||||
|
||||
void bench_md5_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
str = bench_alloc(N);
|
||||
for (int i = 0; i < N; i ++) {
|
||||
str[i] = bench_rand();
|
||||
}
|
||||
digest = bench_alloc(16);
|
||||
}
|
||||
|
||||
void bench_md5_run() {
|
||||
md5(str, N, digest);
|
||||
}
|
||||
|
||||
int bench_md5_validate() {
|
||||
return checksum(digest, digest + 16) == setting->checksum;
|
||||
}
|
44
micro-bench/src/qsort/qsort.c
Normal file
44
micro-bench/src/qsort/qsort.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N, *data;
|
||||
|
||||
void bench_qsort_prepare() {
|
||||
bench_srand(1);
|
||||
|
||||
N = setting->size;
|
||||
|
||||
data = bench_alloc(N * sizeof(int));
|
||||
for (int i = 0; i < N; i ++) {
|
||||
int a = bench_rand();
|
||||
int b = bench_rand();
|
||||
data[i] = (a << 16) | b;
|
||||
}
|
||||
}
|
||||
|
||||
// Exchange the two ints that a and b point to.
static void swap(int *a, int *b)
{
  int first = *a;
  int second = *b;
  *a = second;
  *b = first;
}
|
||||
|
||||
// Sort a[l..r-1] (r is exclusive) in ascending order with a recursive
// quicksort that uses the first element of the range as pivot.
static void myqsort(int *a, int l, int r)
{
  if (l >= r) {
    return;
  }

  int pivot_value = a[l];
  int boundary = l;  // last index known to hold a value below the pivot

  int scan = l + 1;
  while (scan < r) {
    if (a[scan] < pivot_value) {
      boundary++;
      swap(&a[boundary], &a[scan]);
    }
    scan++;
  }

  // Drop the pivot between the two partitions, then sort each of them.
  swap(&a[boundary], &a[l]);
  myqsort(a, l, boundary);
  myqsort(a, boundary + 1, r);
}
|
||||
|
||||
void bench_qsort_run() {
|
||||
myqsort(data, 0, N);
|
||||
}
|
||||
|
||||
int bench_qsort_validate() {
|
||||
return checksum(data, data + N) == setting->checksum;
|
||||
}
|
32
micro-bench/src/queen/queen.c
Normal file
32
micro-bench/src/queen/queen.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static unsigned int FULL;
|
||||
|
||||
static unsigned int dfs(unsigned int row, unsigned int ld, unsigned int rd) {
|
||||
if (row == FULL) {
|
||||
return 1;
|
||||
} else {
|
||||
unsigned int pos = FULL & (~(row | ld | rd)), ans = 0;
|
||||
while (pos) {
|
||||
unsigned int p = (pos & (~pos + 1));
|
||||
pos -= p;
|
||||
ans += dfs(row | p, (ld | p) << 1, (rd | p) >> 1);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int ans;
|
||||
|
||||
void bench_queen_prepare() {
|
||||
ans = 0;
|
||||
FULL = (1 << setting->size) - 1;
|
||||
}
|
||||
|
||||
void bench_queen_run() {
|
||||
ans = dfs(0, 0, 0);
|
||||
}
|
||||
|
||||
int bench_queen_validate() {
|
||||
return ans == setting->checksum;
|
||||
}
|
42
micro-bench/src/sieve/sieve.c
Normal file
42
micro-bench/src/sieve/sieve.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
static int ans;
|
||||
static uint32_t *primes;
|
||||
|
||||
static inline int get(int n) {
|
||||
return (primes[n >> 5] >> (n & 31)) & 1;
|
||||
}
|
||||
|
||||
static inline void clear(int n) {
|
||||
primes[n >> 5] &= ~(1ul << (n & 31));
|
||||
}
|
||||
|
||||
void bench_sieve_prepare() {
|
||||
N = setting->size;
|
||||
primes = (uint32_t*)bench_alloc(N / 8 + 128);
|
||||
for (int i = 0; i <= N / 32; i ++) {
|
||||
primes[i] = 0xffffffff;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_sieve_run() {
|
||||
for (int i = 1; i <= N; i ++)
|
||||
if (!get(i)) return;
|
||||
for (int i = 2; i * i <= N; i ++) {
|
||||
if (get(i)) {
|
||||
for (int j = i + i; j <= N; j += i)
|
||||
clear(j);
|
||||
}
|
||||
}
|
||||
ans = 0;
|
||||
for (int i = 2; i <= N; i ++)
|
||||
if (get(i)) {
|
||||
ans ++;
|
||||
}
|
||||
}
|
||||
|
||||
int bench_sieve_validate() {
|
||||
return ans == setting->checksum;
|
||||
}
|
111
micro-bench/src/ssort/ssort.cc
Normal file
111
micro-bench/src/ssort/ssort.cc
Normal file
|
@ -0,0 +1,111 @@
|
|||
// This is the Skew algorithm's reference implementation.
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
// Lexicographic "less than or equal" for pairs: (a1, a2) <= (b1, b2).
inline bool leq(int a1, int a2, int b1, int b2) {
  if (a1 != b1) {
    return a1 < b1;
  }
  return a2 <= b2;
}

// Lexicographic "less than or equal" for triples:
// (a1, a2, a3) <= (b1, b2, b3).
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
  if (a1 != b1) {
    return a1 < b1;
  }
  return leq(a2, a3, b2, b3);
}
|
||||
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
|
||||
static void radixPass(int* a, int* b, int* r, int n, int K)
|
||||
{ // count occurrences
|
||||
int* c = (int*)bench_alloc(sizeof(int)*(K+1));
|
||||
for (int i = 0; i <= K; i++) c[i] = 0; // reset counters
|
||||
for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences
|
||||
for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums
|
||||
int t = c[i]; c[i] = sum; sum += t;
|
||||
}
|
||||
for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort
|
||||
}
|
||||
|
||||
// find the suffix array SA of s[0..n-1] in {1..K}^n
|
||||
// require s[n]=s[n+1]=s[n+2]=0, n>=2
|
||||
// Builds the suffix array of s[0..n-1] into SA[0..n-1] in four phases:
// radix-sort the suffixes at positions i with i%3 != 0 by their first three
// characters, recurse on the resulting names if they are not unique, sort the
// i%3 == 0 suffixes with one more radix pass, then merge both sorted sets.
// All scratch arrays (s12, SA12, s0, SA0) come from bench_alloc and are not
// released in this function, including those of the recursive calls.
void suffixArray(int* s, int* SA, int n, int K) {
|
||||
// n0, n1, n2: number of positions i in [0, n) with i%3 == 0, 1, 2
int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
|
||||
int* s12 = (int*)bench_alloc(sizeof(int)*(n02+3)); s12[n02]= s12[n02+1]= s12[n02+2]=0;
|
||||
int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
|
||||
int* s0 = (int*)bench_alloc(sizeof(int)*n0);
|
||||
int* SA0 = (int*)bench_alloc(sizeof(int)*n0);
|
||||
|
||||
// generate positions of mod 1 and mod 2 suffixes
|
||||
// the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
|
||||
for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
|
||||
|
||||
// lsb radix sort the mod 1 and mod 2 triples
|
||||
// (third character first, first character last; each pass is stable)
radixPass(s12 , SA12, s+2, n02, K);
|
||||
radixPass(SA12, s12 , s+1, n02, K);
|
||||
radixPass(s12 , SA12, s , n02, K);
|
||||
|
||||
// find lexicographic names of triples
|
||||
// (name is incremented whenever the triple differs from the previous one,
// so equal triples share a name)
int name = 0, c0 = -1, c1 = -1, c2 = -1;
|
||||
for (int i = 0; i < n02; i++) {
|
||||
if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
|
||||
name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
|
||||
}
|
||||
if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
|
||||
else { s12[SA12[i]/3 + n0] = name; } // right half
|
||||
}
|
||||
|
||||
// recurse if names are not yet unique
|
||||
if (name < n02) {
|
||||
suffixArray(s12, SA12, n02, name);
|
||||
// store unique names in s12 using the suffix array
|
||||
for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
|
||||
} else // generate the suffix array of s12 directly
|
||||
for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
|
||||
|
||||
// stably sort the mod 0 suffixes from SA12 by their first character
|
||||
for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
|
||||
radixPass(s0, SA0, s, n0, K);
|
||||
|
||||
// merge sorted SA0 suffixes and sorted SA12 suffixes
|
||||
// (t starts at n0-n1, which skips the dummy mod 1 suffix when n%3 == 1)
for (int p=0, t=n0-n1, k=0; k < n; k++) {
|
||||
// GetI() maps SA12[t] back to a position in s; the macro is not #undef'ed
// in this function, so it stays defined for the rest of the file.
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
|
||||
int i = GetI(); // pos of current offset 12 suffix
|
||||
int j = SA0[p]; // pos of current offset 0 suffix
|
||||
if (SA12[t] < n0 ?
|
||||
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
|
||||
leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
|
||||
{ // suffix from SA12 is smaller
|
||||
SA[k] = i; t++;
|
||||
if (t == n02) { // done --- only SA0 suffixes left
|
||||
for (k++; p < n0; p++, k++) SA[k] = SA0[p];
|
||||
}
|
||||
} else {
|
||||
SA[k] = j; p++;
|
||||
if (p == n0) { // done --- only SA12 suffixes left
|
||||
for (k++; t < n02; t++, k++) SA[k] = GetI();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
static int *s, *sa;
|
||||
|
||||
void bench_ssort_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
s = (int*)bench_alloc(sizeof(int)*(N+10));
|
||||
sa = (int*)bench_alloc(sizeof(int)*(N+10));
|
||||
|
||||
for (int i = 0; i < N; i ++) {
|
||||
s[i] = bench_rand() % 26;
|
||||
}
|
||||
}
|
||||
|
||||
// Timed section: build the suffix array of the N prepared characters,
// passing 26 as the largest key value.
void bench_ssort_run()
{
  suffixArray(s, sa, N, 26);
}
|
||||
|
||||
int bench_ssort_validate() {
|
||||
return checksum(sa, sa + N) == setting->checksum;
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue