This commit is contained in:
Yanyan Jiang 2020-08-12 05:59:59 +00:00 committed by Zihao Yu
parent 0ce11d082c
commit f8e9232a64
20 changed files with 4 additions and 4 deletions

View file

@ -0,0 +1,111 @@
// This is the Skew algorithm's reference implementation.
#include <benchmark.h>
static int N;
inline bool leq(int a1, int a2, int b1, int b2) { // lexic. order for pairs
return(a1 < b1 || (a1 == b1 && a2 <= b2));
} // and triples
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
return(a1 < b1 || (a1 == b1 && leq(a2,a3, b2,b3)));
}
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
static void radixPass(int* a, int* b, int* r, int n, int K)
{ // count occurrences
int* c = (int*)bench_alloc(sizeof(int)*(K+1));
for (int i = 0; i <= K; i++) c[i] = 0; // reset counters
for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences
for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums
int t = c[i]; c[i] = sum; sum += t;
}
for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort
}
// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n]=s[n+1]=s[n+2]=0, n>=2
void suffixArray(int* s, int* SA, int n, int K) {
int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
int* s12 = (int*)bench_alloc(sizeof(int)*(n02+3)); s12[n02]= s12[n02+1]= s12[n02+2]=0;
int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
int* s0 = (int*)bench_alloc(sizeof(int)*n0);
int* SA0 = (int*)bench_alloc(sizeof(int)*n0);
// generate positions of mod 1 and mod 2 suffixes
// the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
// lsb radix sort the mod 1 and mod 2 triples
radixPass(s12 , SA12, s+2, n02, K);
radixPass(SA12, s12 , s+1, n02, K);
radixPass(s12 , SA12, s , n02, K);
// find lexicographic names of triples
int name = 0, c0 = -1, c1 = -1, c2 = -1;
for (int i = 0; i < n02; i++) {
if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
}
if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
else { s12[SA12[i]/3 + n0] = name; } // right half
}
// recurse if names are not yet unique
if (name < n02) {
suffixArray(s12, SA12, n02, name);
// store unique names in s12 using the suffix array
for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
} else // generate the suffix array of s12 directly
for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
// stably sort the mod 0 suffixes from SA12 by their first character
for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
radixPass(s0, SA0, s, n0, K);
// merge sorted SA0 suffixes and sorted SA12 suffixes
for (int p=0, t=n0-n1, k=0; k < n; k++) {
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
int i = GetI(); // pos of current offset 12 suffix
int j = SA0[p]; // pos of current offset 0 suffix
if (SA12[t] < n0 ?
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
{ // suffix from SA12 is smaller
SA[k] = i; t++;
if (t == n02) { // done --- only SA0 suffixes left
for (k++; p < n0; p++, k++) SA[k] = SA0[p];
}
} else {
SA[k] = j; p++;
if (p == n0) { // done --- only SA12 suffixes left
for (k++; t < n02; t++, k++) SA[k] = GetI();
}
}
}
}
extern "C" {
static int *s, *sa;
void bench_ssort_prepare() {
N = setting->size;
bench_srand(1);
s = (int*)bench_alloc(sizeof(int)*(N+10));
sa = (int*)bench_alloc(sizeof(int)*(N+10));
for (int i = 0; i < N; i ++) {
s[i] = bench_rand() % 26;
}
}
void bench_ssort_run() {
suffixArray(s, sa, N, 26);
}
int bench_ssort_validate() {
return checksum(sa, sa + N) == setting->checksum;
}
}