Suffix Array

#include<bits/stdc++.h>
#define ll long long

// Usage:
//      SuffixArray sa;
//      sa.init(s);
//      sa.compute_suffix_array();
//      may use sa.getP().
struct SuffixArray {

    string s;
    int logs;
    int len;
    int **posi;
    int **c;
    int alphabet_size;
    
    SuffixArray() {}
    
    void init (string s_) {
        s = s_ + "$";
        logs = log2(s.size()) + 1;
        
        posi = new int*[logs+1];
        c    = new int*[logs+1];
        
        for (int i = 0; i <= logs; ++i) {
            posi[i] = new int[s.size()];
            c[i]    = new int[s.size()];
        }
        
        len = s.size();
        alphabet_size = 256;
    } 
    
    void compute_suffix_array() {
        
        // first loop
        int *cnt = new int[alphabet_size];
        fill(cnt, cnt + alphabet_size, 0);
        
        for (int i = 0; i < len; ++i)
            cnt[s[i]]++;
        for (int i = 1; i < alphabet_size; ++i)
            cnt[i] += cnt[i-1];
        
        for (int i = len-1; i >= 0; --i) 
            posi[0][--cnt[s[i]]] = i;
        
        int classes = 0;
        c[0][posi[0][0]] = 0;
        for (int i = 1; i < len; ++i)
            if (s[posi[0][i]] == s[posi[0][i-1]])
                c[0][posi[0][i]] = classes;
            else
                c[0][posi[0][i]] = ++classes;
        classes++;
        
        // second to logs-th loop
        int *pn = new int[len];
        cnt     = new int[len];
        for (int h = 0; h < logs; ++h) {
            for (int i = 0; i < len; ++i) {
                pn[i] = posi[h][i] - (1 << h);
                if (pn[i] < 0)
                    pn[i] += len;
            }
            
            fill(cnt, cnt + classes, 0);
            for (int i = 0; i < len; i++)
                cnt[c[h][pn[i]]]++;
            for (int i = 1; i < classes; i++)
                cnt[i] += cnt[i-1];
            for (int i = len-1; i >= 0; i--) { 
                posi[h+1][--cnt[c[h][pn[i]]]] = pn[i];
            }
            
            c[h+1][posi[h+1][0]] = 0;
            classes = 1;
            for (int i = 1; i < len; i++) {
                pair<int, int> cur =  {c[h][posi[h+1][i]],   c[h][(posi[h+1][i] + (1 << h))   % len]};
                pair<int, int> prev = {c[h][posi[h+1][i-1]], c[h][(posi[h+1][i-1] + (1 << h)) % len]};
                if (cur != prev)
                    ++classes;
                c[h+1][posi[h+1][i]] = classes - 1;
            }
        }
        
        // remove the '$' appended at the end of string s
        for (int h = 0; h <= logs; ++h) {
            for (int i = 0; i <= len - 2; ++i) {
                posi[h][i] = posi[h][i+1];
            }               
        }
        
    }
    
    int* getP () {
        return posi[logs];
    }

};

Leave a Reply