Longest prefix string length for all the suffixes

Find the length of the longest prefix string for all the suffixes of the string.

For example suffixes of the string ababaa are ababaa, babaa, abaa, baa, aa and a. The similarities of each of these strings with the string “ababaa” are 6,0,3,0,1,1 respectively. Thus the answer is 6 + 0 + 3 + 0 + 1 + 1 = 11.

It took me almost 2 weeks to come to conclusion and solve this problem. I tried suffix tree, suffix array and naive approach to solve this problem. But nothing helped to cross the time limit which was 3 seconds.

Here is the history of various code I tried.

First solution I tried, which worked but is very very slow.

#include <string>
#include <algorithm>
#include <iostream>
#include <vector>

using std::vector;
using std::string;
size_t longest_common_prefix( const vector<string>& inputs ) {
    vector<string>::const_iterator itr = inputs.begin() ;
    size_t prefix_count = itr->length() ;
    string comp_str = *itr ;
    for ( itr = inputs.begin() + 1; itr != inputs.end() ; itr++ ) {
        std::pair<string::const_iterator , string::const_iterator> p = std::mismatch( comp_str.begin(), comp_str.end(), itr->begin()) ;
        if (( p.first - comp_str.begin()) < (int)prefix_count )
            prefix_count = p.first - comp_str.begin() ;
    }
    return prefix_count;
}

int main ( int argc, char **argv) {
    int T;
    std::cin >> T;
    std::vector<size_t> result;
    for ( int i = 0; i < T; i++) {
        std::string str;
        std::cin >> str;
        size_t sol = 0;
        for ( string::iterator it = str.begin(); it != str.end(); ++it) {
            vector<string> strings;
            strings.push_back(std::string(it,str.end()));
            strings.push_back(str);
            
           sol += longest_common_prefix(strings);

            strings.clear();
            
        }
        result.push_back(sol);
    }
    for ( std::vector<size_t>::const_iterator it = result.begin(); it != result.end(); ++it) {
        std::cout << *it << std::endl;
    }
    result.clear();
    return 0;
}

Removed usage of STL and other algorithm calls and came up with simple way and improvement was very good but not as per expectation.

#include <string>
#include <iostream>

inline void solve(const std::string& input) {
    int len = input.length();
    int count = 0;
    int total = 0;
    for( register int i = 1;i < len;++i) {
        count=0;
        for( register int j = i;j < len;++j) {
            if(input[j - i] == input[j])
                count++;
            else
                break;
        }
        total += count;
    }
    std::cout << total + len << std::endl;;
}

int main ( int argc, char **argv) {
    int T;
    std::cin >> T;
    for ( int i = 0; i < T; i++) {
        std::string str;
        std::cin >> str;
        solve ( str );
    }
    return 0;
}

Trying suffix array approach, it does work, but still it was slow as my mark to finish was 3s.

#include <iostream>
#include <string>
#include <algorithm>
#include <string.h>
#include <vector>

std::string str;
size_t len;

struct Comp {
    const char* str;
    Comp(const char* str): str(str) {}
    bool operator()(size_t s1, size_t s2) const {
        return strcmp(str+s1, str+s2) < 0;
    }
};

size_t lcp(size_t t) {
    for (size_t i = 0; i < len - t; i++) {
        if (str[i] != str[i + t])
            return i;
    }
    return len - t;
}
int main(int argc, char **argv) {
    size_t T;
    std::cin >> T;
    for ( size_t i = 0; i < T; i++) {
        std::cin >> str;
        std::vector<size_t> SA;
        len = str.size();
        /* build Suffix array */
        SA.resize(len + 1);
        for (size_t i = 0; i < len + 1; ++i) {
            SA[i] = i;
        }
        std::sort(SA.begin(), SA.end(), Comp(str.c_str()));

        size_t total = 0;
        for ( std::vector<size_t>::iterator it = SA.begin(); it != SA.end(); ++it) {
            if (*it == 0 )
                break;
            total += lcp(*it);
        }
        std::cout << total + len << std::endl;
    }
    return 0;
}

On one side Indian bowlers were beating there heads against Clark and Hussy in the morning, I came up with another approach in-place string comparison, It does show quite a bit of improvement on all my previous approaches. But not quite enough for string with 1 lakh characters where all characters are same.

#include <iostream>
#include <string>

int main ( int argc, char **argv) {
    size_t T;
    std::cin >> T;
    char input[100000];
    for ( size_t i = 0; i < T; i++) {
        std::cin >> input;
        size_t len = strlen(input);
        char *left = input;
        char *right = input + len - 1;
        int sol = 0;
        int end_count = 1;
        while ( left < right ) {
            if ( *right != '\0') {
                if ( *left == *right ) {
                    sol++;
                    left++;right++;
                    continue;
                }
            }
            end_count++;
            left = input;
            right = input + len - end_count;
        }
        std::cout << sol + len << std::endl;
    }
}

Finally when I was about to stop further try, Came across amazing algorithm which is Z-algorithm. Here is the final solution.

#include <iostream>
#include <string.h>
#include <numeric>

int main ( int argc, char **argv) {
    size_t T;
    std::cin >> T;
    char input[100000];
    int Z[100000];
    for ( register size_t i = 0; i < T; ++i) {
        std::cin >> input;
        int N = strlen ( input);

        Z[0] = N;
        int a = 0, b = 0;
        for (int i = 1; i < N; ++i) {
            int k = i < b ? std::min(b - i, Z[i - a]) : 0;
            while (i + k < N && input[i + k] == input[k]) 
                ++k;
            Z[i] = k;
            if (i + k > b) { 
                a = i; b = i + k; 
            }
        }
        int sol = 0;
        for ( int i = 0; i < N; i++) {
            sol += Z[i];
        }
        std::cout << sol << std::endl;
    }
}
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s