N frequent item in a file.

You have given a file, consists of line, each line is a search string. Aim is to find top n frequent lines in the file. This problem should not be confused with the data stream data input i.e. online algorithms . In this question entire file is available for analysis.

#include <iostream>
#include <fstream>
#include <string>
#include <map>

typedef std::map<std::string, int> freq_table_t;
typedef std::multimap<std::size_t, freq_table_t::const_iterator> top_n_t;
size_t n = 5;

int main ( int argc, char **argv) {
    std::ifstream ifs(argv[1]);
    if ( ifs.peek() == EOF ) 
        return 1;

    std::string line; 
    freq_table_t freq_table;

    while( ifs.good()&& std::getline(ifs,line) ) {
        if ( freq_table.insert(std::make_pair(line, 1 )).second == false ) 
            freq_table[line]++;
    }
    ifs.close();

    top_n_t top5;
    for (freq_table_t::const_iterator it = freq_table.begin(); it != freq_table.end(); ++it) {
        if (top5.size() < n || it->second > top5.begin()->first)
            top5.insert(std::make_pair(it->second, it));
        if (top5.size() == n + 1)
            top5.erase(top5.begin());
    }
    for ( top_n_t::reverse_iterator ritr = top5.rbegin(); ritr != top5.rend(); ++ritr)
        std::cout << ritr->second->first << std::endl;

    return 0;
}
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s