Try me Trie

Have you ever used Auto-complete feature ?
It’s implemented using Trie. Trie is a data structure which is very efficient for searching word .
However, it has one very big disadvantage of using a lot of memory as every node contains character array of alphabet size.
It marks down the ending of word by assigning it as leaf node.
Searching a word in trie has complexity of O(n) ,where n is the length of a word searched.
Time as well as space complexity can be reduced by using Compressed Trie.

Let us write a code for Trie Datastructure

Here are some pre-processors defined which will be used later.

#define ALPHABETS_SIZE (94)
#define PIVOT_CHARACTER ((int)'!')
#define TRIE_NODE_NULLPTR (trie_t*)0
#define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])
#define FOUND 1
#define NOT_FOUND 0
#define INVALID_WORD -1
#define BUFFERSIZE (1024)

C structure definition is as follows

typedef struct trie_t {
  char value;
  int isLeaf;
  int count;
  struct trie_t *trieArray[ALPHABETS_SIZE];
} trie_t;

How to create a new node of Trie.

trie_t *getNewNode() {
  int index = 0;
  trie_t *tmp = (trie_t*) malloc ( sizeof ( trie_t));
  tmp->isLeaf = 0;
  tmp->count =  0;
  for ( index = 0; index < ALPHABETS_SIZE; index++){
    tmp->trieArray[index] = TRIE_NODE_NULLPTR;
  }
  return tmp;
}

How to get the position of new character in the trieArray

int getPosition(char ch ) {
  int pos = (ch) - PIVOT_CHARACTER;
#ifdef DEBUGME 
  if ( pos > ALPHABETS_SIZE ) {
    printf("RANGE ERROR for ALPHABETS_SIZE\n");
  }
#endif
  return pos;
}

How to insert new character in the Trie

void insert(const char *word) {
  int charIndex = 0;
  trie_t *pCurrentNode = gRoot;
  if ( !word && strlen(word) != 0 ) {
    return;
  }
#ifdef DEBUGME 
  printf("Word -> %s\n", word);
#endif

  for ( charIndex = 0; charIndex < strlen(word); charIndex++) {
    /* Find the position where new character will be stored */
    int position = getPosition(word[charIndex]);
    /* If current slot is NULL create new one */
    if ( pCurrentNode->trieArray[position] ==  TRIE_NODE_NULLPTR) {
        pCurrentNode->trieArray[position] = getNewNode();
    }
    pCurrentNode->value = word[charIndex];
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  pCurrentNode->isLeaf = 1;
  pCurrentNode->count++;
}

How to search a string in trie

int search(const char *word) {
  int length = 0;
  int index = 0;


  trie_t *pCurrentNode = gRoot;

  if ( !word && strlen(word) != 0 ) {
    return INVALID_WORD;
  }
  length = strlen(word);
  for ( index = 0; index < length; index++) {
    int position = getPosition(word[index]);
    if ( pCurrentNode->trieArray[position] == TRIE_NODE_NULLPTR ) {
      return NOT_FOUND;
    }
    pCurrentNode = pCurrentNode->trieArray[position];
  } 
  
#ifdef DEBUGME 
  printf("Count for %s = %d\n", word, pCurrentNode->count);
#endif        
  return FOUND;
}       

Here is running program

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define ALPHABETS_SIZE (94)
#define PIVOT_CHARACTER ((int)'!')
#define TRIE_NODE_NULLPTR (trie_t*)0
#define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])
#define FOUND 1
#define NOT_FOUND 0
#define INVALID_WORD -1
#define BUFFERSIZE (1024)


typedef struct trie_t {
  char value;
  int isLeaf;
  int count;
  struct trie_t *trieArray[ALPHABETS_SIZE];
} trie_t;

trie_t *gRoot = (trie_t*)0;

trie_t *getNewNode() {
  int index = 0;
  trie_t *tmp = (trie_t*) malloc ( sizeof ( trie_t));
  tmp->isLeaf = 0;
  tmp->count =  0;
  for ( index = 0; index < ALPHABETS_SIZE; index++){
    tmp->trieArray[index] = TRIE_NODE_NULLPTR;
  }
  return tmp;
}

int getPosition(char ch ) {
  int pos = (ch) - PIVOT_CHARACTER;
#ifdef DEBUGME 
  if ( pos > ALPHABETS_SIZE ) {
    printf("RANGE ERROR for ALPHABETS_SIZE\n");
  }
#endif
  return pos;
}

void insert(const char *word) {
  int charIndex = 0;
  trie_t *pCurrentNode = gRoot; 
  if ( !word && strlen(word) != 0 ) {
    return;
  }
#ifdef DEBUGME 
  printf("Word -> %s\n", word);
#endif

  for ( charIndex = 0; charIndex < strlen(word); charIndex++) {
    /* Find the position where new character will be stored */
    int position = getPosition(word[charIndex]);
    /* If current slot is NULL create new one */
    if ( pCurrentNode->trieArray[position] ==  TRIE_NODE_NULLPTR) {
        pCurrentNode->trieArray[position] = getNewNode();
    }
    pCurrentNode->value = word[charIndex];
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  pCurrentNode->isLeaf = 1;
  pCurrentNode->count++;
}

int search(const char *word) {
  int length = 0;
  int index = 0;


  trie_t *pCurrentNode = gRoot; 

  if ( !word && strlen(word) != 0 ) {
    return INVALID_WORD;
  }
  length = strlen(word);
  for ( index = 0; index < length; index++) {
    int position = getPosition(word[index]);
    if ( pCurrentNode->trieArray[position] == TRIE_NODE_NULLPTR ) {
      return NOT_FOUND;
    }
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  
#ifdef DEBUGME 
  printf("Count for %s = %d\n", word, pCurrentNode->count);
#endif
  return FOUND;
}

int main( int argc, char **argv) {
  gRoot = getNewNode();

  char buffer[BUFFERSIZE];
  while ( 1 ) {
    printf("TrieShell > ");
    fgets(buffer, BUFFERSIZE, stdin);

    if (buffer[strlen(buffer) - 1] == '\n') {
        buffer[strlen(buffer) - 1] = '\0';
    }

    if(!strcmp(buffer, "exit") || 
       !strcmp(buffer, "quit") || 
       !strcmp(buffer, "q")) {
      exit(0);
    }

    if(strstr(buffer, "insert")) {
      char *token;
      const char delim[2] = " ";
      token = strtok(buffer, delim);
      token = strtok(NULL, delim);
      while( token != NULL )  {
        insert(token);
        token = strtok(NULL, delim);
      }
    }


    if(!strcmp(buffer, "help")) {
      printf("Commands :\n");
      printf("insert <string|URL>:\n");
      printf("quit | exit | q <To exist from the shell>\n");
    }
  }
#ifdef DEBUGME 
  char keys[][8] = {"the", "a", "there", "answer", "any", "by", "bye", "their"};
  gRoot = getNewNode();
  int index;
  for(index = 0; index < ARRAY_SIZE(keys); index++) {
    insert(keys[index]);
  }
  for(index = 0; index < ARRAY_SIZE(keys); index++) {
    insert(keys[index]);
  }
  printf("Insert Done !!! \n");

  for(index = 0; index < ARRAY_SIZE(keys); index++) {
    int found = search(keys[index]);
    printf("%s --> %s\n", keys[index], found == FOUND ? "FOUND" : "NOT FOUND");
  }
  
#endif
  return 0;
}