Non-Intrusive Linked List

Non-Intrusive Linked List
A long time ago I saw an interesting implementation of a linked list.
Recently I saw a discussion about it on Hacker News and became interested again in understanding that list implementation.
A similar implementation is used by the Linux kernel's linked list.

Normally, when a developer defines a linked list, he/she adds the data as a member of the linked list node.
Say you want to create a linked list of integers; then you would have a struct as follows:

/* Classic list node: the payload (an int) lives inside the node itself,
 * next to the links to the neighbouring nodes. */
struct intList {
   int data;                /* the stored value */
   struct intList *prev;    /* previous node in the list */
   struct intList *next;    /* next node in the list */
};  /* a struct definition must be terminated with ';' */

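– Now this list is tied to the integer data type.
– You cannot make the same node part of two different lists.
– This post calls this an Intrusive Linked List. (Note: in common usage the names are the other way round – a list whose node stores the data is usually called non-intrusive, while the Linux-kernel-style list shown below, whose link is embedded in the data, is called intrusive.)
– You have to do a lot of careful list surgery, and you need to check whether pointers are NULL.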

The Non-Intrusive Linked List is a neat way of getting rid of these restrictions.

Let us define a new Non-Intrusive linked list which will work with any POD in C language.

/* Bare list link: it carries only the two neighbour pointers and no payload. */
struct llhead {
  struct llhead *prev;   /* link towards the previous element */
  struct llhead *next;   /* link towards the next element */
};

Note that there is no mention of the type it is storing. This seems strange at first.
Intrusive linked lists flip the memory layout inside out. Instead of the list node providing memory for
a POD, POD provides memory for a list node. The ‘intrusive’ part of the name comes from the fact that
we store the list node inside the type POD.

Let us define some operation on this list

/*
 * List operations on struct llhead. The list is circular: an empty head
 * points at itself in both directions.
 * NOTE: these are macros, so arguments are expanded (and evaluated) more
 * than once; pass expressions without side effects.
 */

/* Make node N point to itself in both directions (a detached / empty node). */
#define LL_INIT(N)      ((N)->next = (N)->prev = (N))
/* Declare a struct llhead named H whose two links both point at H itself. */
#define LL_HEAD(H)      struct llhead H = { &H, &H }
/* Given P, a pointer to the member N embedded in an object of type T, compute
 * the address of the enclosing T by subtracting the member offset.
 * offsetof comes from <stddef.h>. */
#define LL_ENTRY(P,T,N) ((T *)((char *)(P) - offsetof(T, N)))
/* Link node N in just before head H, i.e. between H->prev and H. */
#define LL_TAIL(H, N)                   \
  do {                                  \
    ((H)->prev)->next = (N);            \
    (N)->prev = ((H)->prev);            \
    (N)->next = (H);                    \
    (H)->prev = (N);                    \
  } while (0)
/* Unlink node N from its neighbours, then reset N to point at itself. */
#define LL_DEL(N)                       \
  do {                                  \
    ((N)->next)->prev = ((N)->prev);    \
    ((N)->prev)->next = ((N)->next);    \
    LL_INIT(N);                         \
  } while (0)
/* Walk N over every node after head H until the walk is back at H;
 * H itself is not visited. */
#define LL_FOREACH(H,N) for (N = (H)->next; N != (H); N = (N)->next)
/* Like LL_FOREACH, but the successor is saved in T before the loop body runs,
 * so the body may unlink N (e.g. with LL_DEL). */
#define LL_FOREACH_SAFE(H,N,T)         \
  for (N = (H)->next, T = (N)->next; N != (H);     \
      N = (T), T = (N)->next)

Remember that this is actually a circular doubly linked list.

Now I want to create a list of integers, this is how I will be writing my POD

/* A list element holding one int; it joins a list through its embedded link. */
struct integerList {
  struct llhead link;   /* embedded list node; LL_ENTRY(p, struct integerList, link) maps it back to this struct */
  int data;             /* the stored integer */
};

Initialize the head of the list.

/* Defines the list head `mylist`; both of its links initially point at itself. */
LL_HEAD(mylist);

Here is the complete working example.

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include "ilist.h"


/* Bare list link: it carries only the two neighbour pointers and no payload. */
struct llhead {
  struct llhead *prev;   /* link towards the previous element */
  struct llhead *next;   /* link towards the next element */
};

/*
 * List operations on struct llhead. The list is circular: an empty head
 * points at itself in both directions.
 * NOTE: these are macros, so arguments are expanded (and evaluated) more
 * than once; pass expressions without side effects.
 */

/* Make node N point to itself in both directions (a detached / empty node). */
#define LL_INIT(N)      ((N)->next = (N)->prev = (N))
/* Declare a struct llhead named H whose two links both point at H itself. */
#define LL_HEAD(H)      struct llhead H = { &H, &H }
/* Given P, a pointer to the member N embedded in an object of type T, compute
 * the address of the enclosing T by subtracting the member offset.
 * offsetof comes from <stddef.h>. */
#define LL_ENTRY(P,T,N) ((T *)((char *)(P) - offsetof(T, N)))
/* Link node N in just before head H, i.e. between H->prev and H. */
#define LL_TAIL(H, N)                   \
  do {                                  \
    ((H)->prev)->next = (N);            \
    (N)->prev = ((H)->prev);            \
    (N)->next = (H);                    \
    (H)->prev = (N);                    \
  } while (0)
/* Unlink node N from its neighbours, then reset N to point at itself. */
#define LL_DEL(N)                       \
  do {                                  \
    ((N)->next)->prev = ((N)->prev);    \
    ((N)->prev)->next = ((N)->next);    \
    LL_INIT(N);                         \
  } while (0)
/* Walk N over every node after head H until the walk is back at H;
 * H itself is not visited. */
#define LL_FOREACH(H,N) for (N = (H)->next; N != (H); N = (N)->next)
/* Like LL_FOREACH, but the successor is saved in T before the loop body runs,
 * so the body may unlink N (e.g. with LL_DEL). */
#define LL_FOREACH_SAFE(H,N,T)         \
  for (N = (H)->next, T = (N)->next; N != (H);     \
      N = (T), T = (N)->next)


/* A list element holding one int; it joins a list through its embedded link. */
struct integerList {
  struct llhead link;   /* embedded list node; LL_ENTRY(p, struct integerList, link) maps it back to this struct */
  int data;             /* the stored integer */
};

/*
 * Demo: append the integers 0..9 to a list, print them in order, then
 * unlink and free every element.
 *
 * Returns 0 on success, EXIT_FAILURE if an element could not be allocated.
 */
int main(int argc, char **argv) {
  int k = 0;
  int status = 0;
  struct llhead *head;
  struct llhead *next;
  static LL_HEAD(mylist);

  (void)argc;
  (void)argv;

  for ( k = 0; k < 10; k++) {
    struct integerList *elem = malloc(sizeof *elem);
    if ( elem == NULL ) {
      fprintf(stderr, "out of memory\n");
      status = EXIT_FAILURE;
      break;   /* fall through to the cleanup loop below */
    }
    elem->data = k;
    LL_TAIL(&mylist, &elem->link);
  }

  if ( status == 0 ) {
    LL_FOREACH(&mylist, head) {
      struct integerList *elem = LL_ENTRY(head, struct integerList, link);
      printf("%d\n", elem->data );
    }
  }

  /* The _SAFE variant remembers the successor before the body runs, so the
   * current element can be unlinked and freed while iterating. */
  LL_FOREACH_SAFE(&mylist, head, next) {
    struct integerList *elem = LL_ENTRY(head, struct integerList, link);
    LL_DEL(head);
    free(elem);
  }

  return status;
}
Advertisements

Try me Trie

Have you ever used an auto-complete feature?
It is typically implemented using a trie. A trie is a data structure which is very efficient for searching for words.
However, it has one big disadvantage: it uses a lot of memory, because every node contains an array of child pointers with one slot per character of the alphabet.
The end of a word is marked by flagging the node reached by its last character as a leaf node.
Searching for a word in a trie has complexity O(n), where n is the length of the word being searched.
Time as well as space complexity can be reduced by using a compressed trie.

Let us write a code for Trie Datastructure

Here are some preprocessor macros which will be used later.

/* Number of child slots per node: the characters '!' (33) through '~' (126). */
#define ALPHABETS_SIZE (94)
/* Character that maps to slot 0 of a node's child array. */
#define PIVOT_CHARACTER ((int)'!')
/* Null child pointer; parenthesized so it is safe inside larger expressions. */
#define TRIE_NODE_NULLPTR ((trie_t*)0)
/* Element count of a true array (not a pointer). The whole expansion and the
 * argument are parenthesized so that e.g. `x / ARRAY_SIZE(a)` parses correctly. */
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
/* Result codes returned by search(). */
#define FOUND 1
#define NOT_FOUND 0
#define INVALID_WORD (-1)
/* Size of the line buffer used by the interactive shell. */
#define BUFFERSIZE (1024)

C structure definition is as follows

/* One trie node. */
typedef struct trie_t {
  char value;   /* character associated with the node */
  int isLeaf;   /* set to 1 on the node where an inserted word ends */
  int count;    /* incremented each time a word ending at this node is inserted */
  struct trie_t *trieArray[ALPHABETS_SIZE];   /* child pointers, indexed by getPosition() */
} trie_t;

How to create a new node of Trie.

trie_t *getNewNode() {
  int index = 0;
  trie_t *tmp = (trie_t*) malloc ( sizeof ( trie_t));
  tmp->isLeaf = 0;
  tmp->count =  0;
  for ( index = 0; index < ALPHABETS_SIZE; index++){
    tmp->trieArray[index] = TRIE_NODE_NULLPTR;
  }
  return tmp;
}

How to get the position of new character in the trieArray

/*
 * Map a character to its slot in a node's child array: the distance of ch
 * from PIVOT_CHARACTER.
 *
 * The result is only a valid index when 0 <= result < ALPHABETS_SIZE; for
 * characters outside that range it is returned unchanged, so callers must
 * range-check it before indexing. With DEBUGME defined an out-of-range
 * result is also reported on stdout.
 */
int getPosition(char ch ) {
  int pos = (ch) - PIVOT_CHARACTER;
#ifdef DEBUGME 
  /* Valid slots are 0 .. ALPHABETS_SIZE-1, so pos == ALPHABETS_SIZE and
   * negative values are range errors too. */
  if ( pos < 0 || pos >= ALPHABETS_SIZE ) {
    printf("RANGE ERROR for ALPHABETS_SIZE\n");
  }
#endif
  return pos;
}

How to insert a new word in the Trie

void insert(const char *word) {
  int charIndex = 0;
  trie_t *pCurrentNode = gRoot;
  if ( !word && strlen(word) != 0 ) {
    return;
  }
#ifdef DEBUGME 
  printf("Word -> %s\n", word);
#endif

  for ( charIndex = 0; charIndex < strlen(word); charIndex++) {
    /* Find the position where new character will be stored */
    int position = getPosition(word[charIndex]);
    /* If current slot is NULL create new one */
    if ( pCurrentNode->trieArray[position] ==  TRIE_NODE_NULLPTR) {
        pCurrentNode->trieArray[position] = getNewNode();
    }
    pCurrentNode->value = word[charIndex];
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  pCurrentNode->isLeaf = 1;
  pCurrentNode->count++;
}

How to search a string in trie

int search(const char *word) {
  int length = 0;
  int index = 0;


  trie_t *pCurrentNode = gRoot;

  if ( !word && strlen(word) != 0 ) {
    return INVALID_WORD;
  }
  length = strlen(word);
  for ( index = 0; index < length; index++) {
    int position = getPosition(word[index]);
    if ( pCurrentNode->trieArray[position] == TRIE_NODE_NULLPTR ) {
      return NOT_FOUND;
    }
    pCurrentNode = pCurrentNode->trieArray[position];
  } 
  
#ifdef DEBUGME 
  printf("Count for %s = %d\n", word, pCurrentNode->count);
#endif        
  return FOUND;
}       

Here is running program

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Number of child slots per node: the characters '!' (33) through '~' (126). */
#define ALPHABETS_SIZE (94)
/* Character that maps to slot 0 of a node's child array. */
#define PIVOT_CHARACTER ((int)'!')
/* Null child pointer; parenthesized so it is safe inside larger expressions. */
#define TRIE_NODE_NULLPTR ((trie_t*)0)
/* Element count of a true array (not a pointer). The whole expansion and the
 * argument are parenthesized so that e.g. `x / ARRAY_SIZE(a)` parses correctly. */
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
/* Result codes returned by search(). */
#define FOUND 1
#define NOT_FOUND 0
#define INVALID_WORD (-1)
/* Size of the line buffer used by the interactive shell. */
#define BUFFERSIZE (1024)


/* One trie node. */
typedef struct trie_t {
  char value;   /* character associated with the node */
  int isLeaf;   /* set to 1 on the node where an inserted word ends */
  int count;    /* incremented each time a word ending at this node is inserted */
  struct trie_t *trieArray[ALPHABETS_SIZE];   /* child pointers, indexed by getPosition() */
} trie_t;

/* Root of the trie used by insert() and search(); null until main() creates it. */
trie_t *gRoot = (trie_t*)0;

trie_t *getNewNode() {
  int index = 0;
  trie_t *tmp = (trie_t*) malloc ( sizeof ( trie_t));
  tmp->isLeaf = 0;
  tmp->count =  0;
  for ( index = 0; index < ALPHABETS_SIZE; index++){
    tmp->trieArray[index] = TRIE_NODE_NULLPTR;
  }
  return tmp;
}

/*
 * Map a character to its slot in a node's child array: the distance of ch
 * from PIVOT_CHARACTER.
 *
 * The result is only a valid index when 0 <= result < ALPHABETS_SIZE; for
 * characters outside that range it is returned unchanged, so callers must
 * range-check it before indexing. With DEBUGME defined an out-of-range
 * result is also reported on stdout.
 */
int getPosition(char ch ) {
  int pos = (ch) - PIVOT_CHARACTER;
#ifdef DEBUGME 
  /* Valid slots are 0 .. ALPHABETS_SIZE-1, so pos == ALPHABETS_SIZE and
   * negative values are range errors too. */
  if ( pos < 0 || pos >= ALPHABETS_SIZE ) {
    printf("RANGE ERROR for ALPHABETS_SIZE\n");
  }
#endif
  return pos;
}

void insert(const char *word) {
  int charIndex = 0;
  trie_t *pCurrentNode = gRoot; 
  if ( !word && strlen(word) != 0 ) {
    return;
  }
#ifdef DEBUGME 
  printf("Word -> %s\n", word);
#endif

  for ( charIndex = 0; charIndex < strlen(word); charIndex++) {
    /* Find the position where new character will be stored */
    int position = getPosition(word[charIndex]);
    /* If current slot is NULL create new one */
    if ( pCurrentNode->trieArray[position] ==  TRIE_NODE_NULLPTR) {
        pCurrentNode->trieArray[position] = getNewNode();
    }
    pCurrentNode->value = word[charIndex];
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  pCurrentNode->isLeaf = 1;
  pCurrentNode->count++;
}

int search(const char *word) {
  int length = 0;
  int index = 0;


  trie_t *pCurrentNode = gRoot; 

  if ( !word && strlen(word) != 0 ) {
    return INVALID_WORD;
  }
  length = strlen(word);
  for ( index = 0; index < length; index++) {
    int position = getPosition(word[index]);
    if ( pCurrentNode->trieArray[position] == TRIE_NODE_NULLPTR ) {
      return NOT_FOUND;
    }
    pCurrentNode = pCurrentNode->trieArray[position];
  }
  
#ifdef DEBUGME 
  printf("Count for %s = %d\n", word, pCurrentNode->count);
#endif
  return FOUND;
}

/*
 * Interactive shell around the trie.
 *
 * Commands (one per line):
 *   insert <word> [<word> ...]   insert every space-separated word
 *   help                         print the command list
 *   exit | quit | q              leave the shell
 * End of input (EOF) also leaves the shell.
 *
 * When compiled with DEBUGME, a self-test on a fixed key set runs after the
 * shell ends. It used to sit behind an endless loop and could never run.
 *
 * Returns 0 on normal termination, EXIT_FAILURE if the root node cannot be
 * allocated.
 */
int main( int argc, char **argv) {
  char buffer[BUFFERSIZE];

  (void)argc;
  (void)argv;

  gRoot = getNewNode();
  if ( !gRoot ) {
    fprintf(stderr, "Unable to allocate the trie root\n");
    return EXIT_FAILURE;
  }

  while ( 1 ) {
    printf("TrieShell > ");
    fflush(stdout);   /* the prompt has no newline, so flush it explicitly */

    /* Stop on EOF or read error instead of spinning on a stale buffer. */
    if ( fgets(buffer, BUFFERSIZE, stdin) == NULL ) {
      break;
    }

    /* Drop the trailing newline, if any; also safe for an empty string. */
    buffer[strcspn(buffer, "\n")] = '\0';

    if(!strcmp(buffer, "exit") || 
       !strcmp(buffer, "quit") || 
       !strcmp(buffer, "q")) {
      break;
    }

    if(!strcmp(buffer, "help")) {
      printf("Commands :\n");
      printf("insert <string|URL>:\n");
      printf("quit | exit | q <To exist from the shell>\n");
      continue;
    }

    /* Accept "insert" only as the first word of the line, not anywhere in it. */
    if(!strncmp(buffer, "insert", 6) &&
       (buffer[6] == ' ' || buffer[6] == '\0')) {
      const char delim[2] = " ";
      char *token = strtok(buffer, delim);   /* skip the command itself */
      while( (token = strtok(NULL, delim)) != NULL )  {
        insert(token);
      }
    }
  }
#ifdef DEBUGME 
  {
    char keys[][8] = {"the", "a", "there", "answer", "any", "by", "bye", "their"};
    const size_t keyCount = sizeof(keys) / sizeof(keys[0]);
    size_t index;

    /* Start from a fresh trie so the result does not depend on shell input. */
    gRoot = getNewNode();
    if ( !gRoot ) {
      return EXIT_FAILURE;
    }
    /* Every key is inserted twice, so each final node ends with count 2. */
    for(index = 0; index < keyCount; index++) {
      insert(keys[index]);
    }
    for(index = 0; index < keyCount; index++) {
      insert(keys[index]);
    }
    printf("Insert Done !!! \n");

    for(index = 0; index < keyCount; index++) {
      int found = search(keys[index]);
      printf("%s --> %s\n", keys[index], found == FOUND ? "FOUND" : "NOT FOUND");
    }
  }
#endif
  return 0;
}

Magical C Language, Loop Un-Rolling and Duffs Device

After so many years of working with the C language, it still surprises me. Today I came across Duff’s Device. It is a really clever technique.
Suppose you want to copy a buffer of 100 bytes; the typical code you would write has the following construct:

/* Naive copy: one loop test and one increment for every byte copied. */
for ( int i = 0; i < 100; i++ ) {
    *dest++ = *source++;
}

There are a bunch of instructions involved here: a compare instruction, an increment instruction, …
But the important thing is that all of this is done 100 times.

Duff’s Device is a clever way of reducing this loop overhead, as shown in the following code.

#include <stdio.h>
#include <string.h>

/* Number of bytes copied by the demo in main(). */
#define SIZE 10000

/*
 * Copy `length` bytes from source to destination with an 8-way unrolled
 * loop (Duff's Device), then print how many loop passes were needed.
 *
 * source       first byte to read; must provide at least `length` bytes
 * destination  first byte to write; must have room for `length` bytes
 * length       number of bytes to copy; zero or negative copies nothing
 *
 * The switch jumps into the middle of the do/while body so that the first
 * pass handles the length % 8 leftover bytes; every later pass copies 8.
 */
void duffsDevice(const char *source, char *destination, int length) {
  int numberOfPass = 0;

  /* Without this guard a length of 0 enters at `case 0` and copies 8 bytes. */
  if (length > 0) {
    /* Passes needed, rounded up, without the overflow risk of (length + 7). */
    int n = length / 8 + (length % 8 != 0);

    switch (length % 8) {
      case 0:
        do {
                    *destination++ = *source++;
          /* fallthrough */
          case 7:   *destination++ = *source++;
          /* fallthrough */
          case 6:   *destination++ = *source++;
          /* fallthrough */
          case 5:   *destination++ = *source++;
          /* fallthrough */
          case 4:   *destination++ = *source++;
          /* fallthrough */
          case 3:   *destination++ = *source++;
          /* fallthrough */
          case 2:   *destination++ = *source++;
          /* fallthrough */
          case 1:   *destination++ = *source++;
          numberOfPass++;
        } while (--n > 0);
    }
  }

  printf("Number of Loops = %d\n", numberOfPass);

}


/*
 * Demo: fill source with 'x' and destination with 'z', copy SIZE bytes with
 * duffsDevice() and print both buffers.
 *
 * The second memset used to target source again, and a memcpy filled
 * destination before duffsDevice() ran, so the output could not show whether
 * the device had copied anything. Now only duffsDevice() writes the copy.
 */
int main(int argc, char** argv) {
  /* Zero-initialised, so the extra byte at index SIZE terminates the string. */
  char source[SIZE + 1] = { 0 };
  char destination[SIZE + 1] = { 0 };

  (void)argc;
  (void)argv;

  memset(source, 'x', SIZE);
  memset(destination, 'z', SIZE);

  printf("Source = [%s]\n", source);

  duffsDevice(source, destination, SIZE);
  destination[SIZE] = '\0';

  printf("Destination = [%s]\n", destination);
  return 0;
}

For copying 10000 bytes it takes only 1250 loops.