SORTING AND SEARCHING

The ANSI C standard defines qsort(), a function for sorting a table of data. The function follows the format;

qsort(void *base,size_t elements,size_t width,int (*cmp)(void *, void *));

The following short program illustrates the use of qsort() with a character array.

#include <string.h>

main()

{

int n;

char data[10][20];

/* Initialise some arbirary data */

strcpy(data[0],"RED");

strcpy(data[1],"BLUE");

strcpy(data[2],"GREEN");

strcpy(data[3],"YELLOW");

strcpy(data[4],"INDIGO");

strcpy(data[5],"BROWN");

strcpy(data[6],"BLACK");

strcpy(data[7],"ORANGE");

strcpy(data[8],"PINK");

strcpy(data[9],"CYAN");

/* Sort the data table */

qsort(data[0],10,20,strcmp);

/* Print the data table */

for(n = 0; n < 10; n++)

puts(data[n]);

}

Here is a program that implements the shell sort algorithm (this one is based on the routine in K & R), which sorts arrays of pointers based upon the data pointed to by the pointers;

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#define LINELEN 80

#define MAXLINES 2000

char *lines[MAXLINES];

int lastone;

void SHELL(void);

void SHELL()

{

/* SHELL Sort Courtesy of K & R */

int gap;

int i;

int j;

char temp[LINELEN];

for(gap = lastone / 2; gap > 0; gap /= 2)

for(i = gap; i < lastone; i++)

for(j = i - gap; j >= 0 && strcmp(lines[j] , lines[j + gap]) >

0; j -= gap)

{

strcpy(temp,lines[j]);

strcpy(lines[j] , lines[j + gap]);

strcpy(lines[j + gap] , temp);

}

}

void main(int argc, char *argv[])

{

FILE *fp;

char buff[100];

int n;

/* Check command line parameter has been given */

if (argc != 2)

{

printf ("\nError: Usage is SERVSORT file");

exit(0);

}

/* Attempt to open file for updating */

fp = fopen(argv[1],"r+");

if (fp == NULL)

{

printf ("\nError: Unable to open %s",argv[1]);

exit(0);

}

/* Initialise element counter to zero */

lastone = 0;

/* Read file to be sorted */

while((fgets(buff,100,fp)) != NULL)

{

/* Allocate memory block*/

lines[lastone] = malloc (LINELEN);

if (lines[lastone] == NULL)

{

printf ("\nError: Unable to allocate memory");

fclose (fp);

exit(0);

}

strcpy(lines[lastone],buff);

lastone++;

if (lastone > MAXLINES)

{

printf ("\nError: Too many lines in source file");

exit(0);

}

}

/* Call sort function */

SHELL();

/* Close file */

fclose (fp);

/* Reopen file in create mode */

fp = fopen(argv[1],"w+");

/* Copy sorted data from memory to disk */

for(n = 0; n < lastone; n++)

fputs(lines[n],fp);

/* Close file finally */

fclose (fp);

/* Return to calling program */

return(1);

}

If we want to use qsort() with a table of pointers we have to be a bit more clever than usual.

This example uses the colours again, but this time they are stored in main memory and indexed by a table of pointers. Because we have a table of pointers to sort there are two differences between this program's qsort() and the previous one;

First we can't use strcmp() as the qsort() comparison function, secondly the width of the table being sorted is sizeof(char *), that is the size of a character pointer.

Notice the comparison function cmp() that receives two parameters, both are pointers to a pointer. qsort() sends to this function the values held in data[], which are in turn pointers to the data. So we need to use this indirection to locate the data, otherwise we would be comparing the addresses at which the data is held rather than the data itself!

#include <alloc .h>

#include <string.h>

/* Function prototype for comparison function */

int (cmp)(char **,char **);

int cmp(char **s1, char **s2)

{

/* comparison function using pointers to pointers */

return(strcmp(*s1,*s2));

}

main()

{

int n;

char *data[10];

for(n = 0; n < 10; n++)

data[n] = malloc (20);

strcpy(data[0],"RED");

strcpy(data[1],"BLUE");

strcpy(data[2],"GREEN");

strcpy(data[3],"YELLOW");

strcpy(data[4],"INDIGO");

strcpy(data[5],"BROWN");

strcpy(data[6],"BLACK");

strcpy(data[7],"ORANGE");

strcpy(data[8],"PINK");

strcpy(data[9],"CYAN");

/* The data table is comprised of pointers */

/* so the call to qsort() must reflect this */

qsort(data,10,sizeof(char *),cmp);

for(n = 0; n < 10; n++)

puts(data[n]);

}

The quick sort is a fast sorting algorithm that works by subdividing the data table into two sub-tables and then subdividing the sub-tables. As it subdivides the table, so it compares the elements in the table and swaps them as required.

The following program implements the quick sort algorithm, which is usually already used by qsort();

#include <string.h>

#define MAXELE 2000

char data[10][20];

int lastone;

void QKSORT()

{

/* Implementation of QUICKSORT algorithm */

int i;

int j;

int l;

int p;

int r;

int s;

char temp[100];

static int sl[MAXELE][2];

/* sl[] is an index to the sub-table */

l = 0;

r = lastone;

p = 0;

do

{

while(l < r)

{

i = l;

j = r;

s = -1;

while(i < j)

{

if (strcmp(data[i],data[j]) > 0)

{

strcpy(temp,data[i]);

strcpy(data[i],data[j]);

strcpy(data[j],temp);

s = 0 - s;

}

if (s == 1)

i++;

else

j--;

}

if (i + 1 < r)

{

p++;

sl[p][0] = i + 1;

sl[p][1] = r;

}

r = i - 1;

}

if (p != 0)

{

l = sl[p][0];

r = sl[p][1];

p--;

}

}

while(p > 0);

}

main()

{

int n;

/* Initialise arbitrary data */

strcpy(data[0],"RED");

strcpy(data[1],"BLUE");

strcpy(data[2],"GREEN");

strcpy(data[3],"YELLOW");

strcpy(data[4],"INDIGO");

strcpy(data[5],"BROWN");

strcpy(data[6],"BLACK");

strcpy(data[7],"ORANGE");

strcpy(data[8],"PINK");

strcpy(data[9],"CYAN");

/* Set last element indicator */

lastone = 9;

/* Call quick sort function */

QKSORT();

/* Display sorted list */

for(n = 0; n < 10; n++)

puts(data[n]);

}

A table sorted into ascending order can be searched with bsearch(), this takes the format;

bsearch(key,base,num_elements,width,int (*cmp)(void *, void *));

bsearch() returns a pointer to the first element in the table that matches the key, or zero if no match is found.

Or you can write your own binary search function thus;

int BSRCH(char *key, void *data, int numele, int width)

{

/* A binary search function returning one if found */

/* Zero if not found */

int bp;

int tp;

int mp;

int result;

char *p;

bp = 0;

tp = numele;

mp = (tp + bp) / 2;

/* Locate element mp in table by assigning pointer to start */

/* and incrementing it by width * mp */

p = data;

p += width * mp;

while((result = strcmp(p,key)) != 0)

{

if (mp >= tp)

/* Not found! */

return(0);

if (result < 0)

bp = mp + 1;

else

tp = mp - 1;

mp = (bp + tp) / 2;

p = data;

p += width * mp;

}

return(1);

}

void main()

{

int result;

char data[10][20];

/* Initialise some arbirary data */

strcpy(data[0],"RED");

strcpy(data[1],"BLUE");

strcpy(data[2],"GREEN");

strcpy(data[3],"YELLOW");

strcpy(data[4],"INDIGO");

strcpy(data[5],"BROWN");

strcpy(data[6],"BLACK");

strcpy(data[7],"ORANGE");

strcpy(data[8],"PINK");

strcpy(data[9],"CYAN");

/* Sort the data table */

qsort(data[0],10,20,strcmp);

result = BSRCH("CYAN",data[0],10,20);

printf ("\n%s\n",(result == 0) ? "Not found" : "Located okay");

}

There are other sorting algorithms as well. This program incorporates the QUICK SORT, BUBBLE SORT, FAST BUBBLE SORT, INSERTION SORT and SHELL SORT for comparing how each performs on a random 1000 item string list;

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

char data[1000][4];

char save[1000][4];

int lastone;

void INITDATA(void);

void QKSORT(void);

void SHELL(void);

void BUBBLE(void);

void FBUBBLE(void);

void INSERTION(void);

void MKDATA(void);

void QKSORT()

{

/* Implementation of QUICKSORT algorithm */

int i;

int j;

int l;

int p;

int r;

int s;

char temp[20];

static int sl[1000][2];

l = 0;

r = lastone;

p = 0;

do

{

while(l < r)

{

i = l;

j = r;

s = -1;

while(i < j)

{

if (strcmp(data[i],data[j]) > 0)

{

strcpy(temp,data[i]);

strcpy(data[i],data[j]);

strcpy(data[j],temp);

s = 0 - s;

}

if (s == 1)

i++;

else

j--;

}

if (i + 1 < r)

{

p++;

sl[p][0] = i + 1;

sl[p][1] = r;

}

r = i - 1;

}

if (p != 0)

{

l = sl[p][0];

r = sl[p][1];

p--;

}

}

while(p > 0);

}

void SHELL()

{

/* SHELL Sort Courtesy of K & R */

int gap;

int i;

int j;

char temp[20];

for(gap = lastone / 2; gap > 0; gap /= 2)

for(i = gap; i < lastone; i++)

for(j = i - gap; j >= 0 && strcmp(data[j] , data[j + gap]) > 0;

j -= gap)

{

strcpy(temp,data[j]);

strcpy(data[j] , data[j + gap]);

strcpy(data[j + gap] , temp);

}

}

void BUBBLE()

{

int a;

int b;

char temp[20];

for(a = lastone; a >= 0; a--)

{

for(b = 0; b < a; b++)

{

if(strcmp(data[b],data[b + 1]) > 0)

{

strcpy(temp,data[b]);

strcpy(data[b] , data[b + 1]);

strcpy(data[b + 1] , temp);

}

}

}

}

void FBUBBLE()

{

/* bubble sort with swap flag*/

int a;

int b;

int s;

char temp[20];

s = 1;

for(a = lastone; a >= 0 && s == 1; a--)

{

s = 0;

for(b = 0; b < a; b++)

{

if(strcmp(data[b],data[b + 1]) > 0)

{

strcpy(temp,data[b]);

strcpy(data[b] , data[b + 1]);

strcpy(data[b + 1] , temp);

s = 1;

}

}

}

}

void INSERTION()

{

int a;

int b;

char temp[20];

for(a = 0; a < lastone; a++)

{

b = a;

strcpy(temp,data[a + 1]);

while(b >= 0)

{

if (strcmp(temp,data[b]) < 0)

{

strcpy(data[b+1],data[b]);

b--;

}

else

break ;

}

strcpy(data[b+1],temp);

}

}

void MKDATA()

{

/* Initialise arbitrary data */

/* Uses random(), which is not ANSI C */

/* Returns a random number between 0 and n - 1 */

int n;

for(n = 0; n < 1000; n++)

sprintf(save[n],"%d",random(1000));

}

void INITDATA()

{

int n;

for(n = 0 ; n < 1000; n++)

strcpy(data[n],save[n]);

}

void main()

{

MKDATA();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call quick sort function */

QKSORT();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 1000;

/* Call shell sort function */

SHELL();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call bubble sort function */

BUBBLE();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call bubble sort with swap flag function */

FBUBBLE();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call insertion sort function */

INSERTION();

}

Here are the profiler results of the above test program run on 1000 and 5000 random items;

STRING SORT - 1000 RANDOM ITEMS

FBUBBLE 26.436 sec 41% |********************************************

BUBBLE 26.315 sec 41% |*******************************************

INSERTION 10.210 sec 15% |***************

SHELL 0.8050 sec 1% |*

QKSORT 0.3252 sec <1% |

STRING SORT - 5000 RANDOM ITEMS

FBUBBLE 563.70 sec 41% |********************************************

BUBBLE 558.01 sec 41% |********************************************

INSERTION 220.61 sec 16% |***************

SHELL 5.2531 sec <1% |

QKSORT 0.8379 sec <1% |

Here is the same test program amended for sorting tables of integers;

/* Integer sort test program */

#include <stdio.h>

#include <stdlib.h>

void INITDATA(void);

void QKSORT(void);

void SHELL(void);

void BUBBLE(void);

void FBUBBLE(void);

void INSERTION(void);

void MKDATA(void);

int data[1000];

int save[1000];

int lastone;

void QKSORT()

{

/* Implementation of QUICKSORT algorithm */

int i;

int j;

int l;

int p;

int r;

int s;

int temp;

static int sl[1000][2];

l = 0;

r = lastone;

p = 0;

do

{

while(l < r)

{

i = l;

j = r;

s = -1;

while(i < j)

{

if (data[i] > data[j])

{

temp = data[i];

data[i] = data[j];

data[j] = temp;

s = 0 - s;

}

if (s == 1)

i++;

else

j--;

}

if (i + 1 < r)

{

p++;

sl[p][0] = i + 1;

sl[p][1] = r;

}

r = i - 1;

}

if (p != 0)

{

l = sl[p][0];

r = sl[p][1];

p--;

}

}

while(p > 0);

}

void SHELL()

{

/* SHELL Sort Courtesy of K & R */

int gap;

int i;

int j;

int temp;

for(gap = lastone / 2; gap > 0; gap /= 2)

for(i = gap; i < lastone; i++)

for(j = i - gap; j >= 0 && data[j] > data[j + gap];

j -= gap)

{

temp = data[j];

data[j] = data[j + gap];

data[j + gap] = temp;

}

}

void BUBBLE()

{

int a;

int b;

int temp;

for(a = lastone; a >= 0; a--)

{

for(b = 0; b < a; b++)

{

if(data[b] > data[b + 1])

{

temp = data[b];

data[b] = data[b + 1];

data[b + 1] = temp;

}

}

}

}

void FBUBBLE()

{

/* bubble sort with swap flag */

int a;

int b;

int s;

int temp;

s = 1;

for(a = lastone; a >= 0 && s == 1; a--)

{

s = 0;

for(b = 0; b < lastone - a; b++)

{

if(data[b] > data[b + 1])

{

temp = data[b];

data[b] = data[b + 1];

data[b + 1] = temp;

s = 1;

}

}

}

}

void INSERTION()

{

int a;

int b;

int temp;

for(a = 0; a < lastone; a++)

{

b = a;

temp = data[a + 1];

while(b >= 0)

{

if (temp < data[b])

{

data[b+1] = data[b];

b--;

}

else

break ;

}

data[b+1] = temp;

}

}

void MKDATA()

{

int n;

for(n = 0; n < 1000; n++)

save[n] = random(1000);

}

void INITDATA()

{

int n;

for(n = 0; n < 1000; n++)

data[n] = save[n];

}

void main()

{

int n;

/* Create 1000 random elements */

MKDATA();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call quick sort function */

QKSORT();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 1000;

/* Call shell sort function */

SHELL();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call bubble sort function */

BUBBLE();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call bubble sort with swap flag function */

FBUBBLE();

/* Initialise arbitrary data */

INITDATA();

/* Set last element indicator */

lastone = 999;

/* Call insertion sort function */

INSERTION();

}

And here are the profiler results for this program;

INTEGER SORTS - 1000 RANDOM NUMBERS (0 - 999)

FBUBBLE 3.7197 sec 41% |********************************************

BUBBLE 3.5981 sec 39% |******************************************

INSERTION 1.4258 sec 15% |***************

SHELL 0.1207 sec 1% |*

QKSORT 0.0081 sec <1% |

INTEGER SORTS - 5000 RANDOM NUMBERS (0 - 999)

FBUBBLE 92.749 sec 42% |********************************************

BUBBLE 89.731 sec 41% |********************************************

INSERTION 35.201 sec 16% |***************

SHELL 0.9838 sec <1% |

QKSORT 0.0420 sec <1% |

INTEGER SORTS - 5000 RANDOM NUMBERS (0 - 99)

FBUBBLE 92.594 sec 42% |*****************************************

BUBBLE 89.595 sec 40% |****************************************

INSERTION 35.026 sec 16% |***************

SHELL 0.7563 sec <1% |

QKSORT 0.6018 sec <1% |

INTEGER SORTS - 5000 RANDOM NUMBERS (0 - 9)

FBUBBLE 89.003 sec 41% |*******************************************

BUBBLE 86.921 sec 40% |*******************************************

INSERTION 31.544 sec 14% |***************

QKSORT 6.0358 sec 2% |**

SHELL 0.5424 sec <1% |

INTEGER SORTS - 5000 DESCENDING ORDERED NUMBERS

FBUBBLE 122.99 sec 39% |******************************************

BUBBLE 117.22 sec 37% |****************************************

INSERTION 70.595 sec 22% |**********************

SHELL 0.6438 sec <1% |

QKSORT 0.0741 sec <1% |

INTEGER SORTS - 5000 ORDERED NUMBERS

BUBBLE 62.908 sec 99% |******************************************

SHELL 0.3971 sec <1% |

INSERTION 0.0510 sec <1% |

QKSORT 0.0382 sec <1% |

FBUBBLE 0.0251 sec <1% |

INTEGER SORTS - 10000 RANDOM NUMBERS (0 - 999)

FBUBBLE 371.18 sec 42% |****************************************

BUBBLE 359.06 sec 41% |***************************************

INSERTION 140.88 sec 16% |**************

SHELL 2.0423 sec <1% |

QKSORT 0.6183 sec <1% |

Theory has it that the performance of a sorting algorithm is dependant upon;

a) the number of items to be sorted and

b) how unsorted the list is to start with.

With this in mind it is worth testing the various sorting routines described here to determine which one will best suit your particular application. If you examine the above profiler results you will see that:

1) With an already sorted list FBUBBLE() executes fastest

2) With a random list of small variations between the values SHELL()

executes fastest

3) With a random list of large variations between the values QKSORT()

executes the fastest

What the profiler does not highlight is that when the comparison aspect of a sort function takes a disproportionately long time to execute in relation to the rest of the sort function, then the bubble sort with a swap flag will execute faster than the bubble sort with out a swap flag.

When considering a sort routine take into consideration memory constraints and the type of data to be sorted as well as the relative performances of the sort functions. Generally, the faster a sort operates, the more memory it requires. Compare the simple bubble sort with the quick sort, and you will see that the quick sort requires far more memory than the bubble sort.

 

  Dynamic Memory Allocation, and Variable Arguments.

 

Links:  Home C Programming Guide C++ programming Guide

Contact Me