ProxMap Sort

Sort Type: Address Calculation Sorting


Algorithm

Description:

ProxMap Sorting uses a different approach to sorting, which conceptually is similar to hashing. This technique uses a variation on hashing with buckets, but with buckets of different sizes.


Data holder: Filled array of unsorted data structures and an empty array of the same size into which the structures will be copied in sorted order.

Technique:
  1. Create 4 arrays and initialize
    1. int HitList[ARRAYSIZE] -- Keeps a count of the number of hits at each index in the sorted array. HitList[x] holds a count of the number of items whose keys hashed to x. Initialize to all 0.
    2. int Location[ARRAYSIZE] -- Indices in the sorted array calculated using the hash function. Item x in the unsorted array has its hash index stored in Location[x]. Does not need to be initialized.
    3. int ProxMap[ARRAYSIZE] -- Starting index in the sorted array for each bucket. If HitList[x] is not 0 then ProxMap[x] contains the starting index for the bucket of keys hashing to x. Initialize all entries to -1 (unused).
    4. StructType DataArray2[ARRAYSIZE] -- Array to hold the sorted array. Initialize the key of every entry to -1 (unused).
  2. Use the keys of the unsorted array and a carefully chosen hash function to generate the indices into the sorted array and save these. The hash function must be order-preserving: a larger key must never hash to a smaller index than a smaller key. Store each hash index in the Location[] array. Location[i] will hold the calculated hash index for the ith structure in the unsorted array.
    HIdx = Hash(DataArray[i]);
    Location[i] = HIdx;

    Care must be taken in selecting the hash function so that the keys are mapped to the entire range of indexes in the array. A good approach is to convert the keys to integer values if they are strings, then map all keys to floats in the range 0<= Key < 1. Finally, map these floats to the array indices using the following formulas:
          /* Map all integer keys to floats in range 0<= Key < 1 (cast first: integer division would always give 0) */
         KeyFloat = (float)KeyInt / (1 + MAXKEYINTVALUE);
          /* Map all float keys to indices in range 0<= Index < ARRAYSIZE */
         Index = floor(ARRAYSIZE * KeyFloat); 
    

    This will then produce indices ensuring that all the keys are kept in ascending order across buckets (hashes computed using a mod operator will not).
  3. Keep a count of the number of hits at each hash index. HitList[Hidx]++
  4. Create the ProxMap (short for proximity map) from the hit list giving the starting index in the sorted array for each bucket.
         RunningTotal = 0; /* Init counter */
         for(i=0; i<ARRAYSIZE; i++)
         {
             if(HitList[i] > 0)    /* There were hits at this address */
             {
                 ProxMap[i] = RunningTotal;    /* Set start index for this set */
                 RunningTotal += HitList[i];
             }
         } 
    
  5. Move keys from the unsorted array to the sorted array using an insertion sort technique for each bucket.


    In this diagram 5 sets of structures are sorted when delta = 5


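Analysis: ProxMap sorting runs in a surprisingly fast O(n) time on average, provided the hash function spreads the keys evenly across the buckets. If many keys land in the same bucket, the per-bucket insertion sort dominates and the running time degrades toward O(n^2).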

Sample Code:

 /***************************************/
 /* ProxmapSort()                       */
 /*                                     */
 /* Sort records on integer key using   */
 /*  a proxmap sort.                    */
 /*                                     */
 /* Args:                               */
 /*  DataArray  - unsorted input, read  */
 /*               only                  */
 /*  DataArray2 - receives the records  */
 /*               in ascending key      */
 /*               order                 */
 /*  count      - number of records;    */
 /*               must not exceed       */
 /*               ARRAYSIZE             */
 /*                                     */
 /* A key of -1 marks an empty slot in  */
 /*  DataArray2, so no record may use   */
 /*  -1 as its key.                     */
 /***************************************/
 void ProxmapSort(StructType DataArray[], StructType DataArray2[],int count)
 {
     int i;
     int HitList[ARRAYSIZE];    /* HitList[h] = number of keys that hashed to h */
     int Hidx;                  /* Hashed index */
     int ProxMap[ARRAYSIZE];    /* ProxMap[h] = first slot of bucket h in DataArray2, -1 if unused */
     int RunningTotal;          /* Number of hits seen so far */
     int Location[ARRAYSIZE];   /* Location[i] = hashed index of DataArray[i] */
     int KeyMax, KeyMin;        /* Used in Hash() */
     int start;                 /* First slot of the current record's bucket */

     /* The scratch arrays above hold ARRAYSIZE entries but are indexed up to
        count-1 below, so a larger count would overrun them. Nothing to do
        for an empty input either. */
     if(count <= 0 || count > ARRAYSIZE) return;

     /* Initialize hit list, proxmap and destination array */
     for(i=0; i<count; i++)
     {
         HitList[i] = 0;           /* Init to all 0 hits */
         ProxMap[i] = -1;          /* Init to all unused */
         DataArray2[i].key = -1;   /* Init to all empty */
     }

     /* Find the key bounds for use in computing the hash. The seeds assume
        keys lie in 0..32767; a key outside that range still widens the
        bounds, so KeyMin <= key <= KeyMax holds for every record. */
     KeyMax = 0;
     KeyMin = 32767;
     for(i=0; i<count; i++)
     {
         if(DataArray[i].key > KeyMax) KeyMax = DataArray[i].key;
         if(DataArray[i].key < KeyMin) KeyMin = DataArray[i].key;
     }

     /* Compute the hit count list (note this is not a collision count, but
        a collision count+1) */
     for(i=0; i<count; i++)
     {
         Hidx = Hash(DataArray[i].key, KeyMax, KeyMin, count); /* Calculate hash index */
         Location[i] = Hidx; /* Save this for later. (Step 2) */
         HitList[Hidx]++;    /* Update the hit count (Step 3) */
     }

     /* Create the proxmap from the hit list. (Step 4) */
     RunningTotal = 0;        /* Init counter */
     for(i=0; i<count; i++)
     {
         if(HitList[i] > 0)    /* There were hits at this address */
         {
             ProxMap[i] = RunningTotal;    /* Set start index for this set */
             RunningTotal += HitList[i];
         }
     }

     // NOTE: UNCOMMENT THE FOLLOWING SECTION TO SEE WHAT IS IN THE ARRAYS, BUT
     //       COMMENT IT OUT WHEN DOING A TEST RUN AS PRINTING IS VERY SLOW AND
     //       WILL RESULT IN AN INACCURATE TIME FOR PROXMAP SORT.
     /* ---------------------------------------------------- 
     // Print HitList[] to see what it looks like
     printf("HitList:\n");
     for(i=0; i<count; i++)
         printf("%d ", HitList[i]);
     printf("\n\n");
     getch();
      // Print ProxMap[] to see what it looks like
     printf("ProxMap:\n");
     for(i=0; i<count; i++)
         printf("%d ", ProxMap[i]);
     printf("\n\n");
     getch();
      // Print Location[] to see what it looks like
     printf("Location:\n");
     for(i=0; i<count; i++)
         printf("%d ", Location[i]);
     printf("\n\n");
     getch();
     ---------------------------------------------  */

     /* Move the records from DataArray to DataArray2 (Step 5). */
     /* Relies on DataArray2 having been initialized to all empty slots (key = -1) */
     for(i=0; i<count; i++)
     {
         start = ProxMap[Location[i]];
         if(DataArray2[start].key == -1)  /* First record to reach this bucket */
         {
             /* Move the structure into the sorted array */
             DataArray2[start] = DataArray[i];
         }
         else    /* Bucket already has records: insert in key order */
         {
             ProxMapInsertionSort(DataArray2, &DataArray[i], start, HitList[Location[i]]);
         }
     }
  }

 /***************************************/
 /* Hash()                              */
 /*                                     */
 /* Calculate a hash index.             */
 /***************************************/
 int Hash(int key, int KeyMax, int KeyMin, int count)
 {
     float    keyFloat;      /* Map integer key to float in the range 0 <= key < 1 */
     keyFloat = (float)(key - KeyMin) / (float)(1 + KeyMax - KeyMin);
     /* Map float key to indices in range 0 <= index < count */
     return((int)floor(count * keyFloat));
 }

 /***************************************/
 /* ProxMapInsertionSort()              */
 /*                                     */
 /* Use insertion sort to insert a      */
 /*   struct into a subarray.           */
 /*                                     */
 /* Empty slots in the subarray are     */
 /*   those whose key is -1.            */
 /***************************************/
 void ProxMapInsertionSort(StructType DataArray[], StructType *theStruct,
                            int startIdx, int listLen)
 {
     /* Args:    DataArray - Partly sorted array
                 *theStruct - Structure to insert
                 startIdx - Index of start of subarray
                 listLen - Number of slots reserved for the subarray */
     int i;

     /* Start at the last slot of the subarray and back up over the empty
        slots to the first free position after the filled ones. */
     i = startIdx + listLen - 1;
     while((i > startIdx) && (DataArray[i-1].key == -1)) i--;

     /* Shift larger keys up one slot to open the insertion point. The bounds
        test comes first so DataArray[i-1] is never read once i reaches
        startIdx (which would be index -1 when startIdx is 0). */
     while((i > startIdx) && (DataArray[i-1].key > theStruct->key))
     {
         DataArray[i] = DataArray[i-1];
         i--;
     }

     /* Insert the key */
     DataArray[i] = *theStruct;
 }