Alien-boost-mini

 view release on metacpan or  search on metacpan

include/boost/move/algo/adaptive_sort.hpp  view on Meta::CPAN

      if(xbuf.capacity() >= n_key_plus_buf){
         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
         stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
         stable_merge(first, first+n_keys, first+len, comp, xbuf);
      }
      else{
         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After final_merge   : ", len);
}

template<class RandIt, class Compare, class Unsigned, class XBuf>
bool adaptive_sort_build_params
   (RandIt first, Unsigned const len, Compare comp
   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
   , XBuf & xbuf
   )
{
   typedef Unsigned size_type;

   //Calculate ideal parameters and try to collect needed unique keys
   l_base = 0u;

   //Try to find a value near sqrt(len) that is 2^N*l_base where
   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
   //as build_blocks merges to the left iteratively duplicating the
   //merged size and all the buffer must be used just before the final
   //merge to right step. This guarantees "build_blocks" produces 
   //segments of size l_build_buf*2, maximizing the classic merge phase.
   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));

   //The internal buffer can be expanded if there is enough external memory
   while(xbuf.capacity() >= l_intbuf*2){
      l_intbuf *= 2;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //
   //l_intbuf is used as buffer plus the key count
   size_type n_min_ideal_keys = l_intbuf-1;
   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
      --n_min_ideal_keys;
   }
   n_min_ideal_keys += 1;
   BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);

   if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
      n_keys = 0u;
      l_build_buf = l_intbuf;
   }
   else{
      //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
      //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
      //
      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
      //(to be used for keys in combine_all_blocks) as the whole l_build_buf
      //will be backuped in the buffer during build_blocks.
      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);

      //If available memory is 2*sqrt(l), then for "build_params" 
      //the situation is the same as if 2*l_intbuf were collected.
      if(non_unique_buf && collected == n_min_ideal_keys){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      else if(collected == 2*l_intbuf){
         //l_intbuf*2 elements found. Use all of them in the build phase 
         l_build_buf = l_intbuf*2;
         n_keys = l_intbuf;
      }
      else if(collected == (n_min_ideal_keys+l_intbuf)){ 
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
      //is possible (due to very low unique keys), then go to a slow sort based on rotations.
      else{
         BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
         if(collected < 4){  //No combination possible with less that 4 keys
            return false;
         }
         n_keys = l_intbuf;
         while(n_keys&(n_keys-1)){
            n_keys &= n_keys-1;  // make it power or 2
         }
         while(n_keys > collected){
            n_keys/=2;
         }
         //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
         l_intbuf = 0;
         l_build_buf = n_keys;
      }
      BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
   }

   return true;
}

// Main explanation of the sort algorithm.
//
// csqrtlen = ceil(sqrt(len));
//
// * First, 2*csqrtlen unique elements elements are extracted from elements to be
//   sorted and placed in the beginning of the range.
//
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
//   will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
//   are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
//   2*csqrtlen unique elements are again the leading elements of the whole range.
//
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
//   "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
//   elements, etc) of until all trailing (len-2*csqrtlen) elements are merged.



( run in 0.698 second using v1.01-cache-2.11-cpan-8644d7adfcd )