Algorithm-RectanglesContainingDot_XS
view release on metacpan or search on metacpan
* 1 12 34 5
* 2 1234 5
* 3 12345
* 4 12345
*
* Adjacent pairs are merged in "grand sweeps" through the input.
* This means, on pass 1, the records in runs 1 and 2 aren't revisited until
* runs 3 and 4 are merged and the records from run 5 have been copied.
* The only cache that matters is one large enough to hold *all* the input.
* On some platforms, this may be many times slower than smaller caches.
*
* The following pseudo-code uses the same basic merge algorithm,
* but in a divide-and-conquer way.
*
* # merge $runs runs at offset $offset of list $list1 into $list2.
* # all unmerged runs ($runs == 1) originate in list $base.
* sub mgsort2 {
* my ($offset, $runs, $base, $list1, $list2) = @_;
*
* if ($runs == 1) {
* if ($list1 is $base) copy run to $list2
* return offset of end of list (or copy)
* } else {
* $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
* mgsort2($off2, $runs/2, $base, $list2, $list1)
* merge the adjacent runs at $offset of $list1 into $list2
* return the offset of the end of the merged runs
* }
* }
* mgsort2(0, $runs, $base, $aux, $base);
*
* For our 5 runs, the tree of calls looks like
*
* 5
* 3 2
* 2 1 1 1
* 1 1
*
* 1 2 3 4 5
*
* and the corresponding activity looks like
*
* copy runs 1 and 2 from base to aux
* merge runs 1 and 2 from aux to base
* (run 3 is where it belongs, no copy needed)
* merge runs 12 and 3 from base to aux
* (runs 4 and 5 are where they belong, no copy needed)
* merge runs 4 and 5 from base to aux
* merge runs 123 and 45 from aux to base
*
* Note that we merge runs 1 and 2 immediately after copying them,
* while they are still likely to be in fast cache. Similarly,
* run 3 is merged with run 12 while it still may be lingering in cache.
* This implementation should therefore enjoy much of the cache-friendly
* behavior that quicksort does. In addition, it does less copying
* than the original mergesort implementation (only runs 1 and 2 are copied)
* and the "balancing" of merges is better (merged runs comprise more nearly
* equal numbers of original runs).
*
* The actual cache-friendly implementation will use a pseudo-stack
* to avoid recursion, and will unroll processing of runs of length 2,
* but it is otherwise similar to the recursive implementation.
*/
/*
 * One frame of the pseudo-stack that sortsv() uses in place of recursion.
 * A frame whose runs field is 0 marks a level where both runs have been
 * constructed and are ready to be merged.
 */
typedef struct {
IV offset; /* offset of 1st of 2 runs at this level */
IV runs; /* how many runs must be combined into 1 */
} off_runs; /* pseudo-stack element */
/*
 * sortsv - sort the nmemb SV pointers starting at base, using cmp to
 * compare elements.
 *
 * Non-recursive merge sort driven by a pseudo-stack of off_runs frames.
 * Work alternates between base and an auxiliary array aux: an on-stack
 * buffer when nmemb <= SMALLSORT, otherwise a buffer obtained with New()
 * and released with Safefree() at the `done` label.  The array in which a
 * constructed run must land is which[level & 1], so the single run built
 * at level 0 ends up in base.
 *
 * Returns nothing.  An input of 0 or 1 elements returns immediately.
 *
 * NOTE(review): NEXT, POTHER, PNELEM, PINDEX, FROMTOUPTO, RTHRESH,
 * SMALLSORT and dynprep are defined outside this view; the comments below
 * describe them only as far as their use in this function shows.
 */
static void
sortsv(pTHX_ SV **base, size_t nmemb, SVCOMPARE_t cmp)
{
IV i, run, runs, offset;
I32 sense, level;
int iwhich;
register SV **f1, **f2, **t, **b, **p, **tp2, **l1, **l2, **q;
SV **aux, **list1, **list2;
SV **p1;
SV * small[SMALLSORT];
SV **which[3];
off_runs stack[60], *stackp; /* pseudo-stack standing in for recursion; fixed at 60 frames */
SVCOMPARE_t savecmp = 0; /* NOTE(review): not referenced in the visible part of this function */
if (nmemb <= 1) return; /* sorted trivially */
if (nmemb <= SMALLSORT) aux = small; /* use stack for aux array */
else { New(799,aux,nmemb,SV *); } /* allocate auxiliary array */
level = 0;
stackp = stack;
/* The value returned by dynprep is used as the number of initial runs.
 * NOTE(review): presumably dynprep also records the run boundaries that
 * NEXT() reads later -- confirm against its definition.
 */
stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
stackp->offset = offset = 0;
/* which[0] and which[2] are both base, so which[iwhich] followed by
 * which[++iwhich] always yields one array and then the other, without
 * a wrap-around test.
 */
which[0] = which[2] = base;
which[1] = aux;
for (;;) {
/* On levels where both runs have been constructed (stackp->runs == 0),
 * merge them, and note the offset of their end, in case the offset
 * is needed at the next level up. Hop up a level, and,
 * as long as stackp->runs is 0, keep merging.
 */
if ((runs = stackp->runs) == 0) {
iwhich = level & 1;
list1 = which[iwhich]; /* area where runs are now */
list2 = which[++iwhich]; /* area for merged runs */
do {
offset = stackp->offset;
f1 = p1 = list1 + offset; /* start of first run */
p = tp2 = list2 + offset; /* where merged run will go */
t = NEXT(p); /* where first run ends */
f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
t = NEXT(t); /* where second run ends */
l2 = POTHER(t, list2, list1); /* ... on the other side */
offset = PNELEM(list2, t);
/* NOTE(review): the body of this merge loop looks incomplete in this
 * view: the comment that follows is never closed before the search
 * loop begins, and q, b and sense are used without any visible
 * assignment.  Confirm against the complete file before editing here.
 */
while (f1 < l1 && f2 < l2) {
/* If head 1 is larger than head 2, find ALL the elements
** in list 2 strictly less than head1, write them all,
** then head 1. Then compare the new heads, and repeat,
** until one or both lists are exhausted.
**
** In all comparisons (after establishing
** which head to merge) the item to merge
** (at pointer q) is the first operand of
for (i = 1, run = 0 ;;) {
if ((p = PINDEX(b, i)) >= t) {
/* off the end */
if (((p = PINDEX(t, -1)) > b) &&
(cmp(aTHX_ *q, *p) <= sense))
t = p;
else b = p;
break;
} else if (cmp(aTHX_ *q, *p) <= sense) {
t = p;
break;
} else b = p;
if (++run >= RTHRESH) i += i; /* after RTHRESH probes, double the stride on each further probe */
}
/* q is known to follow b and must be inserted before t.
** Increment b, so the range of possibilities is [b,t).
** Round binary split down, to favor early appearance.
** Adjust b and t until q belongs just before t.
*/
b++;
while (b < t) {
p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
if (cmp(aTHX_ *q, *p) <= sense) {
t = p;
} else b = p + 1;
}
/* Copy all the strictly low elements */
if (q == f1) {
FROMTOUPTO(f2, tp2, t);
*tp2++ = *f1++;
} else {
FROMTOUPTO(f1, tp2, t);
*tp2++ = *f2++;
}
}
/* Run out remaining list */
if (f1 == l1) {
if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
} else FROMTOUPTO(f1, tp2, l1);
/* Store, in the slot NEXT() reads for the start of the first run, the
 * list1-side position matching the end of the merged output (tp2).
 */
p1 = NEXT(p1) = POTHER(tp2, list2, list1);
if (--level == 0) goto done; /* back at level 0: nothing left to merge */
--stackp;
t = list1; list1 = list2; list2 = t; /* swap lists */
} while ((runs = stackp->runs) == 0);
}
stackp->runs = 0; /* current run will finish level */
/* While there are more than 2 runs remaining,
 * turn them into exactly 2 runs (at the "other" level),
 * each made up of approximately half the runs.
 * Stack the second half for later processing,
 * and set about producing the first half now.
 */
while (runs > 2) {
++level;
++stackp;
stackp->offset = offset;
runs -= stackp->runs = runs / 2; /* push runs/2 for later; continue with the remaining runs - runs/2 */
}
/* We must construct a single run from 1 or 2 runs.
 * All the original runs are in which[0] == base.
 * The run we construct must end up in which[level&1].
 */
iwhich = level & 1;
if (runs == 1) {
/* Constructing a single run from a single run.
 * If it's where it belongs already, there's nothing to do.
 * Otherwise, copy it to where it belongs.
 * A run of 1 is either a singleton at level 0,
 * or the second half of a split 3. In neither event
 * is it necessary to set offset. It will be set by the merge
 * that immediately follows.
 */
if (iwhich) { /* Belongs in aux, currently in base */
f1 = b = PINDEX(base, offset); /* where list starts */
f2 = PINDEX(aux, offset); /* where list goes */
t = NEXT(f2); /* where list will end */
offset = PNELEM(aux, t); /* offset thereof */
t = PINDEX(base, offset); /* where it currently ends */
FROMTOUPTO(f1, f2, t); /* copy */
NEXT(b) = t; /* set up parallel pointer */
} else if (level == 0) goto done; /* single run at level 0 */
} else {
/* Constructing a single run from two runs.
 * The merge code at the top will do that.
 * We need only make sure the two runs are in the "other" array,
 * so they'll end up in the correct array after the merge.
 */
++level;
++stackp;
stackp->offset = offset;
stackp->runs = 0; /* take care of both runs, trigger merge */
if (!iwhich) { /* Merged runs belong in aux, copy 1st */
f1 = b = PINDEX(base, offset); /* where first run starts */
f2 = PINDEX(aux, offset); /* where it will be copied */
t = NEXT(f2); /* where first run will end */
offset = PNELEM(aux, t); /* offset thereof */
p = PINDEX(base, offset); /* end of first run */
t = NEXT(t); /* where second run will end */
t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
FROMTOUPTO(f1, f2, t); /* copy both runs */
NEXT(b) = p; /* parallel pointer for 1st */
NEXT(p) = t; /* ... and for second */
}
}
}
done:
if (aux != small) Safefree(aux); /* free iff allocated */
return;
}
( run in 0.958 second using v1.01-cache-2.11-cpan-4991d5b9bd9 )