Random commentary about Machine Learning, BigData, Spark, Deep Learning, C++, STL, Boost, Perl, Python, Algorithms, Problem Solving and Web Search

Tuesday, May 31, 2011

Three arrays and a triple search

We are given three sorted arrays A[1.. n], B[1.. n], and C[1.. n], and an integer k. Describe an algorithm to find the kth smallest element in A U B U C in O(log n) time.

My solution is a bit complicated, but I believe it works.

Convention: whenever I write A[x], B[x], or C[x], for x>n, I mean a very large element "+infinity".

Set a=b=c=0. We shall keep the invariant that the answer is kth smallest element among A[a+1..n] U B[b+1..n] U C[c+1..n].

We repeat the following reduction procedure.

1. If k=1 return the smallest out of A[a+1], B[b+1], and C[c+1] and stop execution. 2. If k is not divisible by 3, choose (any) smallest out of A[a+1], B[b+1], and C[c+1], increment the respective index by 1, decrement k by 1. 3. Otherwise, denote j=k/3. Choose (any) smallest out of A[a+j], B[b+j], C[c+j], increment the respective index by j, decrement k by j, and start over from 1.

It works, because there exists an ordering of the remaining elements such that all the entries we "skip over" in step 3 are at most (n-2)nd smallest.

The approach mentioned above reduces k to at most 2k/3 in O(1) time. By known theory, it means that the execution time is O(log k)<O(log n).

Maybe: take minimum of A[k/3], B[k/3], C[k/3] and search for it in two other arrays (2*log n) and reduce problem to sub-arrays and reduce k - depends on few cases.

// copy T * sorted = new T[ary_num*kth]; T * cur = sorted; for (size_t i = 0; i < ary_num; i++) for (size_t j = 0; j < size[i] && j < kth; j++) *cur++ = ary[i][start[i]+j];

// selection sort up to kth for (size_t i = 0; i < kth ; i++) { // find min index int iMin = i; for (size_t j = i+1; j < (size_t)(cur - sorted); j++) { if (sorted[j] < sorted[iMin]) iMin = j; } // swap if (iMin != i) { T temp = sorted[i]; sorted[i] = sorted[iMin]; sorted[iMin] = temp; } }

// return kth smallest element T found = sorted[kth-1]; delete[] sorted; return found; }

My solution is a bit complicated, but I believe it works.

ReplyDeleteConvention: whenever I write A[x], B[x], or C[x], for x>n, I mean a very large element "+infinity".

Set a=b=c=0. We shall keep the invariant that the answer is kth smallest element among A[a+1..n] U B[b+1..n] U C[c+1..n].

We repeat the following reduction procedure.

1. If k=1 return the smallest out of A[a+1], B[b+1], and C[c+1] and stop execution.

2. If k is not divisible by 3, choose (any) smallest out of A[a+1], B[b+1], and C[c+1], increment the respective index by 1, decrement k by 1.

3. Otherwise, denote j=k/3. Choose (any) smallest out of A[a+j], B[b+j], C[c+j], increment the respective index by j, decrement k by j, and start over from 1.

It works, because there exists an ordering of the remaining elements such that all the entries we "skip over" in step 3 are at most (n-2)nd smallest.

The approach mentioned above reduces k to at most 2k/3 in O(1) time. By known theory, it means that the execution time is O(log k)<O(log n).

Maybe: take minimum of A[k/3], B[k/3], C[k/3] and search for it in two other arrays (2*log n) and reduce problem to sub-arrays and reduce k - depends on few cases.

ReplyDeleteint _tmain(int argc, _TCHAR* argv[])

ReplyDelete{

char ary1[] = "abcdefghij";

char ary2[] = "opqrstuvwx";

char ary3[] = "0123456789";

size_t ary_len = strlen(ary1);

size_t kth = 17; // not zero-indexing !

char found2 = TripleSearch(ary1, ary2, ary3, ary_len, kth);

return;

}

template

T TripleSearch(T ary1[], T ary2[], T ary3[], size_t ary_len, size_t kth)

{

T* ary[3] = {ary1, ary2, ary3};

size_t start[3] = {0,0,0};

size_t size[3] = {ary_len, ary_len, ary_len};

return MultipleSearch(ary, start, size, 3, kth);

}

template

T MultipleSearch( T* ary[], size_t start[], size_t size[], size_t ary_num,

size_t kth)

{

// Assert

size_t size_sum = 0;

for (size_t i = 0; i < ary_num; i++)

size_sum += size[i];

assert(1 <= kth && kth <= size_sum);

// escape recursive call

if (kth <= ary_num)

return StupidSelectNearMinimal(ary, start, size, ary_num, kth);

// Divide

const size_t remove_default = (kth-1)/ary_num;

int remove_ary = -1;

T remove_min;

for (size_t i = 0; i < ary_num; i++)

{

if (0 < size[i])

{

size_t remove_index = remove_default < size[i] ? remove_default : size[i];

T remove_i = ary[i][start[i] + remove_index -1];

if (remove_ary == -1 || remove_i < remove_min)

{

remove_min = remove_i;

remove_ary = i;

}

}

}

size_t remove_size = remove_default < size[remove_ary] ? remove_default: size[remove_ary];

start[remove_ary] += remove_size;

size[remove_ary] -= remove_size;

kth -= remove_size;

// Conquer

return MultipleSearch(ary, start, size, ary_num, kth);

// No Combine

}

template

T StupidSelectNearMinimal( T* ary[], size_t start[], size_t size[], size_t ary_num,

size_t kth)

{

assert(1 <= kth && kth <= ary_num);

// copy

T * sorted = new T[ary_num*kth];

T * cur = sorted;

for (size_t i = 0; i < ary_num; i++)

for (size_t j = 0; j < size[i] && j < kth; j++)

*cur++ = ary[i][start[i]+j];

// selection sort up to kth

for (size_t i = 0; i < kth ; i++)

{

// find min index

int iMin = i;

for (size_t j = i+1; j < (size_t)(cur - sorted); j++)

{

if (sorted[j] < sorted[iMin])

iMin = j;

}

// swap

if (iMin != i)

{

T temp = sorted[i];

sorted[i] = sorted[iMin];

sorted[iMin] = temp;

}

}

// return kth smallest element

T found = sorted[kth-1];

delete[] sorted;

return found;

}