Random commentary about Machine Learning, BigData, Spark, Deep Learning, C++, STL, Boost, Perl, Python, Algorithms, Problem Solving and Web Search
Tuesday, May 31, 2011
Three arrays and a triple search
We are given three sorted arrays A[1.. n], B[1.. n], and C[1.. n], and an integer k. Describe an algorithm to find the kth smallest element in A U B U C in O(log n) time.
My solution is a bit complicated, but I believe it works.
Convention: whenever I write A[x], B[x], or C[x], for x>n, I mean a very large element "+infinity".
Set a=b=c=0. We shall keep the invariant that the answer is kth smallest element among A[a+1..n] U B[b+1..n] U C[c+1..n].
We repeat the following reduction procedure.
1. If k=1 return the smallest out of A[a+1], B[b+1], and C[c+1] and stop execution. 2. If k is not divisible by 3, choose (any) smallest out of A[a+1], B[b+1], and C[c+1], increment the respective index by 1, decrement k by 1. 3. Otherwise, denote j=k/3. Choose (any) smallest out of A[a+j], B[b+j], C[c+j], increment the respective index by j, decrement k by j, and start over from 1.
It works, because there exists an ordering of the remaining elements such that all the entries we "skip over" in step 3 are at most (n-2)nd smallest.
The approach mentioned above reduces k to at most 2k/3 in O(1) time. By known theory, it means that the execution time is O(log k)<O(log n).
Maybe: take minimum of A[k/3], B[k/3], C[k/3] and search for it in two other arrays (2*log n) and reduce problem to sub-arrays and reduce k - depends on few cases.
// copy T * sorted = new T[ary_num*kth]; T * cur = sorted; for (size_t i = 0; i < ary_num; i++) for (size_t j = 0; j < size[i] && j < kth; j++) *cur++ = ary[i][start[i]+j];
// selection sort up to kth for (size_t i = 0; i < kth ; i++) { // find min index int iMin = i; for (size_t j = i+1; j < (size_t)(cur - sorted); j++) { if (sorted[j] < sorted[iMin]) iMin = j; } // swap if (iMin != i) { T temp = sorted[i]; sorted[i] = sorted[iMin]; sorted[iMin] = temp; } }
// return kth smallest element T found = sorted[kth-1]; delete[] sorted; return found; }
My solution is a bit complicated, but I believe it works.
ReplyDeleteConvention: whenever I write A[x], B[x], or C[x], for x>n, I mean a very large element "+infinity".
Set a=b=c=0. We shall keep the invariant that the answer is kth smallest element among A[a+1..n] U B[b+1..n] U C[c+1..n].
We repeat the following reduction procedure.
1. If k=1 return the smallest out of A[a+1], B[b+1], and C[c+1] and stop execution.
2. If k is not divisible by 3, choose (any) smallest out of A[a+1], B[b+1], and C[c+1], increment the respective index by 1, decrement k by 1.
3. Otherwise, denote j=k/3. Choose (any) smallest out of A[a+j], B[b+j], C[c+j], increment the respective index by j, decrement k by j, and start over from 1.
It works, because there exists an ordering of the remaining elements such that all the entries we "skip over" in step 3 are at most (n-2)nd smallest.
The approach mentioned above reduces k to at most 2k/3 in O(1) time. By known theory, it means that the execution time is O(log k)<O(log n).
Maybe: take minimum of A[k/3], B[k/3], C[k/3] and search for it in two other arrays (2*log n) and reduce problem to sub-arrays and reduce k - depends on few cases.
ReplyDeleteint _tmain(int argc, _TCHAR* argv[])
ReplyDelete{
char ary1[] = "abcdefghij";
char ary2[] = "opqrstuvwx";
char ary3[] = "0123456789";
size_t ary_len = strlen(ary1);
size_t kth = 17; // not zero-indexing !
char found2 = TripleSearch(ary1, ary2, ary3, ary_len, kth);
return;
}
template
T TripleSearch(T ary1[], T ary2[], T ary3[], size_t ary_len, size_t kth)
{
T* ary[3] = {ary1, ary2, ary3};
size_t start[3] = {0,0,0};
size_t size[3] = {ary_len, ary_len, ary_len};
return MultipleSearch(ary, start, size, 3, kth);
}
template
T MultipleSearch( T* ary[], size_t start[], size_t size[], size_t ary_num,
size_t kth)
{
// Assert
size_t size_sum = 0;
for (size_t i = 0; i < ary_num; i++)
size_sum += size[i];
assert(1 <= kth && kth <= size_sum);
// escape recursive call
if (kth <= ary_num)
return StupidSelectNearMinimal(ary, start, size, ary_num, kth);
// Divide
const size_t remove_default = (kth-1)/ary_num;
int remove_ary = -1;
T remove_min;
for (size_t i = 0; i < ary_num; i++)
{
if (0 < size[i])
{
size_t remove_index = remove_default < size[i] ? remove_default : size[i];
T remove_i = ary[i][start[i] + remove_index -1];
if (remove_ary == -1 || remove_i < remove_min)
{
remove_min = remove_i;
remove_ary = i;
}
}
}
size_t remove_size = remove_default < size[remove_ary] ? remove_default: size[remove_ary];
start[remove_ary] += remove_size;
size[remove_ary] -= remove_size;
kth -= remove_size;
// Conquer
return MultipleSearch(ary, start, size, ary_num, kth);
// No Combine
}
template
T StupidSelectNearMinimal( T* ary[], size_t start[], size_t size[], size_t ary_num,
size_t kth)
{
assert(1 <= kth && kth <= ary_num);
// copy
T * sorted = new T[ary_num*kth];
T * cur = sorted;
for (size_t i = 0; i < ary_num; i++)
for (size_t j = 0; j < size[i] && j < kth; j++)
*cur++ = ary[i][start[i]+j];
// selection sort up to kth
for (size_t i = 0; i < kth ; i++)
{
// find min index
int iMin = i;
for (size_t j = i+1; j < (size_t)(cur - sorted); j++)
{
if (sorted[j] < sorted[iMin])
iMin = j;
}
// swap
if (iMin != i)
{
T temp = sorted[i];
sorted[i] = sorted[iMin];
sorted[iMin] = temp;
}
}
// return kth smallest element
T found = sorted[kth-1];
delete[] sorted;
return found;
}