mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 14:16:07 -06:00
207 lines
5.5 KiB
C++
207 lines
5.5 KiB
C++
#include "TextDiffCtrl.h"
|
|
|
|
namespace Upp {
|
|
|
|
template <class I>
|
|
static int CompareGetCount(I a, I b, int max_count)
|
|
{
|
|
if(max_count <= 0 || *a != *b)
|
|
return 0;
|
|
int left;
|
|
for(left = max_count; --left > 0;)
|
|
if(*++a != *++b)
|
|
return max_count - left;
|
|
return max_count;
|
|
}
|
|
|
|
Vector<String> GetLineMap(Stream& stream, bool ignore_indent)
|
|
{
|
|
Vector<String> out;
|
|
int emp = 0;
|
|
if(stream.IsOpen())
|
|
while(!stream.IsEof()) {
|
|
String s = stream.GetLine();
|
|
const char *p = s, *e = s.end(), *f = e;
|
|
if(ignore_indent)
|
|
while((byte)*p <= 32 && p != e)
|
|
p++;
|
|
while(e > p && (byte)e[-1] < ' ')
|
|
e--;
|
|
if(e == p)
|
|
emp++;
|
|
else
|
|
{
|
|
while(emp-- > 0)
|
|
out.Add(Null);
|
|
if(p != s.begin())
|
|
s = String(p, e);
|
|
else
|
|
if(e != f)
|
|
s.Trim(int(e - p));
|
|
out.Add(s);
|
|
emp = 0;
|
|
}
|
|
}
|
|
return out;
|
|
}
|
|
|
|
class TextComparator
|
|
{
|
|
public:
|
|
TextComparator(const Vector<String>& f1, const Vector<String>& f2);
|
|
|
|
Array<TextSection> GetSections() const;
|
|
|
|
private:
|
|
bool Find(int start1, int end1, int start2, int end2, int& best_match, int& best_count) const;
|
|
void Split(Array<TextSection>& dest, int start1, int end1, int start2, int end2) const;
|
|
|
|
private:
|
|
Vector<Index<dword>> hash1;
|
|
Vector<Index<dword>> hash2;
|
|
const Vector<String>& file1;
|
|
const Vector<String>& file2;
|
|
};
|
|
|
|
Array<TextSection> CompareLineMaps(const Vector<String>& s1, const Vector<String>& s2)
|
|
{
|
|
return TextComparator(s1, s2).GetSections();
|
|
}
|
|
|
|
static void CalcHash(Vector<Index<dword>>& hash, const Vector<String>& file, int limit)
|
|
{
|
|
{ // 1st row
|
|
Index<dword>& first = hash.Add();
|
|
for(int i = 0; i < file.GetCount(); i++)
|
|
first.Add(FoldHash(GetHashValue(file[i])));
|
|
}
|
|
static const int prime[] =
|
|
{
|
|
3, 5, 7, 11, 13, 17, 19, 21,
|
|
23, 29, 31, 37, 41, 43, 47, 51,
|
|
53, 61, 67, 71, 73, 79, 83, 87,
|
|
89, 97, 101, 103, 107, 109, 113, 117,
|
|
};
|
|
const int *pp = prime;
|
|
for(int l = 1; l < limit; l <<= 1) {
|
|
Index<dword>& nhash = hash.Add();
|
|
const Index<dword>& ohash = hash[hash.GetCount() - 2];
|
|
int pri = *pp++;
|
|
int t;
|
|
for(t = l; t < ohash.GetCount(); t++)
|
|
nhash.Add(ohash[t - l] + pri * ohash[t]);
|
|
for(t -= l; t < ohash.GetCount(); t++)
|
|
nhash.Add(ohash[t]);
|
|
}
|
|
}
|
|
|
|
TextComparator::TextComparator(const Vector<String>& f1, const Vector<String>& f2)
|
|
: file1(f1), file2(f2)
|
|
{
|
|
int limit = min(f1.GetCount(), f2.GetCount());
|
|
CalcHash(hash1, f1, limit);
|
|
CalcHash(hash2, f2, limit);
|
|
}
|
|
|
|
static bool CompareSection(const TextSection& ta, const TextSection& tb)
|
|
{
|
|
return ta.start1 < tb.start1 || ta.start1 == tb.start1 && ta.start2 < tb.start2;
|
|
}
|
|
|
|
Array<TextSection> TextComparator::GetSections() const
|
|
{
|
|
Array<TextSection> output;
|
|
Split(output, 0, file1.GetCount(), 0, file2.GetCount());
|
|
Sort(output, &CompareSection);
|
|
return output;
|
|
}
|
|
|
|
static int GetHashLevel(int min_count, int hash_count)
|
|
{
|
|
int l = 0;
|
|
hash_count--;
|
|
while(min_count > 1 && l < hash_count)
|
|
{
|
|
min_count >>= 1;
|
|
l++;
|
|
}
|
|
return l;
|
|
}
|
|
|
|
bool TextComparator::Find(int start1, int end1, int start2, int end2, int& best_match, int& best_count) const
|
|
{
|
|
ASSERT(end1 > start1 && end2 > start2);
|
|
bool done = false;
|
|
const String *f1 = file1.Begin() + start1;
|
|
int len1 = end1 - start1;
|
|
int lvl = GetHashLevel(best_count + 1, hash1.GetCount());
|
|
int chunk = 1 << lvl;
|
|
int last = max(best_count - chunk + 1, 0);
|
|
const Index<dword> *hp1 = &hash1[lvl];
|
|
const Index<dword> *hp2 = &hash2[lvl];
|
|
const dword *h1 = hp1->begin() + start1;
|
|
|
|
int i = hp2->Find(*h1);
|
|
while(i >= 0)
|
|
if(i + best_count >= end2)
|
|
return done;
|
|
else {
|
|
if(i >= start2 && h1[last] == (*hp2)[i + last]) {
|
|
int top = min(len1, end2 - i);
|
|
int hc = CompareGetCount(h1, hp2->begin() + i, top) + chunk - 1;
|
|
int cnt = CompareGetCount(f1, file2.begin() + i, min(hc, top));
|
|
if(cnt > best_count) {
|
|
best_count = cnt;
|
|
best_match = i;
|
|
done = true;
|
|
last = best_count - chunk + 1;
|
|
if(best_count + 1 >= 2 * chunk)
|
|
{
|
|
lvl = GetHashLevel(best_count + 1, hash1.GetCount());
|
|
chunk = 1 << lvl;
|
|
last = best_count - chunk + 1;
|
|
hp1 = &hash1[lvl];
|
|
hp2 = &hash2[lvl];
|
|
h1 = hp1->begin() + start1;
|
|
int oi = i;
|
|
for(i = hp2->Find(*h1); i >= 0 && i <= oi; i = hp2->FindNext(i))
|
|
;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
i = hp2->FindNext(i);
|
|
}
|
|
return done;
|
|
}
|
|
|
|
void TextComparator::Split(Array<TextSection>& dest, int start1, int end1, int start2, int end2) const
|
|
{
|
|
ASSERT(start1 <= end1 && start2 <= end2);
|
|
while(start1 < end1 && start2 < end2) {
|
|
int new1 = -1, new2 = -1, count = 0;
|
|
for(int i = start1; i + count < end1; i++)
|
|
if(Find(i, end1, start2, end2, new2, count))
|
|
new1 = i;
|
|
if(count == 0)
|
|
break; // no match at all
|
|
ASSERT(new1 >= start1 && new1 + count <= end1);
|
|
ASSERT(new2 >= start2 && new2 + count <= end2);
|
|
dest.Add(TextSection(new1, count, new2, count, true));
|
|
if(new1 - start1 >= end1 - new1 - count) { // head is longer - recurse for tail
|
|
Split(dest, new1 + count, end1, new2 + count, end2);
|
|
end1 = new1;
|
|
end2 = new2;
|
|
}
|
|
else { // tail is longer - recurse for head
|
|
Split(dest, start1, new1, start2, new2);
|
|
start1 = new1 + count;
|
|
start2 = new2 + count;
|
|
}
|
|
ASSERT(start1 <= end1 && start2 <= end2);
|
|
}
|
|
if(start1 < end1 || start2 < end2)
|
|
dest.Add(TextSection(start1, end1 - start1, start2, end2 - start2, false));
|
|
}
|
|
|
|
};
|