added defines MIN/MAX (uppercase) as replacement for min/max (lowercase); started...
[public/netxms.git] / src / libnetxms / diff.cpp
CommitLineData
18a7c1e9
VK
1/*
2 * Copyright 2008 Google Inc. All Rights Reserved.
3 * Author: fraser@google.com (Neil Fraser)
4 * Author: mikeslemmer@gmail.com (Mike Slemmer)
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * Diff Match and Patch
19 * http://code.google.com/p/google-diff-match-patch/
20 */
21
22#include "libnetxms.h"
18a7c1e9
VK
23#include "diff.h"
24
25// Define some regex patterns for matching boundaries.
26#define BLANKLINEEND _T("\\n\\r?\\n$")
27#define BLANKLINESTART _T("^\\r?\\n\\r?\\n")
28
29template<typename T> class MutableListIterator
30{
31private:
32 ObjectArray<T> *m_array;
33 int m_pos;
34 bool m_forward;
35
36public:
37 MutableListIterator(ObjectArray<T> *array)
38 {
39 m_array = array;
40 m_pos = 0;
41 m_forward = true;
42 }
43
44 bool hasNext() { return m_pos < m_array->size(); }
45 bool hasPrevious() { return m_pos > 0; }
46
47 T *next() { m_forward = true; return hasNext() ? m_array->get(m_pos++) : NULL; }
48 T *previous() { m_forward = false; return hasPrevious() ? m_array->get(--m_pos) : NULL; }
49
50 void insert(T *value) { m_array->insert(m_pos++, value); }
51 void remove()
52 {
53 if (m_forward)
54 {
55 m_array->remove(m_pos - 1);
56 m_pos--;
57 }
58 else
59 {
60 m_array->remove(m_pos);
61 }
62 }
63
64 void setValue(T *v) { m_array->replace(m_forward ? (m_pos - 1) : m_pos, v); }
65
66 void toFront() { m_pos = 0; }
67 void toBack() { m_pos = m_array->size(); }
68};
69
70template<typename T> class Stack
71{
72private:
73 Array m_data;
74
75public:
76 Stack() : m_data(16, 16, false) { }
77
78 void push(T *v) { m_data.add((void *)v); }
79 T *pop() { int p = m_data.size(); T *v = (T *)m_data.get(p - 1); m_data.remove(p - 1); return v; }
80 T *top() { return (T *)m_data.get(m_data.size() - 1); }
81 void clear() { m_data.clear(); }
82 bool isEmpty() { return m_data.isEmpty(); }
83};
84
85//////////////////////////
86//
87// Diff Class
88//
89//////////////////////////
90
91/**
92 * Constructor. Initializes the diff with the provided values.
93 * @param operation One of INSERT, DELETE or EQUAL
94 * @param text The text being applied
95 */
c3c060b8 96Diff::Diff(DiffOperation _operation, const String &_text) : text(_text)
18a7c1e9
VK
97{
98 operation = _operation;
99}
100
101Diff::Diff(const Diff *src)
102{
103 operation = src->operation;
104 text = src->text;
105}
106
107Diff::Diff(const Diff &src)
108{
109 operation = src.operation;
110 text = src.text;
111}
112
113Diff::Diff()
114{
c3c060b8 115 operation = DIFF_EQUAL;
18a7c1e9
VK
116}
117
c3c060b8 118String Diff::strOperation(DiffOperation op)
18a7c1e9
VK
119{
120 switch(op)
121 {
c3c060b8 122 case DIFF_INSERT:
18a7c1e9 123 return _T("INSERT");
c3c060b8 124 case DIFF_DELETE:
18a7c1e9 125 return _T("DELETE");
c3c060b8 126 case DIFF_EQUAL:
18a7c1e9
VK
127 return _T("EQUAL");
128 }
129 return _T("Invalid operation");
130}
131
132/**
133 * Display a human-readable version of this Diff.
134 * @return text version
135 */
136String Diff::toString() const
137{
138 String prettyText = _T("Diff(");
139 prettyText.append(strOperation(operation));
140 prettyText.append(_T(",\""));
141 prettyText.append(text);
142 prettyText.append(_T("\")"));
143 // Replace linebreaks with Pilcrow signs.
144 return prettyText;
145}
146
147/**
148 * Is this Diff equivalent to another Diff?
149 * @param d Another Diff to compare against
150 * @return true or false
151 */
152bool Diff::operator==(const Diff &d) const
153{
154 return (d.operation == this->operation) && (d.text == this->text);
155}
156
157bool Diff::operator!=(const Diff &d) const
158{
159 return !(operator == (d));
160}
161
162/////////////////////////////////////////////
163//
164// DiffEngine Class
165//
166/////////////////////////////////////////////
167
c3c060b8 168DiffEngine::DiffEngine()
18a7c1e9 169{
c3c060b8
VK
170 Diff_Timeout = 5000;
171 Diff_EditCost = 4;
18a7c1e9
VK
172}
173
174ObjectArray<Diff> *DiffEngine::diff_main(const String &text1, const String &text2)
175{
176 return diff_main(text1, text2, true);
177}
178
179ObjectArray<Diff> *DiffEngine::diff_main(const String &text1, const String &text2, bool checklines)
180{
181 // Set a deadline by which time the diff must be complete.
c3c060b8 182 INT64 deadline;
18a7c1e9
VK
183 if (Diff_Timeout <= 0)
184 {
c3c060b8 185 deadline = _LL(0x7FFFFFFFFFFFFFFF);
18a7c1e9
VK
186 }
187 else
188 {
c3c060b8 189 deadline = GetCurrentTimeMs() + Diff_Timeout;
18a7c1e9
VK
190 }
191 return diff_main(text1, text2, checklines, deadline);
192}
193
c3c060b8 194ObjectArray<Diff> *DiffEngine::diff_main(const String &text1, const String &text2, bool checklines, INT64 deadline)
18a7c1e9
VK
195{
196 // Check for equality (speedup).
197 if (text1 == text2)
198 {
199 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(16, 16, true);
200 if (!text1.isEmpty())
201 {
c3c060b8 202 diffs->add(new Diff(DIFF_EQUAL, text1));
18a7c1e9
VK
203 }
204 return diffs;
205 }
206
207 ObjectArray<Diff> *diffs;
208 if (checklines)
209 {
210 diffs = diff_compute(text1, text2, checklines, deadline);
211 }
212 else
213 {
214 // Trim off common prefix (speedup).
215 int commonlength = diff_commonPrefix(text1, text2);
216 const String &commonprefix = text1.left(commonlength);
217 String textChopped1 = text1.substring(commonlength, -1);
218 String textChopped2 = text2.substring(commonlength, -1);
219
220 // Trim off common suffix (speedup).
221 commonlength = diff_commonSuffix(textChopped1, textChopped2);
222 const String &commonsuffix = textChopped1.right(commonlength);
223 textChopped1 = textChopped1.left(textChopped1.length() - commonlength);
224 textChopped2 = textChopped2.left(textChopped2.length() - commonlength);
225
226 // Compute the diff on the middle block.
227 diffs = diff_compute(textChopped1, textChopped2, checklines, deadline);
228
229 // Restore the prefix and suffix.
230 if (!commonprefix.isEmpty())
231 {
c3c060b8 232 diffs->insert(0, new Diff(DIFF_EQUAL, commonprefix));
18a7c1e9
VK
233 }
234 if (!commonsuffix.isEmpty())
235 {
c3c060b8 236 diffs->add(new Diff(DIFF_EQUAL, commonsuffix));
18a7c1e9
VK
237 }
238
239 diff_cleanupMerge(*diffs);
240 }
241 return diffs;
242}
243
c3c060b8 244ObjectArray<Diff> *DiffEngine::diff_compute(String text1, String text2, bool checklines, INT64 deadline)
18a7c1e9
VK
245{
246 if (text1.isEmpty())
247 {
248 // Just add some text (speedup).
249 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(64, 64, true);
c3c060b8 250 diffs->add(new Diff(DIFF_INSERT, text2));
18a7c1e9
VK
251 return diffs;
252 }
253
254 if (text2.isEmpty())
255 {
256 // Just delete some text (speedup).
257 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(64, 64, true);
c3c060b8 258 diffs->add(new Diff(DIFF_DELETE, text1));
18a7c1e9
VK
259 return diffs;
260 }
261
262 if (!checklines)
263 {
264 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(64, 64, true);
265 const String longtext = text1.length() > text2.length() ? text1 : text2;
266 const String shorttext = text1.length() > text2.length() ? text2 : text1;
267 const int i = longtext.find(shorttext);
268 if (i != String::npos)
269 {
270 // Shorter text is inside the longer text (speedup).
c3c060b8 271 const DiffOperation op = (text1.length() > text2.length()) ? DIFF_DELETE : DIFF_INSERT;
18a7c1e9 272 diffs->add(new Diff(op, longtext.left(i)));
c3c060b8 273 diffs->add(new Diff(DIFF_EQUAL, shorttext));
18a7c1e9
VK
274 diffs->add(new Diff(op, safeMid(longtext, i + shorttext.length())));
275 return diffs;
276 }
277
278 if (shorttext.length() == 1)
279 {
280 // Single character string.
281 // After the previous speedup, the character can't be an equality.
c3c060b8
VK
282 diffs->add(new Diff(DIFF_DELETE, text1));
283 diffs->add(new Diff(DIFF_INSERT, text2));
18a7c1e9
VK
284 return diffs;
285 }
286 delete diffs;
287 }
288
289 // Check to see if the problem can be split in two.
290 if (!checklines)
291 {
292 const StringList *hm = diff_halfMatch(text1, text2);
293 if (hm->size() > 0)
294 {
295 // A half-match was found, sort out the return data.
296 // Send both pairs off for separate processing.
297 ObjectArray<Diff> *diffs_a = diff_main(hm->get(0), hm->get(2), checklines, deadline);
298 ObjectArray<Diff> *diffs_b = diff_main(hm->get(1), hm->get(3), checklines, deadline);
299 // Merge the results.
c3c060b8 300 diffs_a->add(new Diff(DIFF_EQUAL, hm->get(4)));
18a7c1e9
VK
301 for(int i = 0; i < diffs_b->size(); i++)
302 diffs_a->add(diffs_b->get(i));
303 diffs_b->setOwner(false);
304 delete diffs_b;
305 delete hm;
306 return diffs_a;
307 }
308 delete hm;
309 }
310
311 // Perform a real diff.
312 if (checklines && !text1.isEmpty() && !text2.isEmpty())
313 {
314 return diff_lineMode(text1, text2, deadline);
315 }
316
317 return diff_bisect(text1, text2, deadline);
318}
319
c3c060b8 320ObjectArray<Diff> *DiffEngine::diff_lineMode(const String& text1, const String& text2, INT64 deadline)
18a7c1e9
VK
321{
322 // Scan the text on a line-by-line basis first.
323 Array *b = diff_linesToChars(text1, text2);
324 String *ltext1 = (String *)b->get(0);
325 String *ltext2 = (String *)b->get(1);
326 StringList *linearray = (StringList *)b->get(2);
327 delete b;
328
329 ObjectArray<Diff> *diffs = diff_main(*ltext1, *ltext2, false, deadline);
330 delete ltext1;
331 delete ltext2;
332
333 // Convert the diff back to original text.
334 diff_charsToLines(diffs, linearray);
335 delete linearray;
336
337 // Eliminate freak matches (e.g. blank lines)
338 diff_cleanupSemantic(*diffs);
339
340 return diffs;
341}
342
c3c060b8 343ObjectArray<Diff> *DiffEngine::diff_bisect(const String &text1, const String &text2, INT64 deadline)
18a7c1e9
VK
344{
345 // Cache the text lengths to prevent multiple calls.
c3c060b8
VK
346 const int text1_length = (int)text1.length();
347 const int text2_length = (int)text2.length();
18a7c1e9
VK
348 const int max_d = (text1_length + text2_length + 1) / 2;
349 const int v_offset = max_d;
350 const int v_length = 2 * max_d;
351 int *v1 = new int[v_length];
352 int *v2 = new int[v_length];
353 for(int x = 0; x < v_length; x++)
354 {
355 v1[x] = -1;
356 v2[x] = -1;
357 }
358 v1[v_offset + 1] = 0;
359 v2[v_offset + 1] = 0;
360 const int delta = text1_length - text2_length;
361 // If the total number of characters is odd, then the front path will
362 // collide with the reverse path.
363 const bool front = (delta % 2 != 0);
364 // Offsets for start and end of k loop.
365 // Prevents mapping of space beyond the grid.
366 int k1start = 0;
367 int k1end = 0;
368 int k2start = 0;
369 int k2end = 0;
370 for(int d = 0; d < max_d; d++)
371 {
372 // Bail out if deadline is reached.
c3c060b8 373 if (GetCurrentTimeMs() > deadline)
18a7c1e9
VK
374 {
375 break;
376 }
377
378 // Walk the front path one step.
379 for(int k1 = -d + k1start; k1 <= d - k1end; k1 += 2)
380 {
381 const int k1_offset = v_offset + k1;
382 int x1;
383 if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]))
384 {
385 x1 = v1[k1_offset + 1];
386 }
387 else
388 {
389 x1 = v1[k1_offset - 1] + 1;
390 }
391 int y1 = x1 - k1;
392 while(x1 < text1_length && y1 < text2_length && text1[x1] == text2[y1])
393 {
394 x1++;
395 y1++;
396 }
397 v1[k1_offset] = x1;
398 if (x1 > text1_length)
399 {
400 // Ran off the right of the graph.
401 k1end += 2;
402 }
403 else if (y1 > text2_length)
404 {
405 // Ran off the bottom of the graph.
406 k1start += 2;
407 }
408 else if (front)
409 {
410 int k2_offset = v_offset + delta - k1;
411 if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1)
412 {
413 // Mirror x2 onto top-left coordinate system.
414 int x2 = text1_length - v2[k2_offset];
415 if (x1 >= x2)
416 {
417 // Overlap detected.
418 delete[] v1;
419 delete[] v2;
420 return diff_bisectSplit(text1, text2, x1, y1, deadline);
421 }
422 }
423 }
424 }
425
426 // Walk the reverse path one step.
427 for(int k2 = -d + k2start; k2 <= d - k2end; k2 += 2)
428 {
429 const int k2_offset = v_offset + k2;
430 int x2;
431 if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]))
432 {
433 x2 = v2[k2_offset + 1];
434 }
435 else
436 {
437 x2 = v2[k2_offset - 1] + 1;
438 }
439 int y2 = x2 - k2;
440 while(x2 < text1_length && y2 < text2_length && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1])
441 {
442 x2++;
443 y2++;
444 }
445 v2[k2_offset] = x2;
446 if (x2 > text1_length)
447 {
448 // Ran off the left of the graph.
449 k2end += 2;
450 }
451 else if (y2 > text2_length)
452 {
453 // Ran off the top of the graph.
454 k2start += 2;
455 }
456 else if (!front)
457 {
458 int k1_offset = v_offset + delta - k2;
459 if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1)
460 {
461 int x1 = v1[k1_offset];
462 int y1 = v_offset + x1 - k1_offset;
463 // Mirror x2 onto top-left coordinate system.
464 x2 = text1_length - x2;
465 if (x1 >= x2)
466 {
467 // Overlap detected.
468 delete[] v1;
469 delete[] v2;
470 return diff_bisectSplit(text1, text2, x1, y1, deadline);
471 }
472 }
473 }
474 }
475 }
476 delete[] v1;
477 delete[] v2;
478 // Diff took too long and hit the deadline or
479 // number of diffs equals number of characters, no commonality at all.
480 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(16, 16, true);
c3c060b8
VK
481 diffs->add(new Diff(DIFF_DELETE, text1));
482 diffs->add(new Diff(DIFF_INSERT, text2));
18a7c1e9
VK
483 return diffs;
484}
485
c3c060b8 486ObjectArray<Diff> *DiffEngine::diff_bisectSplit(const String &text1, const String &text2, int x, int y, INT64 deadline)
18a7c1e9
VK
487{
488 String text1a = text1.left(x);
489 String text2a = text2.left(y);
490 String text1b = safeMid(text1, x);
491 String text2b = safeMid(text2, y);
492
493 // Compute both diffs serially.
494 ObjectArray<Diff> *diffs = diff_main(text1a, text2a, false, deadline);
495 ObjectArray<Diff> *diffsb = diff_main(text1b, text2b, false, deadline);
496 for(int i = 0; i < diffsb->size(); i++)
497 diffs->add(diffsb->get(i));
498 diffsb->setOwner(false);
499 delete diffsb;
500
501 return diffs;
502}
503
504Array *DiffEngine::diff_linesToChars(const String &text1, const String &text2)
505{
506 StringList *lineArray = new StringList();
507 StringIntMap<int> lineHash;
508 // e.g. linearray[4] == "Hello\n"
509 // e.g. linehash.get("Hello\n") == 4
510
511 // "\x00" is a valid character, but various debuggers don't like it.
512 // So we'll insert a junk entry to avoid generating a null character.
513 lineArray->add(_T(""));
514
515 const String chars1 = diff_linesToCharsMunge(text1, *lineArray, lineHash);
516 const String chars2 = diff_linesToCharsMunge(text2, *lineArray, lineHash);
517
518 Array *listRet = new Array(3, 3, false);
519 listRet->add(new String(chars1));
520 listRet->add(new String(chars2));
521 listRet->add(lineArray);
522 return listRet;
523}
524
525String DiffEngine::diff_linesToCharsMunge(const String &text, StringList &lineArray, StringIntMap<int>& lineHash)
526{
527 size_t lineStart = 0;
528 size_t lineEnd = 0;
529 String line;
530 String chars;
531 // Walk the text, pulling out a substring for each line.
532 // text.split('\n') would would temporarily double our memory footprint.
533 // Modifying text would create many large strings to garbage collect.
534 while(lineEnd < text.length())
535 {
536 lineEnd = text.find(_T("\n"), lineStart);
537 if (lineEnd == String::npos)
538 {
539 lineEnd = text.length();
540 }
c3c060b8 541 line = safeMid(text, lineStart, (int)(lineEnd - lineStart + 1));
18a7c1e9
VK
542 lineStart = lineEnd + 1;
543
544 if (lineHash.contains(line))
545 {
546 chars.append(static_cast<WCHAR>(lineHash.get(line)));
547 }
548 else
549 {
550 lineArray.add(line);
551 lineHash.set(line, lineArray.size() - 1);
552 chars.append(static_cast<WCHAR>(lineArray.size() - 1));
553 }
554 }
555 return chars;
556}
557
558void DiffEngine::diff_charsToLines(ObjectArray<Diff> *diffs, const StringList &lineArray)
559{
560 MutableListIterator<Diff> i(diffs);
561 while(i.hasNext())
562 {
563 Diff *diff = i.next();
564 String text;
1554532b 565 for(int y = 0; y < (int)diff->text.length(); y++)
18a7c1e9 566 {
c3c060b8 567 text += lineArray.get(static_cast<int>(diff->text.charAt(y)));
18a7c1e9
VK
568 }
569 diff->text = text;
570 }
571}
572
573int DiffEngine::diff_commonPrefix(const String &text1, const String &text2)
574{
575 // Performance analysis: http://neil.fraser.name/news/2007/10/09/
b9792835 576 const int n = (int)std::min(text1.length(), text2.length());
18a7c1e9
VK
577 for(int i = 0; i < n; i++)
578 {
579 if (text1.charAt(i) != text2.charAt(i))
580 {
581 return i;
582 }
583 }
584 return n;
585}
586
587int DiffEngine::diff_commonSuffix(const String &text1, const String &text2)
588{
589 // Performance analysis: http://neil.fraser.name/news/2007/10/09/
c3c060b8
VK
590 const int text1_length = (int)text1.length();
591 const int text2_length = (int)text2.length();
b9792835 592 const int n = std::min(text1_length, text2_length);
18a7c1e9
VK
593 for(int i = 1; i <= n; i++)
594 {
595 if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i))
596 {
597 return i - 1;
598 }
599 }
600 return n;
601}
602
c3c060b8 603size_t DiffEngine::diff_commonOverlap(const String &text1, const String &text2)
18a7c1e9
VK
604{
605 // Cache the text lengths to prevent multiple calls.
c3c060b8
VK
606 const size_t text1_length = text1.length();
607 const size_t text2_length = text2.length();
18a7c1e9
VK
608 // Eliminate the null case.
609 if (text1_length == 0 || text2_length == 0)
610 {
611 return 0;
612 }
613 // Truncate the longer string.
614 String text1_trunc = text1;
615 String text2_trunc = text2;
616 if (text1_length > text2_length)
617 {
618 text1_trunc = text1.right(text2_length);
619 }
620 else if (text1_length < text2_length)
621 {
622 text2_trunc = text2.left(text1_length);
623 }
b9792835 624 const size_t text_length = std::min(text1_length, text2_length);
18a7c1e9
VK
625 // Quick check for the worst case.
626 if (text1_trunc == text2_trunc)
627 {
628 return text_length;
629 }
630
631 // Start by looking for a single character match
632 // and increase length until no match is found.
633 // Performance analysis: http://neil.fraser.name/news/2010/11/04/
c3c060b8
VK
634 size_t best = 0;
635 size_t length = 1;
18a7c1e9
VK
636 while(true)
637 {
638 String pattern = text1_trunc.right(length);
639 int found = text2_trunc.find(pattern);
c3c060b8 640 if (found == String::npos)
18a7c1e9
VK
641 {
642 return best;
643 }
644 length += found;
645 if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length))
646 {
647 best = length;
648 length++;
649 }
650 }
651}
652
653StringList *DiffEngine::diff_halfMatch(const String &text1, const String &text2)
654{
655 if (Diff_Timeout <= 0)
656 {
657 // Don't risk returning a non-optimal diff if we have unlimited time.
658 return new StringList();
659 }
660 const String longtext = text1.length() > text2.length() ? text1 : text2;
661 const String shorttext = text1.length() > text2.length() ? text2 : text1;
662 if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length())
663 {
664 return new StringList(); // Pointless.
665 }
666
667 // First check if the second quarter is the seed for a half-match.
c3c060b8 668 StringList *hm1 = diff_halfMatchI(longtext, shorttext, ((int)longtext.length() + 3) / 4);
18a7c1e9 669 // Check again based on the third quarter.
c3c060b8 670 StringList *hm2 = diff_halfMatchI(longtext, shorttext, ((int)longtext.length() + 1) / 2);
18a7c1e9
VK
671 if (hm1->isEmpty() && hm2->isEmpty())
672 {
673 delete hm1;
674 delete hm2;
675 return new StringList();
676 }
677
678 StringList *hm;
679 if (hm2->isEmpty())
680 {
681 hm = hm1;
682 delete hm2;
683 }
684 else if (hm1->isEmpty())
685 {
686 hm = hm2;
687 delete hm1;
688 }
689 else
690 {
691 // Both matched. Select the longest.
692 if (_tcslen(hm1->get(4)) > _tcslen(hm2->get(4)))
693 {
694 hm = hm1;
695 delete hm2;
696 }
697 else
698 {
699 hm = hm2;
700 delete hm1;
701 }
702 }
703
704 // A half-match was found, sort out the return data.
705 if (text1.length() > text2.length())
706 {
707 return hm;
708 }
709
710 StringList *listRet = new StringList();
711 listRet->add(hm->get(2));
712 listRet->add(hm->get(3));
713 listRet->add(hm->get(0));
714 listRet->add(hm->get(1));
715 listRet->add(hm->get(4));
716 delete hm;
717 return listRet;
718}
719
720StringList *DiffEngine::diff_halfMatchI(const String &longtext, const String &shorttext, int i)
721{
722 // Start with a 1/4 length substring at position i as a seed.
c3c060b8 723 const String seed = safeMid(longtext, i, (int)(longtext.length() / 4));
18a7c1e9
VK
724 int j = -1;
725 String best_common;
726 String best_longtext_a, best_longtext_b;
727 String best_shorttext_a, best_shorttext_b;
728 while((j = shorttext.find(seed, j + 1)) != -1)
729 {
730 const int prefixLength = diff_commonPrefix(safeMid(longtext, i), safeMid(shorttext, j));
731 const int suffixLength = diff_commonSuffix(longtext.left(i), shorttext.left(j));
1554532b 732 if ((int)best_common.length() < suffixLength + prefixLength)
18a7c1e9
VK
733 {
734 best_common = safeMid(shorttext, j - suffixLength, suffixLength);
735 best_common.append(safeMid(shorttext, j, prefixLength));
736
737 best_longtext_a = longtext.left(i - suffixLength);
738 best_longtext_b = safeMid(longtext, i + prefixLength);
739
740 best_shorttext_a = shorttext.left(j - suffixLength);
741 best_shorttext_b = safeMid(shorttext, j + prefixLength);
742 }
743 }
744 if (best_common.length() * 2 >= longtext.length())
745 {
746 StringList *listRet = new StringList;
747 listRet->add(best_longtext_a);
748 listRet->add(best_longtext_b);
749 listRet->add(best_shorttext_a);
750 listRet->add(best_shorttext_b);
751 listRet->add(best_common);
752 return listRet;
753 }
754 else
755 {
756 return new StringList();
757 }
758}
759
760void DiffEngine::diff_cleanupSemantic(ObjectArray<Diff> &diffs)
761{
762 if (diffs.isEmpty())
763 {
764 return;
765 }
766 bool changes = false;
767 Stack<Diff> equalities; // Stack of equalities.
768 String lastequality; // Always equal to equalities.lastElement().text
769 MutableListIterator<Diff> pointer(&diffs);
770 // Number of characters that changed prior to the equality.
771 int length_insertions1 = 0;
772 int length_deletions1 = 0;
773 // Number of characters that changed after the equality.
774 int length_insertions2 = 0;
775 int length_deletions2 = 0;
776 Diff *thisDiff = pointer.hasNext() ? pointer.next() : NULL;
777 while(thisDiff != NULL)
778 {
c3c060b8 779 if (thisDiff->operation == DIFF_EQUAL)
18a7c1e9
VK
780 {
781 // Equality found.
782 equalities.push(thisDiff);
783 length_insertions1 = length_insertions2;
784 length_deletions1 = length_deletions2;
785 length_insertions2 = 0;
786 length_deletions2 = 0;
787 lastequality = thisDiff->text;
788 }
789 else
790 {
791 // An insertion or deletion.
c3c060b8 792 if (thisDiff->operation == DIFF_INSERT)
18a7c1e9 793 {
c3c060b8 794 length_insertions2 += (int)thisDiff->text.length();
18a7c1e9
VK
795 }
796 else
797 {
c3c060b8 798 length_deletions2 += (int)thisDiff->text.length();
18a7c1e9
VK
799 }
800 // Eliminate an equality that is smaller or equal to the edits on both
801 // sides of it.
b9792835
VK
802 if (!lastequality.isEmpty() && ((int)lastequality.length() <= std::max(length_insertions1, length_deletions1))
803 && ((int)lastequality.length() <= std::max(length_insertions2, length_deletions2)))
18a7c1e9
VK
804 {
805 // printf("Splitting: '%s'\n", qPrintable(lastequality));
806 // Walk back to offending equality.
807 while(*thisDiff != *equalities.top())
808 {
809 thisDiff = pointer.previous();
810 }
811 pointer.next();
812
813 // Replace equality with a delete.
c3c060b8 814 pointer.setValue(new Diff(DIFF_DELETE, lastequality));
18a7c1e9 815 // Insert a corresponding an insert.
c3c060b8 816 pointer.insert(new Diff(DIFF_INSERT, lastequality));
18a7c1e9
VK
817
818 equalities.pop(); // Throw away the equality we just deleted.
819 if (!equalities.isEmpty())
820 {
821 // Throw away the previous equality (it needs to be reevaluated).
822 equalities.pop();
823 }
824 if (equalities.isEmpty())
825 {
826 // There are no previous equalities, walk back to the start.
827 while(pointer.hasPrevious())
828 {
829 pointer.previous();
830 }
831 }
832 else
833 {
834 // There is a safe equality we can fall back to.
835 thisDiff = equalities.top();
836 while(*thisDiff != *pointer.previous())
837 {
838 // Intentionally empty loop.
839 }
840 }
841
842 length_insertions1 = 0; // Reset the counters.
843 length_deletions1 = 0;
844 length_insertions2 = 0;
845 length_deletions2 = 0;
846 lastequality = String();
847 changes = true;
848 }
849 }
850 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
851 }
852
853 // Normalize the diff.
854 if (changes)
855 {
856 diff_cleanupMerge(diffs);
857 }
858 diff_cleanupSemanticLossless(diffs);
859
860 // Find any overlaps between deletions and insertions.
861 // e.g: <del>abcxxx</del><ins>xxxdef</ins>
862 // -> <del>abc</del>xxx<ins>def</ins>
863 // e.g: <del>xxxabc</del><ins>defxxx</ins>
864 // -> <ins>def</ins>xxx<del>abc</del>
865 // Only extract an overlap if it is as big as the edit ahead or behind it.
866 pointer.toFront();
867 Diff *prevDiff = NULL;
868 thisDiff = NULL;
869 if (pointer.hasNext())
870 {
871 prevDiff = pointer.next();
872 if (pointer.hasNext())
873 {
874 thisDiff = pointer.next();
875 }
876 }
877 while(thisDiff != NULL)
878 {
c3c060b8 879 if (prevDiff->operation == DIFF_DELETE && thisDiff->operation == DIFF_INSERT)
18a7c1e9
VK
880 {
881 String deletion = prevDiff->text;
882 String insertion = thisDiff->text;
c3c060b8
VK
883 size_t overlap_length1 = diff_commonOverlap(deletion, insertion);
884 size_t overlap_length2 = diff_commonOverlap(insertion, deletion);
18a7c1e9
VK
885 if (overlap_length1 >= overlap_length2)
886 {
887 if (overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0)
888 {
889 // Overlap found. Insert an equality and trim the surrounding edits.
890 pointer.previous();
c3c060b8 891 pointer.insert(new Diff(DIFF_EQUAL, insertion.left(overlap_length1)));
18a7c1e9
VK
892 prevDiff->text = deletion.left(deletion.length() - overlap_length1);
893 thisDiff->text = safeMid(insertion, overlap_length1);
894 // pointer.insert inserts the element before the cursor, so there is
895 // no need to step past the new element.
896 }
897 }
898 else
899 {
900 if (overlap_length2 >= deletion.length() / 2.0 || overlap_length2 >= insertion.length() / 2.0)
901 {
902 // Reverse overlap found.
903 // Insert an equality and swap and trim the surrounding edits.
904 pointer.previous();
c3c060b8
VK
905 pointer.insert(new Diff(DIFF_EQUAL, deletion.left(overlap_length2)));
906 prevDiff->operation = DIFF_INSERT;
18a7c1e9 907 prevDiff->text = insertion.left(insertion.length() - overlap_length2);
c3c060b8 908 thisDiff->operation = DIFF_DELETE;
18a7c1e9
VK
909 thisDiff->text = safeMid(deletion, overlap_length2);
910 // pointer.insert inserts the element before the cursor, so there is
911 // no need to step past the new element.
912 }
913 }
914 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
915 }
916 prevDiff = thisDiff;
917 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
918 }
919}
920
921void DiffEngine::diff_cleanupSemanticLossless(ObjectArray<Diff> &diffs)
922{
923 String equality1, edit, equality2;
924 String commonString;
925 int commonOffset;
926 int score, bestScore;
927 String bestEquality1, bestEdit, bestEquality2;
928 // Create a new iterator at the start.
929 MutableListIterator<Diff> pointer(&diffs);
930 Diff *prevDiff = pointer.hasNext() ? pointer.next() : NULL;
931 Diff *thisDiff = pointer.hasNext() ? pointer.next() : NULL;
932 Diff *nextDiff = pointer.hasNext() ? pointer.next() : NULL;
933
934 // Intentionally ignore the first and last element (don't need checking).
935 while(nextDiff != NULL)
936 {
c3c060b8 937 if (prevDiff->operation == DIFF_EQUAL && nextDiff->operation == DIFF_EQUAL)
18a7c1e9
VK
938 {
939 // This is a single edit surrounded by equalities.
940 equality1 = prevDiff->text;
941 edit = thisDiff->text;
942 equality2 = nextDiff->text;
943
944 // First, shift the edit as far left as possible.
945 commonOffset = diff_commonSuffix(equality1, edit);
946 if (commonOffset != 0)
947 {
948 commonString = safeMid(edit, edit.length() - commonOffset);
949 equality1 = equality1.left(equality1.length() - commonOffset);
950 edit = commonString + edit.left(edit.length() - commonOffset);
951 equality2 = commonString + equality2;
952 }
953
954 // Second, step character by character right, looking for the best fit.
955 bestEquality1 = equality1;
956 bestEdit = edit;
957 bestEquality2 = equality2;
958 bestScore = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2);
959 while(!edit.isEmpty() && !equality2.isEmpty() && edit.charAt(0) == equality2.charAt(0))
960 {
961 equality1.append(edit.charAt(0));
962 edit = safeMid(edit, 1);
963 edit.append(equality2.charAt(0));
964 equality2 = safeMid(equality2, 1);
965 score = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2);
966 // The >= encourages trailing rather than leading whitespace on edits.
967 if (score >= bestScore)
968 {
969 bestScore = score;
970 bestEquality1 = equality1;
971 bestEdit = edit;
972 bestEquality2 = equality2;
973 }
974 }
975
976 if (prevDiff->text != bestEquality1)
977 {
978 // We have an improvement, save it back to the diff.
979 if (!bestEquality1.isEmpty())
980 {
981 prevDiff->text = bestEquality1;
982 }
983 else
984 {
985 pointer.previous(); // Walk past nextDiff.
986 pointer.previous(); // Walk past thisDiff.
987 pointer.previous(); // Walk past prevDiff.
988 pointer.remove(); // Delete prevDiff.
989 pointer.next(); // Walk past thisDiff.
990 pointer.next(); // Walk past nextDiff.
991 }
992 thisDiff->text = bestEdit;
993 if (!bestEquality2.isEmpty())
994 {
995 nextDiff->text = bestEquality2;
996 }
997 else
998 {
999 pointer.remove(); // Delete nextDiff.
1000 nextDiff = thisDiff;
1001 thisDiff = prevDiff;
1002 }
1003 }
1004 }
1005 prevDiff = thisDiff;
1006 thisDiff = nextDiff;
1007 nextDiff = pointer.hasNext() ? pointer.next() : NULL;
1008 }
1009}
1010
1011int DiffEngine::diff_cleanupSemanticScore(const String &one, const String &two)
1012{
1013 if (one.isEmpty() || two.isEmpty())
1014 {
1015 // Edges are the best.
1016 return 6;
1017 }
1018
1019 // Each port of this function behaves slightly differently due to
1020 // subtle differences in each language's definition of things like
1021 // 'whitespace'. Since this function's purpose is largely cosmetic,
1022 // the choice has been made to use each language's native features
1023 // rather than force total conformity.
1024 TCHAR char1 = one.charAt(one.length() - 1);
1025 TCHAR char2 = two.charAt(0);
1026 bool nonAlphaNumeric1 = !_istalnum(char1);
1027 bool nonAlphaNumeric2 = !_istalnum(char2);
1028 bool whitespace1 = nonAlphaNumeric1 && _istspace(char1);
1029 bool whitespace2 = nonAlphaNumeric2 && _istspace(char2);
1030 bool lineBreak1 = whitespace1 && (char1 == '\n');
1031 bool lineBreak2 = whitespace2 && (char2 == '\n');
1032 bool blankLine1 = lineBreak1 && RegexpMatch(one, BLANKLINEEND, true);
1033 bool blankLine2 = lineBreak2 && RegexpMatch(two, BLANKLINESTART, true);
1034
1035 if (blankLine1 || blankLine2)
1036 {
1037 // Five points for blank lines.
1038 return 5;
1039 }
1040 else if (lineBreak1 || lineBreak2)
1041 {
1042 // Four points for line breaks.
1043 return 4;
1044 }
1045 else if (nonAlphaNumeric1 && !whitespace1 && whitespace2)
1046 {
1047 // Three points for end of sentences.
1048 return 3;
1049 }
1050 else if (whitespace1 || whitespace2)
1051 {
1052 // Two points for whitespace.
1053 return 2;
1054 }
1055 else if (nonAlphaNumeric1 || nonAlphaNumeric2)
1056 {
1057 // One point for non-alphanumeric.
1058 return 1;
1059 }
1060 return 0;
1061}
1062
1063void DiffEngine::diff_cleanupEfficiency(ObjectArray<Diff> &diffs)
1064{
1065 if (diffs.isEmpty())
1066 {
1067 return;
1068 }
1069 bool changes = false;
1070 Stack<Diff> equalities; // Stack of equalities.
1071 String lastequality; // Always equal to equalities.lastElement().text
1072 MutableListIterator<Diff> pointer(&diffs);
1073 // Is there an insertion operation before the last equality.
1074 bool pre_ins = false;
1075 // Is there a deletion operation before the last equality.
1076 bool pre_del = false;
1077 // Is there an insertion operation after the last equality.
1078 bool post_ins = false;
1079 // Is there a deletion operation after the last equality.
1080 bool post_del = false;
1081
1082 Diff *thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1083 Diff *safeDiff = thisDiff;
1084
1085 while(thisDiff != NULL)
1086 {
c3c060b8 1087 if (thisDiff->operation == DIFF_EQUAL)
18a7c1e9
VK
1088 {
1089 // Equality found.
1554532b 1090 if ((int)thisDiff->text.length() < Diff_EditCost && (post_ins || post_del))
18a7c1e9
VK
1091 {
1092 // Candidate found.
1093 equalities.push(thisDiff);
1094 pre_ins = post_ins;
1095 pre_del = post_del;
1096 lastequality = thisDiff->text;
1097 }
1098 else
1099 {
1100 // Not a candidate, and can never become one.
1101 equalities.clear();
1102 lastequality = String();
1103 safeDiff = thisDiff;
1104 }
1105 post_ins = post_del = false;
1106 }
1107 else
1108 {
1109 // An insertion or deletion.
c3c060b8 1110 if (thisDiff->operation == DIFF_DELETE)
18a7c1e9
VK
1111 {
1112 post_del = true;
1113 }
1114 else
1115 {
1116 post_ins = true;
1117 }
1118 /*
1119 * Five types to be split:
1120 * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
1121 * <ins>A</ins>X<ins>C</ins><del>D</del>
1122 * <ins>A</ins><del>B</del>X<ins>C</ins>
1123 * <ins>A</del>X<ins>C</ins><del>D</del>
1124 * <ins>A</ins><del>B</del>X<del>C</del>
1125 */
1126 if (!lastequality.isEmpty()
1127 && ((pre_ins && pre_del && post_ins && post_del)
1554532b 1128 || (((int)lastequality.length() < Diff_EditCost / 2)
18a7c1e9
VK
1129 && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3)))
1130 {
1131 // printf("Splitting: '%s'\n", qPrintable(lastequality));
1132 // Walk back to offending equality.
1133 while(*thisDiff != *equalities.top())
1134 {
1135 thisDiff = pointer.previous();
1136 }
1137 pointer.next();
1138
1139 // Replace equality with a delete.
c3c060b8 1140 pointer.setValue(new Diff(DIFF_DELETE, lastequality));
18a7c1e9 1141 // Insert a corresponding an insert.
c3c060b8 1142 pointer.insert(new Diff(DIFF_INSERT, lastequality));
18a7c1e9
VK
1143 thisDiff = pointer.previous();
1144 pointer.next();
1145
1146 equalities.pop(); // Throw away the equality we just deleted.
1147 lastequality = String();
1148 if (pre_ins && pre_del)
1149 {
1150 // No changes made which could affect previous entry, keep going.
1151 post_ins = post_del = true;
1152 equalities.clear();
1153 safeDiff = thisDiff;
1154 }
1155 else
1156 {
1157 if (!equalities.isEmpty())
1158 {
1159 // Throw away the previous equality (it needs to be reevaluated).
1160 equalities.pop();
1161 }
1162 if (equalities.isEmpty())
1163 {
1164 // There are no previous questionable equalities,
1165 // walk back to the last known safe diff.
1166 thisDiff = safeDiff;
1167 }
1168 else
1169 {
1170 // There is an equality we can fall back to.
1171 thisDiff = equalities.top();
1172 }
1173 while(*thisDiff != *pointer.previous())
1174 {
1175 // Intentionally empty loop.
1176 }
1177 post_ins = post_del = false;
1178 }
1179
1180 changes = true;
1181 }
1182 }
1183 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1184 }
1185
1186 if (changes)
1187 {
1188 diff_cleanupMerge(diffs);
1189 }
1190}
1191
1192void DiffEngine::diff_cleanupMerge(ObjectArray<Diff> &diffs)
1193{
c3c060b8 1194 diffs.add(new Diff(DIFF_EQUAL, _T(""))); // Add a dummy entry at the end.
18a7c1e9
VK
1195 MutableListIterator<Diff> pointer(&diffs);
1196 int count_delete = 0;
1197 int count_insert = 0;
1198 String text_delete;
1199 String text_insert;
1200 Diff *thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1201 Diff *prevEqual = NULL;
1202 int commonlength;
1203 while(thisDiff != NULL)
1204 {
1205 switch(thisDiff->operation)
1206 {
c3c060b8 1207 case DIFF_INSERT:
18a7c1e9
VK
1208 count_insert++;
1209 text_insert += thisDiff->text;
1210 prevEqual = NULL;
1211 break;
c3c060b8 1212 case DIFF_DELETE:
18a7c1e9
VK
1213 count_delete++;
1214 text_delete += thisDiff->text;
1215 prevEqual = NULL;
1216 break;
c3c060b8 1217 case DIFF_EQUAL:
18a7c1e9
VK
1218 if (count_delete + count_insert > 1)
1219 {
1220 bool both_types = count_delete != 0 && count_insert != 0;
1221 // Delete the offending records.
1222 pointer.previous(); // Reverse direction.
1223 while(count_delete-- > 0)
1224 {
1225 pointer.previous();
1226 pointer.remove();
1227 }
1228 while(count_insert-- > 0)
1229 {
1230 pointer.previous();
1231 pointer.remove();
1232 }
1233 if (both_types)
1234 {
1235 // Factor out any common prefixies.
1236 commonlength = diff_commonPrefix(text_insert, text_delete);
1237 if (commonlength != 0)
1238 {
1239 if (pointer.hasPrevious())
1240 {
1241 thisDiff = pointer.previous();
c3c060b8 1242 if (thisDiff->operation != DIFF_EQUAL)
18a7c1e9
VK
1243 {
1244 return;
1245 }
1246 thisDiff->text += text_insert.left(commonlength);
1247 pointer.next();
1248 }
1249 else
1250 {
c3c060b8 1251 pointer.insert(new Diff(DIFF_EQUAL, text_insert.left(commonlength)));
18a7c1e9
VK
1252 }
1253 text_insert = safeMid(text_insert, commonlength);
1254 text_delete = safeMid(text_delete, commonlength);
1255 }
1256 // Factor out any common suffixies.
1257 commonlength = diff_commonSuffix(text_insert, text_delete);
1258 if (commonlength != 0)
1259 {
1260 thisDiff = pointer.next();
1261 String s = safeMid(text_insert, text_insert.length() - commonlength);
1262 s.append(thisDiff->text);
1263 thisDiff->text = s;
1264 text_insert = text_insert.left(text_insert.length() - commonlength);
1265 text_delete = text_delete.left(text_delete.length() - commonlength);
1266 pointer.previous();
1267 }
1268 }
1269 // Insert the merged records.
1270 if (!text_delete.isEmpty())
1271 {
c3c060b8 1272 pointer.insert(new Diff(DIFF_DELETE, text_delete));
18a7c1e9
VK
1273 }
1274 if (!text_insert.isEmpty())
1275 {
c3c060b8 1276 pointer.insert(new Diff(DIFF_INSERT, text_insert));
18a7c1e9
VK
1277 }
1278 // Step forward to the equality.
1279 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1280 }
1281 else if (prevEqual != NULL)
1282 {
1283 // Merge this equality with the previous one.
1284 prevEqual->text += thisDiff->text;
1285 pointer.remove();
1286 thisDiff = pointer.previous();
1287 pointer.next(); // Forward direction
1288 }
1289 count_insert = 0;
1290 count_delete = 0;
1291 text_delete = _T("");
1292 text_insert = _T("");
1293 prevEqual = thisDiff;
1294 break;
1295 }
1296 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1297 }
1298 if (diffs.last()->text.isEmpty())
1299 {
1300 diffs.remove(diffs.size() - 1); // Remove the dummy entry at the end.
1301 }
1302
1303 /*
1304 * Second pass: look for single edits surrounded on both sides by equalities
1305 * which can be shifted sideways to eliminate an equality.
1306 * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1307 */
1308 bool changes = false;
1309 // Create a new iterator at the start.
1310 // (As opposed to walking the current one back.)
1311 pointer.toFront();
1312 Diff *prevDiff = pointer.hasNext() ? pointer.next() : NULL;
1313 thisDiff = pointer.hasNext() ? pointer.next() : NULL;
1314 Diff *nextDiff = pointer.hasNext() ? pointer.next() : NULL;
1315
1316 // Intentionally ignore the first and last element (don't need checking).
1317 while(nextDiff != NULL)
1318 {
c3c060b8 1319 if (prevDiff->operation == DIFF_EQUAL && nextDiff->operation == DIFF_EQUAL)
18a7c1e9
VK
1320 {
1321 // This is a single edit surrounded by equalities.
1322 if (thisDiff->text.endsWith(prevDiff->text))
1323 {
1324 // Shift the edit over the previous equality.
1325 String prefix = thisDiff->text.left(thisDiff->text.length() - prevDiff->text.length());
1326 thisDiff->text = prevDiff->text;
1327 thisDiff->text.append(prefix);
1328 String tmp = prevDiff->text;
1329 tmp.append(nextDiff->text);
1330 nextDiff->text = tmp;
1331 pointer.previous(); // Walk past nextDiff.
1332 pointer.previous(); // Walk past thisDiff.
1333 pointer.previous(); // Walk past prevDiff.
1334 pointer.remove(); // Delete prevDiff.
1335 pointer.next(); // Walk past thisDiff.
1336 thisDiff = pointer.next(); // Walk past nextDiff.
1337 nextDiff = pointer.hasNext() ? pointer.next() : NULL;
1338 changes = true;
1339 }
1340 else if (thisDiff->text.startsWith(nextDiff->text))
1341 {
1342 // Shift the edit over the next equality.
1343 prevDiff->text.append(nextDiff->text);
1344 thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length());
1345 thisDiff->text.append(nextDiff->text);
1346 pointer.remove(); // Delete nextDiff.
1347 nextDiff = pointer.hasNext() ? pointer.next() : NULL;
1348 changes = true;
1349 }
1350 }
1351 prevDiff = thisDiff;
1352 thisDiff = nextDiff;
1353 nextDiff = pointer.hasNext() ? pointer.next() : NULL;
1354 }
1355 // If shifts were made, the diff needs reordering and another shift sweep.
1356 if (changes)
1357 {
1358 diff_cleanupMerge(diffs);
1359 }
1360}
1361
1362int DiffEngine::diff_xIndex(const ObjectArray<Diff> &diffs, int loc)
1363{
1364 int chars1 = 0;
1365 int chars2 = 0;
1366 int last_chars1 = 0;
1367 int last_chars2 = 0;
1368 Diff *lastDiff;
1369 for(int i = 0; i < diffs.size(); i++)
1370 {
1371 Diff *aDiff = diffs.get(i);
c3c060b8 1372 if (aDiff->operation != DIFF_INSERT)
18a7c1e9
VK
1373 {
1374 // Equality or deletion.
c3c060b8 1375 chars1 += (int)aDiff->text.length();
18a7c1e9 1376 }
c3c060b8 1377 if (aDiff->operation != DIFF_DELETE)
18a7c1e9
VK
1378 {
1379 // Equality or insertion.
c3c060b8 1380 chars2 += (int)aDiff->text.length();
18a7c1e9
VK
1381 }
1382 if (chars1 > loc)
1383 {
1384 // Overshot the location.
1385 lastDiff = aDiff;
1386 break;
1387 }
1388 last_chars1 = chars1;
1389 last_chars2 = chars2;
1390 }
c3c060b8 1391 if (lastDiff->operation == DIFF_DELETE)
18a7c1e9
VK
1392 {
1393 // The location was deleted.
1394 return last_chars2;
1395 }
1396 // Add the remaining character length.
1397 return last_chars2 + (loc - last_chars1);
1398}
1399
1400inline void AppendLines(String& out, const String& text, TCHAR prefix)
1401{
1402 StringList *lines = text.split(_T("\n"));
1403 for(int i = 0; i < lines->size(); i++)
1404 {
1405 const TCHAR *l = lines->get(i);
1406 if (*l != 0)
1407 {
1408 out.append(prefix);
1409 out.append(l);
1410 out.append(_T('\n'));
1411 }
1412 }
1413 delete lines;
1414}
1415
1416String DiffEngine::diff_generateLineDiff(ObjectArray<Diff> *diffs)
1417{
1418 String out;
1419 for(int i = 0; i < diffs->size(); i++)
1420 {
1421 Diff *aDiff = diffs->get(i);
1422 switch(aDiff->operation)
1423 {
c3c060b8 1424 case DIFF_INSERT:
18a7c1e9
VK
1425 AppendLines(out, aDiff->text, _T('+'));
1426 break;
c3c060b8 1427 case DIFF_DELETE:
18a7c1e9
VK
1428 AppendLines(out, aDiff->text, _T('-'));
1429 break;
1430 }
1431 }
1432 return out;
1433}
1434
1435String DiffEngine::diff_text1(const ObjectArray<Diff> &diffs)
1436{
1437 String text;
1438 for(int i = 0; i < diffs.size(); i++)
1439 {
1440 Diff *aDiff = diffs.get(i);
c3c060b8 1441 if (aDiff->operation != DIFF_INSERT)
18a7c1e9
VK
1442 {
1443 text.append(aDiff->text);
1444 }
1445 }
1446 return text;
1447}
1448
1449String DiffEngine::diff_text2(const ObjectArray<Diff> &diffs)
1450{
1451 String text;
1452 for(int i = 0; i < diffs.size(); i++)
1453 {
1454 Diff *aDiff = diffs.get(i);
c3c060b8 1455 if (aDiff->operation != DIFF_DELETE)
18a7c1e9
VK
1456 {
1457 text += aDiff->text;
1458 }
1459 }
1460 return text;
1461}
1462
1463int DiffEngine::diff_levenshtein(const ObjectArray<Diff> &diffs)
1464{
1465 int levenshtein = 0;
1466 int insertions = 0;
1467 int deletions = 0;
1468 for(int i = 0; i < diffs.size(); i++)
1469 {
1470 Diff *aDiff = diffs.get(i);
1471 switch(aDiff->operation)
1472 {
c3c060b8
VK
1473 case DIFF_INSERT:
1474 insertions += (int)aDiff->text.length();
18a7c1e9 1475 break;
c3c060b8
VK
1476 case DIFF_DELETE:
1477 deletions += (int)aDiff->text.length();
18a7c1e9 1478 break;
c3c060b8 1479 case DIFF_EQUAL:
18a7c1e9 1480 // A deletion and an insertion is one substitution.
b9792835 1481 levenshtein += std::max(insertions, deletions);
18a7c1e9
VK
1482 insertions = 0;
1483 deletions = 0;
1484 break;
1485 }
1486 }
b9792835 1487 levenshtein += std::max(insertions, deletions);
18a7c1e9
VK
1488 return levenshtein;
1489}
1490
1491String DiffEngine::diff_toDelta(const ObjectArray<Diff> &diffs)
1492{
1493 String text;
1494 for(int i = 0; i < diffs.size(); i++)
1495 {
1496 Diff *aDiff = diffs.get(i);
1497 switch(aDiff->operation)
1498 {
c3c060b8 1499 case DIFF_INSERT:
18a7c1e9
VK
1500 text.append(_T('+'));
1501 text.append(aDiff->text);
1502 text.append(_T('\t'));
1503 break;
c3c060b8 1504 case DIFF_DELETE:
18a7c1e9
VK
1505 text.appendFormattedString(_T("-%d\t"), aDiff->text.length());
1506 break;
c3c060b8 1507 case DIFF_EQUAL:
18a7c1e9
VK
1508 text.appendFormattedString(_T("=%d\t"), aDiff->text.length());
1509 break;
1510 }
1511 }
1512 if (!text.isEmpty())
1513 {
1514 // Strip off trailing tab character.
1515 text = text.left(text.length() - 1);
1516 }
1517 return text;
1518}
1519
1520ObjectArray<Diff> *DiffEngine::diff_fromDelta(const String &text1, const String &delta)
1521{
1522 ObjectArray<Diff> *diffs = new ObjectArray<Diff>(64, 64, true);
1523 int pointer = 0; // Cursor in text1
1524 StringList *tokens = delta.split(_T("\t"));
1525 for(int i = 0; i < tokens->size(); i++)
1526 {
1527 const TCHAR *token = tokens->get(i);
1528 if (*token == 0)
1529 {
1530 // Blank tokens are ok (from a trailing \t).
1531 continue;
1532 }
1533 // Each token begins with a one character parameter which specifies the
1534 // operation of this token (delete, insert, equality).
1535 String param = safeMid(token, 1);
1536 switch(token[0])
1537 {
1538 case '+':
c3c060b8 1539 diffs->add(new Diff(DIFF_INSERT, param));
18a7c1e9
VK
1540 break;
1541 case '-':
1542 // Fall through.
1543 case '=':
1544 {
1545 int n = _tcstol(param, NULL, 10);
1546 if (n < 0)
1547 {
1548 delete tokens;
1549 return diffs;
1550 }
1551 String text;
1552 text = safeMid(text1, pointer, n);
1553 pointer += n;
1554 if (token[0] == '=')
1555 {
c3c060b8 1556 diffs->add(new Diff(DIFF_EQUAL, text));
18a7c1e9
VK
1557 }
1558 else
1559 {
c3c060b8 1560 diffs->add(new Diff(DIFF_DELETE, text));
18a7c1e9
VK
1561 }
1562 break;
1563 }
1564 default:
1565 delete tokens;
1566 return diffs;
1567 }
1568 }
1569 delete tokens;
1570 return diffs;
1571}
1572
1573/**
1574 * Produce diff between two strings line by line
1575 */
1576String LIBNETXMS_EXPORTABLE GenerateLineDiff(const String& left, const String& right)
1577{
1578 DiffEngine d;
1579 ObjectArray<Diff> *diffs = d.diff_main(left, right);
1580 String result = d.diff_generateLineDiff(diffs);
1581 delete diffs;
1582 return result;
1583}