2021-09-15 21:58:53 -04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
|
2023-06-24 17:33:04 +12:00
|
|
|
* Copyright (c) 2023, Shannon Booth <shannon.ml.booth@gmail.com>
|
2021-09-15 21:58:53 -04:00
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "Generator.h"
|
|
|
|
|
|
|
|
namespace Diff {
|
|
|
|
|
2023-06-24 12:57:38 +12:00
|
|
|
// Computes a line-based diff between old_text and new_text.
//
// Both inputs are split with StringView::lines(). A longest-common-subsequence table is
// built over the two line lists and then walked from cell (0, 0), collecting every run of
// consecutive additions/removals into a Hunk. Unchanged lines are not stored in the hunks;
// reaching one only closes the hunk that is currently being built.
//
// Returns the hunks in the order they were encountered. An error is propagated if resizing
// the table, appending a line or a hunk, or String::from_utf8() on a line fails.
ErrorOr<Vector<Hunk>> from_text(StringView old_text, StringView new_text)
{
    auto old_lines = old_text.lines();
    auto new_lines = new_text.lines();

    auto const old_line_count = old_lines.size();
    auto const new_line_count = new_lines.size();

    /**
     * This is a simple implementation of the Longest Common Subsequence algorithm (over
     * the lines of the text as opposed to the characters). A Dynamic programming approach
     * is used here.
     */

    enum class Direction {
        Down,     // Added a new line (advance in new_lines)
        Right,    // Removed a line (advance in old_lines)
        Diagonal, // Line remained the same (advance in both)
    };

    // A single cell in the DP-matrix. Cell (i, j) holds the length of the longest common
    // sub-sequence of lines between old_lines[i :] and new_lines[j :], together with the
    // step to take from (i, j) to follow that sub-sequence.
    struct Cell {
        size_t length;
        Direction direction;
    };

    auto dp_matrix = Vector<Cell>();
    TRY(dp_matrix.try_resize((old_line_count + 1) * (new_line_count + 1)));

    // The matrix is stored flat; i selects the position in old_lines, j the one in new_lines.
    auto dp = [&dp_matrix, width = old_line_count + 1](size_t i, size_t j) -> Cell& {
        return dp_matrix[i + width * j];
    };

    // Initialize the border cells, i.e. those where one of the two suffixes is empty and the
    // common sub-sequence therefore has length 0.
    for (size_t i = 0; i <= old_line_count; ++i)
        dp(i, new_line_count) = { 0, Direction::Right };

    for (size_t j = 0; j <= new_line_count; ++j)
        dp(old_line_count, j) = { 0, Direction::Down };

    // Fill in the rest of the DP table, from the end of both texts towards the start.
    // The counters stay size_t (counting down with `x-- > 0`) so that large line counts are
    // not narrowed to int.
    for (size_t i = old_line_count; i-- > 0;) {
        for (size_t j = new_line_count; j-- > 0;) {
            if (old_lines[i] == new_lines[j]) {
                dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
            } else {
                auto down = dp(i, j + 1).length;
                auto right = dp(i + 1, j).length;
                // On a tie the removal (Right) is chosen.
                if (down > right)
                    dp(i, j) = { down, Direction::Down };
                else
                    dp(i, j) = { right, Direction::Right };
            }
        }
    }

    Vector<Hunk> hunks;
    Hunk cur_hunk;
    bool in_hunk = false;

    // Records one added (Down) or removed (Right) line in the current hunk, opening a new
    // hunk that starts at (i, j) first if none is in progress.
    auto update_hunk = [&](size_t i, size_t j, Direction direction) -> ErrorOr<void> {
        if (!in_hunk) {
            HunkLocation location;
            location.old_range.start_line = i;
            location.new_range.start_line = j;
            in_hunk = true;
            cur_hunk = { location, {} };
        }

        if (direction == Direction::Down) {
            TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
            cur_hunk.location.new_range.number_of_lines++;
        } else if (direction == Direction::Right) {
            TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
            cur_hunk.location.old_range.number_of_lines++;
        }

        return {};
    };

    // Appends the hunk in progress (if any) to the result after adjusting its start lines.
    auto flush_hunk = [&]() -> ErrorOr<void> {
        if (in_hunk) {
            // A file with no content has a zero indexed start line.
            if (cur_hunk.location.new_range.start_line != 0 || cur_hunk.location.new_range.number_of_lines != 0)
                cur_hunk.location.new_range.start_line++;
            if (cur_hunk.location.old_range.start_line != 0 || cur_hunk.location.old_range.number_of_lines != 0)
                cur_hunk.location.old_range.start_line++;

            TRY(hunks.try_append(cur_hunk));
            in_hunk = false;
        }

        return {};
    };

    size_t i = 0;
    size_t j = 0;

    // Follow the recorded directions until one of the texts is exhausted.
    while (i < old_line_count && j < new_line_count) {
        auto& cell = dp(i, j);
        if (cell.direction == Direction::Down) {
            TRY(update_hunk(i, j, cell.direction));
            ++j;
        } else if (cell.direction == Direction::Right) {
            TRY(update_hunk(i, j, cell.direction));
            ++i;
        } else {
            // An unchanged line ends whatever hunk was being collected.
            ++i;
            ++j;
            TRY(flush_hunk());
        }
    }

    // Whatever is left over in either text is a trailing run of removals or additions.
    while (i < old_line_count) {
        TRY(update_hunk(i, new_lines.is_empty() ? 0 : new_line_count - 1, Direction::Right)); // Remove a line
        ++i;
    }
    while (j < new_line_count) {
        TRY(update_hunk(old_lines.is_empty() ? 0 : old_line_count - 1, j, Direction::Down)); // Add a line
        ++j;
    }

    TRY(flush_hunk());

    return hunks;
}
|
|
|
|
|
|
|
|
}
|