From d5781d488352f103c772b8db9b8277d7bd93a7f7 Mon Sep 17 00:00:00 2001 From: "Winter, Felix" Date: Wed, 18 Nov 2020 18:07:19 +0100 Subject: [PATCH 1/4] Added library files for edit distance global --- share/minizinc/linear/edit_distance.mzn | 290 ++++++++++++++++++++++++ share/minizinc/std/edit_distance.mzn | 206 +++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 share/minizinc/linear/edit_distance.mzn create mode 100644 share/minizinc/std/edit_distance.mzn diff --git a/share/minizinc/linear/edit_distance.mzn b/share/minizinc/linear/edit_distance.mzn new file mode 100644 index 000000000..9e747ce84 --- /dev/null +++ b/share/minizinc/linear/edit_distance.mzn @@ -0,0 +1,290 @@ +% Longest common subsequence distance (insertion cost = deletion cost = 1, substitution cost = 2) +predicate edit_distance(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: u1 = length(S1); + var 0..u1: p1; + var 0..u1: p2; + var 0..u1: kept_sum; + } in + ED = p1 + p2 - 2*kept_sum + /\ + edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + +% Fixed insertion/deletion/substitution costs +predicate edit_distance(int: max_char, + int: C_ins, + int: C_del, + int: C_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: u1 = length(S1); + var 0..u1: p1; + var 0..u1: p2; + var 0..1: p1_greater; + var 0..u1: kept_sum; + var 0..u1: insertions; + var 0..u1: deletions; + var 0..u1: substitutions; + } in + p1 + (1-p1_greater)*u1 >= p2 + /\ + p2 + p1_greater*u1 >= p1 + /\ + substitutions >= p1 - kept_sum - p1_greater * u1 + /\ + substitutions >= p2 - kept_sum - (1-p1_greater) * u1 + /\ + insertions >= p2-p1 + /\ + deletions >= p1-p2 + /\ + ED = C_ins * insertions + C_sub * substitutions + C_del * deletions + /\ + edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + +% Decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. +predicate edit_distance_kept(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: p1, + var int: p2, + var int: kept_sum) = + let { int: u1 = length(S1); + int: u2 = length(S2); + array[1..u1] of var 0..u1: kept_r1_from_position; + array[1..u1] of var 0..u1: kept_r2_from_position; + array[1..u1,0..u1] of var 0..1: kept_r2_from_position_i; + array[1..u1,0..u1] of var 0..1: kept_r1_from_position_i; + array[1..u1] of var 1..max_char: z; + array [1..u1] of var 0..1: kept; + array [1..u1] of var 0..1: used1; + array [1..u1] of var 0..1: used2; + array [1..u1] of var 0..max_char: kept_char; + } in + % check that strings are correctly aligned + u1 = u2 + /\ + % see which positions are used (i.e. positions which are not 0) + forall(i in 1..u1)( + S1[i] <= max_char*used1[i] + /\ + used1[i] < 1 + S1[i] + /\ + S2[i] <= max_char*used2[i] + /\ + used2[i] < 1 + S2[i] + ) + % determine lengths and substitutions + /\ + p1 = sum(i in 1..u1)(used1[i]) + /\ + p2 = sum(i in 1..u2)(used2[i]) + /\ + kept_sum = sum(i in 1..u1)(kept[i]) + /\ + forall(k in 2..u1)( + kept_r1_from_position[k] + (1-kept[k])*u1 > kept_r1_from_position[k-1] + /\ + kept_r2_from_position[k] + (1-kept[k])*u1 > kept_r2_from_position[k-1] + ) + /\ + forall(i in 1..u1-1)( + kept[i+1] <= kept[i] + /\ + used1[i+1] <= used1[i] + /\ + used2[i+1] <= used2[i] + ) + /\ + forall(k in 1..u1)( + kept[k]*u1 >= kept_r1_from_position[k] + /\ + kept[k]*u1 >= kept_r2_from_position[k] + /\ + kept[k] <= kept_r1_from_position[k] + /\ + kept[k] <= kept_r2_from_position[k] + ) + /\ + forall(k in 1..u1)( + kept_r2_from_position[k] - (1-kept[k])*u1 <= p1 + /\ + kept_r1_from_position[k] - (1-kept[k])*u2 <= p2 + ) + /\ + forall(i in 1..u1)( + sum(j in 0..u1)(kept_r2_from_position_i[i,j]) == 1 + /\ + sum(j in 0..u1)(kept_r2_from_position_i[i,j]*j) == kept_r2_from_position[i] + /\ + sum(j in 0..u1)(kept_r1_from_position_i[i,j]) == 1 + /\ + sum(j in 0..u1)(kept_r1_from_position_i[i,j]*j) == kept_r1_from_position[i] + ) + /\ + forall(i in 1..u1)( + z[i] <= 1 + (1-kept_r2_from_position_i[i,0])*max_char + ) + /\ + forall(i in 1..u1, j in 1..u1)( + z[i] <= S1[j] + (1-kept_r2_from_position_i[i,j])*max_char + /\ + z[i] >= S1[j] - (1-kept_r2_from_position_i[i,j])*max_char + ) + /\ + forall(i in 1..u1, j in 1..u1)( + z[i] <= S2[j] + (1-kept_r1_from_position_i[i,j])*max_char + /\ + z[i] >= S2[j] - (1-kept_r1_from_position_i[i,j])*max_char + ); + +% Based on the MIP formulation proposed in the paper: +% "Finding median and center strings for a probability distribution on a set of strings under Levenshtein distance based on integer linear programming", Hayashida and Koyano, BIOSTEC 2016. +predicate edit_distance(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: u1 = length(S1); + int: u2 = length(S2); + + array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); + int: max_ic = max(W_ins); + array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); + int: max_dc = max(W_del); + array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, + [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); + int: max_sc = max(W_sub_0); + + array [0..u1, 0..u2] of var 0..1: x; + array [0..u1, 0..u2] of var 0..max_dc: x_c; + array [0..u1, 0..u2] of var 0..1: y; + array [0..u1, 0..u2] of var 0..max_ic: y_c; + array [0..u1, 0..u2] of var 0..1: z; + + array [1..u1, 1..u2] of var 0..1: g; + array [1..u1, 1..u2] of var 0..1: h; + array [1..u1, 1..u2] of var 0..max_sc: h_c; + + + array [1..u1] of var 0..1: used1; + array [1..u2] of var 0..1: used2; + } in + % see which positions are used (i.e. positions which are not 0) + forall(i in 1..u1)( + S1[i] <= max_char*used1[i] + /\ + used1[i] < 1 + S1[i] + ) + /\ + forall(i in 1..u2)( + S2[i] <= max_char*used2[i] + /\ + used2[i] < 1 + S2[i] + ) + /\ + % check that strings are correctly aligned + u1 = u2 + /\ + forall(i in 1..u1-1)( + used1[i+1] <= used1[i] + ) + /\ + forall(i in 1..u2-1)( + used2[i+1] <= used2[i] + ) + /\ + % a1 + 1 = x[1,0] + y[1,0] + z[1,1] + /\ + % a2 + forall(i in 1..u1-1)( + x[i,0] = x[i+1,0] + y[i,1] + z[i+1,1] + ) + /\ + % a3 + x[u1,0] = y[u1,1] + /\ + % a4 + forall(j in 1..u2-1)( + y[0,j] = x[1,j] + y[0,j+1] + z[1,j+1] + ) + /\ + % a5 + y[0,u2] = x[1,u2] + /\ + % a6 + forall(i in 1..u1-1, j in 1..u2-1)( + x[i,j] + y[i,j] + z[i,j] = x[i+1,j] + y[i,j+1] + z[i+1,j+1] + ) + /\ + % a7 + forall(j in 1..u2-1)( + x[u1,j] + y[u1,j] + z[u1,j] = y[u1,j+1] + ) + /\ + % a8 + forall(i in 1..u1-1)( + x[i,u2] + y[i,u2] + z[i,u2] = x[i+1,u2] + ) + /\ + % a9 + x[u1,u2] + y[u1,u2] + z[u1,u2] = 1 + /\ + % special 0 length string constraint, we need to force a variable in this case + y[0,u2] * u2 >= u2 * (1-u1) + /\ + % b + forall(j in 1..u2)( + y[u1,j] >= (1/u2) * (j-u2) + ) + /\ + % c1 + forall(i in 1..u1, j in 1..u2)( + S1[i] - S2[j] <= max_char * g[i,j] + ) + /\ + % c2 + forall(i in 1..u1, j in 1..u2)( + S2[j] - S1[i] <= max_char * g[i,j] + ) + /\ + % d2 + forall(i in 1..u1, j in 1..u2)( + h[i,j] >= z[i,j] + g[i,j] - 1 + ) + /\ + forall(i in 1..u1, j in 1..u2)( + h[i,j] <= 1/2 * (z[i,j] + g[i,j]) + ) + /\ + % if the empty character zero is used, we forbid substitution + forall(i in 1..u1, j in 1..u1)( + S1[i] >= h[i,j] + /\ + S2[j] >= h[i,j] + ) + /\ + % linearize deletion costs + forall(i in 1..u1, j in 0..u2)( + x_c[i,j] >= W_del_0[S1[i]] - (1-x[i,j])*max_dc + ) + /\ + % linearize insertion costs + forall(i in 0..u1, j in 1..u2)( + y_c[i,j] >= W_ins_0[S2[j]] - (1-y[i,j])*max_ic + ) + /\ + % linearize substitution costs + forall(i in 1..u1, j in 1..u2)( + h_c[i,j] >= W_sub_0[S1[i],S2[j]] - (1-h[i,j])*max_sc + ) + /\ + ED = sum(i in 1..u1)(x_c[i,0]) + + sum(j in 1..u2)(y_c[0,j]) + + sum(i in 1..u1, j in 1..u2)(x_c[i,j] + y_c[i,j] + h_c[i,j]); \ No newline at end of file diff --git a/share/minizinc/std/edit_distance.mzn b/share/minizinc/std/edit_distance.mzn new file mode 100644 index 000000000..ffb36d751 --- /dev/null +++ b/share/minizinc/std/edit_distance.mzn @@ -0,0 +1,206 @@ +/** @group globals + Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. + Any character insertion or deletion has a cost of 1, whereas any character substitution has a cost of 2. + All characters need to be numbered from 1 to \a max_char. + The integer arrays \a S1 and \a S2 need to have the same length, but may contain zeros at the end to indicate an "empty" character. + Example: Let \a max_char = 2, \a S1 = [1,1,0,0,0], \a S2 = [1,2,1,0,0], then \a ED = 1, as the minimum edit distance is 1 between the strings "11" and "121" (a single '2' character needs to be inserted). + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) +@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { + int: len1 = length(S1); + int: len2 = length(S2); + var 0..len1: p1; + var 0..len2: p2; + var 0..len1: kept_sum; + } in + ED = p1 + p2 - 2*kept_sum + /\ + edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + +/** @group globals + Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. + This constraint functions in the same way as the constraint \a edit_distance(max_char, S1, S2, ED), however it additionally specifies custom character insertion, deletion and substitution costs. + All costs should be >= 1 and the following inequality must hold: C_sub <= C_ins + C_del + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param C_ins: Costs for a single character insertion +@param C_del: Costs for a single character deletion +@param C_sub: Costs for a single character substitution +@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) +@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance(int: max_char, + int: C_ins, + int: C_del, + int: C_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: len1 = length(S1); + int: len2 = length(S2); + var 0..len1: p1; + var 0..len2: p2; + var 0..len1: kept_sum; + var 0..len2: insertions; + var 0..len1: deletions; + var 0..max(len1,len2): substitutions; + } in + substitutions = min(p1,p2) - kept_sum + /\ + insertions = max(p2-p1, 0) + /\ + deletions = max(p1-p2, 0) + /\ + ED = C_ins * insertions + C_sub * substitutions + C_del * deletions + /\ + edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + +% Decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. +predicate edit_distance_kept(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: p1, + var int: p2, + var int: kept_sum) = + let { int: l1 = min(index_set(S1)); + int: l2 = min(index_set(S2)); + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + int: len1 = length(S1); + int: len2 = length(S2); + var 0..len2: insertions; + var 0..len1: deletions; + var 0..max(len1,len2): substitutions; + array[l1..u1] of var 0..len2: kept_r1_from_position; + array[l2..u2] of var 0..len1: kept_r2_from_position; + } in + p1 = sum(i in l1..u1 where S1[i] != 0)(1) + /\ + p2 = sum(i in l2..u2 where S2[i] != 0)(1) + /\ + kept_sum = sum(i in l1..u1 where kept_r1_from_position[i] != 0)(1) + /\ + forall(i in l1..u1-1)( + S1[i] == 0 -> S1[i+1] == 0 + ) + /\ + forall(i in l2..u2-1)( + S2[i] == 0 -> S2[i+1] == 0 + ) + /\ + forall(i in l1+1..u1 where kept_r1_from_position[i] != 0) ( + kept_r1_from_position[i] > kept_r1_from_position[i-1] + ) + /\ + forall(i in l2+1..u2 where kept_r2_from_position[i] != 0) ( + kept_r2_from_position[i] > kept_r2_from_position[i-1] + ) + /\ + forall(i in l1..u1) ( + kept_r1_from_position[i] > 0 <-> kept_r2_from_position[i] > 0 + ) + /\ + forall(j in l2..u2-1) ( + (kept_r1_from_position[j] = 0) -> (kept_r1_from_position[j+1] = 0) + ) + /\ + forall(j in l1..u1-1) ( + (kept_r2_from_position[j] = 0) -> (kept_r2_from_position[j+1] = 0) + ) + /\ + forall(i in l2..u2 where kept_r2_from_position[i] != 0) ( + kept_r2_from_position[i] <= p1 + ) + /\ + forall(i in l1..u1 where kept_r1_from_position[i] != 0) ( + kept_r1_from_position[i] <= p2 + ) + /\ + forall(i in l1..u1 where kept_r1_from_position[i] != 0) ( + S1[kept_r2_from_position[i]] = S2[kept_r1_from_position[i]] + ); + + +% Dynamic programming routine decomposition for insertion/deletion/substition cost defined by character +% DP routine first proposed in the paper "The String-to-String Correction Problem", Wagner and Fischer, Journal of the ACM, 1978. +/** @group globals + Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. + This constraint functions in the same way as the constraint \a edit_distance(max_char, S1, S2, ED), however it additionally specifies custom character insertion, deletion and substitution costs for each individual combination of characters. + All costs should be >= 1, except the substitution costs of a character with itself which should be 0 (W_sub[c,c] = 0). + Furthermore, the following inequality must hold for all pairs of characters c1 and c2: \a W_sub[c1,c2] <= \a W_del[c1] + \a W_ins[c2] + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param W_ins: Integer array containing the costs for single character insertions +@param W_del: Integer array containing the costs for single character deletions +@param W_sub: Integer array containing the costs for single character substitutions +@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) +@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: l1 = min(index_set(S1)) - 1; + int: l2 = min(index_set(S2)) - 1; + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); + array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); + array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, + [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); + array[l1..u1,l2..u2] of var int: T; + } in + forall(i in l1+1..u1-1)( + S1[i] == 0 -> S1[i+1] == 0 + ) + /\ + forall(i in l2+1..u2-1)( + S2[i] == 0 -> S2[i+1] == 0 + ) + /\ + T[l1,l2] = 0 /\ + T[u1,u2] = ED /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] >= 0 /\ T[i,j] <= sum(i2 in 1..i)(W_del_0[S1[i2]]) + sum(j2 in 1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(j in l2+1..u2)( + T[l1,j] = sum(j2 in 1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(i in l1+1..u1)( + T[i,l2] = sum(i2 in 1..i)(W_del_0[S1[i2]]) + ) + /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] = + if S1[i] = S2[j] then + T[i-1,j-1] + else + if S1[i] = 0 then + T[i-1,j] + else + if S2[j] = 0 then + T[i,j-1] + else + min([T[i-1,j]+W_del_0[S1[i]], + T[i,j-1]+W_ins_0[S2[j]], + T[i-1,j-1]+W_sub_0[S1[i], S2[j]] + ]) + endif + endif + endif + ); \ No newline at end of file From 25eb16a43126bd7ab2d335078c77adbc641d17cd Mon Sep 17 00:00:00 2001 From: "Winter, Felix" Date: Thu, 19 Nov 2020 16:00:00 +0100 Subject: [PATCH 2/4] Moved decompositions to fzn_edit_distance, included assertions, fixed small problem with a decomposition --- share/minizinc/linear/edit_distance.mzn | 290 ---------------- share/minizinc/linear/fzn_edit_distance.mzn | 363 ++++++++++++++++++++ share/minizinc/std/edit_distance.mzn | 195 +++-------- share/minizinc/std/fzn_edit_distance.mzn | 182 ++++++++++ 4 files changed, 587 insertions(+), 443 deletions(-) delete mode 100644 share/minizinc/linear/edit_distance.mzn create mode 100644 share/minizinc/linear/fzn_edit_distance.mzn create mode 100644 share/minizinc/std/fzn_edit_distance.mzn diff --git a/share/minizinc/linear/edit_distance.mzn b/share/minizinc/linear/edit_distance.mzn deleted file mode 100644 index 9e747ce84..000000000 --- a/share/minizinc/linear/edit_distance.mzn +++ /dev/null @@ -1,290 +0,0 @@ -% Longest common subsequence distance (insertion cost = deletion cost = 1, substitution cost = 2) -predicate edit_distance(int: max_char, - array[int] of var int: S1, - array[int] of var int: S2, - var int: ED) = - let { int: u1 = length(S1); - var 0..u1: p1; - var 0..u1: p2; - var 0..u1: kept_sum; - } in - ED = p1 + p2 - 2*kept_sum - /\ - edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); - -% Fixed insertion/deletion/substitution costs -predicate edit_distance(int: max_char, - int: C_ins, - int: C_del, - int: C_sub, - array[int] of var int: S1, - array[int] of var int: S2, - var int: ED) = - let { int: u1 = length(S1); - var 0..u1: p1; - var 0..u1: p2; - var 0..1: p1_greater; - var 0..u1: kept_sum; - var 0..u1: insertions; - var 0..u1: deletions; - var 0..u1: substitutions; - } in - p1 + (1-p1_greater)*u1 >= p2 - /\ - p2 + p1_greater*u1 >= p1 - /\ - substitutions >= p1 - kept_sum - p1_greater * u1 - /\ - substitutions >= p2 - kept_sum - (1-p1_greater) * u1 - /\ - insertions >= p2-p1 - /\ - deletions >= p1-p2 - /\ - ED = C_ins * insertions + C_sub * substitutions + C_del * deletions - /\ - edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); - -% Decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. -predicate edit_distance_kept(int: max_char, - array[int] of var int: S1, - array[int] of var int: S2, - var int: p1, - var int: p2, - var int: kept_sum) = - let { int: u1 = length(S1); - int: u2 = length(S2); - array[1..u1] of var 0..u1: kept_r1_from_position; - array[1..u1] of var 0..u1: kept_r2_from_position; - array[1..u1,0..u1] of var 0..1: kept_r2_from_position_i; - array[1..u1,0..u1] of var 0..1: kept_r1_from_position_i; - array[1..u1] of var 1..max_char: z; - array [1..u1] of var 0..1: kept; - array [1..u1] of var 0..1: used1; - array [1..u1] of var 0..1: used2; - array [1..u1] of var 0..max_char: kept_char; - } in - % check that strings are correctly aligned - u1 = u2 - /\ - % see which positions are used (i.e. positions which are not 0) - forall(i in 1..u1)( - S1[i] <= max_char*used1[i] - /\ - used1[i] < 1 + S1[i] - /\ - S2[i] <= max_char*used2[i] - /\ - used2[i] < 1 + S2[i] - ) - % determine lengths and substitutions - /\ - p1 = sum(i in 1..u1)(used1[i]) - /\ - p2 = sum(i in 1..u2)(used2[i]) - /\ - kept_sum = sum(i in 1..u1)(kept[i]) - /\ - forall(k in 2..u1)( - kept_r1_from_position[k] + (1-kept[k])*u1 > kept_r1_from_position[k-1] - /\ - kept_r2_from_position[k] + (1-kept[k])*u1 > kept_r2_from_position[k-1] - ) - /\ - forall(i in 1..u1-1)( - kept[i+1] <= kept[i] - /\ - used1[i+1] <= used1[i] - /\ - used2[i+1] <= used2[i] - ) - /\ - forall(k in 1..u1)( - kept[k]*u1 >= kept_r1_from_position[k] - /\ - kept[k]*u1 >= kept_r2_from_position[k] - /\ - kept[k] <= kept_r1_from_position[k] - /\ - kept[k] <= kept_r2_from_position[k] - ) - /\ - forall(k in 1..u1)( - kept_r2_from_position[k] - (1-kept[k])*u1 <= p1 - /\ - kept_r1_from_position[k] - (1-kept[k])*u2 <= p2 - ) - /\ - forall(i in 1..u1)( - sum(j in 0..u1)(kept_r2_from_position_i[i,j]) == 1 - /\ - sum(j in 0..u1)(kept_r2_from_position_i[i,j]*j) == kept_r2_from_position[i] - /\ - sum(j in 0..u1)(kept_r1_from_position_i[i,j]) == 1 - /\ - sum(j in 0..u1)(kept_r1_from_position_i[i,j]*j) == kept_r1_from_position[i] - ) - /\ - forall(i in 1..u1)( - z[i] <= 1 + (1-kept_r2_from_position_i[i,0])*max_char - ) - /\ - forall(i in 1..u1, j in 1..u1)( - z[i] <= S1[j] + (1-kept_r2_from_position_i[i,j])*max_char - /\ - z[i] >= S1[j] - (1-kept_r2_from_position_i[i,j])*max_char - ) - /\ - forall(i in 1..u1, j in 1..u1)( - z[i] <= S2[j] + (1-kept_r1_from_position_i[i,j])*max_char - /\ - z[i] >= S2[j] - (1-kept_r1_from_position_i[i,j])*max_char - ); - -% Based on the MIP formulation proposed in the paper: -% "Finding median and center strings for a probability distribution on a set of strings under Levenshtein distance based on integer linear programming", Hayashida and Koyano, BIOSTEC 2016. -predicate edit_distance(int: max_char, - array[int] of int: W_ins, - array[int] of int: W_del, - array[int, int] of int: W_sub, - array[int] of var int: S1, - array[int] of var int: S2, - var int: ED) = - let { int: u1 = length(S1); - int: u2 = length(S2); - - array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); - int: max_ic = max(W_ins); - array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); - int: max_dc = max(W_del); - array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, - [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); - int: max_sc = max(W_sub_0); - - array [0..u1, 0..u2] of var 0..1: x; - array [0..u1, 0..u2] of var 0..max_dc: x_c; - array [0..u1, 0..u2] of var 0..1: y; - array [0..u1, 0..u2] of var 0..max_ic: y_c; - array [0..u1, 0..u2] of var 0..1: z; - - array [1..u1, 1..u2] of var 0..1: g; - array [1..u1, 1..u2] of var 0..1: h; - array [1..u1, 1..u2] of var 0..max_sc: h_c; - - - array [1..u1] of var 0..1: used1; - array [1..u2] of var 0..1: used2; - } in - % see which positions are used (i.e. positions which are not 0) - forall(i in 1..u1)( - S1[i] <= max_char*used1[i] - /\ - used1[i] < 1 + S1[i] - ) - /\ - forall(i in 1..u2)( - S2[i] <= max_char*used2[i] - /\ - used2[i] < 1 + S2[i] - ) - /\ - % check that strings are correctly aligned - u1 = u2 - /\ - forall(i in 1..u1-1)( - used1[i+1] <= used1[i] - ) - /\ - forall(i in 1..u2-1)( - used2[i+1] <= used2[i] - ) - /\ - % a1 - 1 = x[1,0] + y[1,0] + z[1,1] - /\ - % a2 - forall(i in 1..u1-1)( - x[i,0] = x[i+1,0] + y[i,1] + z[i+1,1] - ) - /\ - % a3 - x[u1,0] = y[u1,1] - /\ - % a4 - forall(j in 1..u2-1)( - y[0,j] = x[1,j] + y[0,j+1] + z[1,j+1] - ) - /\ - % a5 - y[0,u2] = x[1,u2] - /\ - % a6 - forall(i in 1..u1-1, j in 1..u2-1)( - x[i,j] + y[i,j] + z[i,j] = x[i+1,j] + y[i,j+1] + z[i+1,j+1] - ) - /\ - % a7 - forall(j in 1..u2-1)( - x[u1,j] + y[u1,j] + z[u1,j] = y[u1,j+1] - ) - /\ - % a8 - forall(i in 1..u1-1)( - x[i,u2] + y[i,u2] + z[i,u2] = x[i+1,u2] - ) - /\ - % a9 - x[u1,u2] + y[u1,u2] + z[u1,u2] = 1 - /\ - % special 0 length string constraint, we need to force a variable in this case - y[0,u2] * u2 >= u2 * (1-u1) - /\ - % b - forall(j in 1..u2)( - y[u1,j] >= (1/u2) * (j-u2) - ) - /\ - % c1 - forall(i in 1..u1, j in 1..u2)( - S1[i] - S2[j] <= max_char * g[i,j] - ) - /\ - % c2 - forall(i in 1..u1, j in 1..u2)( - S2[j] - S1[i] <= max_char * g[i,j] - ) - /\ - % d2 - forall(i in 1..u1, j in 1..u2)( - h[i,j] >= z[i,j] + g[i,j] - 1 - ) - /\ - forall(i in 1..u1, j in 1..u2)( - h[i,j] <= 1/2 * (z[i,j] + g[i,j]) - ) - /\ - % if the empty character zero is used, we forbid substitution - forall(i in 1..u1, j in 1..u1)( - S1[i] >= h[i,j] - /\ - S2[j] >= h[i,j] - ) - /\ - % linearize deletion costs - forall(i in 1..u1, j in 0..u2)( - x_c[i,j] >= W_del_0[S1[i]] - (1-x[i,j])*max_dc - ) - /\ - % linearize insertion costs - forall(i in 0..u1, j in 1..u2)( - y_c[i,j] >= W_ins_0[S2[j]] - (1-y[i,j])*max_ic - ) - /\ - % linearize substitution costs - forall(i in 1..u1, j in 1..u2)( - h_c[i,j] >= W_sub_0[S1[i],S2[j]] - (1-h[i,j])*max_sc - ) - /\ - ED = sum(i in 1..u1)(x_c[i,0]) - + sum(j in 1..u2)(y_c[0,j]) - + sum(i in 1..u1, j in 1..u2)(x_c[i,j] + y_c[i,j] + h_c[i,j]); \ No newline at end of file diff --git a/share/minizinc/linear/fzn_edit_distance.mzn b/share/minizinc/linear/fzn_edit_distance.mzn new file mode 100644 index 000000000..524590ff3 --- /dev/null +++ b/share/minizinc/linear/fzn_edit_distance.mzn @@ -0,0 +1,363 @@ +% Longest common subsequence distance (insertion cost = deletion cost = 1, substitution cost = 2) +predicate fzn_edit_distance(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: len = length(S1); + var 0..len: p1; + var 0..len: p2; + var 0..len: kept_sum; + array[1..len] of var 0..len: kept_r1_from_position; + array[1..len] of var 0..len: kept_r2_from_position; + } in + ED = p1 + p2 - 2 * kept_sum + /\ + fzn_edit_distance_kept(max_char, array1d(1..len, S1), array1d(1..len, S2), p1, p2, kept_sum); + +% Fixed insertion/deletion/substitution costs +predicate fzn_edit_distance(int: max_char, + int: C_ins, + int: C_del, + int: C_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: len = length(S1); + var 0..len: insertions; + var 0..len: deletions; + var 0..len: substitutions; + } in + ED = C_ins * insertions + C_sub * substitutions + C_del * deletions + /\ + fzn_edit_distance_kept_general(max_char, array1d(1..len, S1), array1d(1..len, S2), insertions, deletions, substitutions); + +% Linearized decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. +predicate fzn_edit_distance_kept(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: p1, + var int: p2, + var int: kept_sum) = + let { int: len = length(S1); + array[1..len] of var 0..len: kept_r1_from_position; + array[1..len] of var 0..len: kept_r2_from_position; + array[1..len,0..len] of var 0..1: kept_r2_from_position_i; + array[1..len,0..len] of var 0..1: kept_r1_from_position_i; + array[1..len] of var 1..max_char: z; + array[1..len] of var 0..1: kept; + array[1..len] of var 0..1: used1; + array[1..len] of var 0..1: used2; + array[1..len] of var 0..max_char: kept_char; + } in + % see which positions are used (i.e. positions which are not 0) + forall(i in 1..len)( + S1[i] <= max_char*used1[i] + /\ + used1[i] < 1 + S1[i] + /\ + S2[i] <= max_char*used2[i] + /\ + used2[i] < 1 + S2[i] + ) + /\ + p1 = sum(i in 1..len)(used1[i]) + /\ + p2 = sum(i in 1..len)(used2[i]) + /\ + kept_sum = sum(i in 1..len)(kept[i]) + /\ + forall(k in 2..len)( + kept_r1_from_position[k] + (1-kept[k])*len > kept_r1_from_position[k-1] + /\ + kept_r2_from_position[k] + (1-kept[k])*len > kept_r2_from_position[k-1] + ) + /\ + forall(i in 1..len-1)( + kept[i+1] <= kept[i] + /\ + used1[i+1] <= used1[i] + /\ + used2[i+1] <= used2[i] + ) + /\ + forall(k in 1..len)( + kept[k]*len >= kept_r1_from_position[k] + /\ + kept[k]*len >= kept_r2_from_position[k] + /\ + kept[k] <= kept_r1_from_position[k] + /\ + kept[k] <= kept_r2_from_position[k] + ) + /\ + forall(k in 1..len)( + kept_r2_from_position[k] - (1-kept[k])*len <= p1 + /\ + kept_r1_from_position[k] - (1-kept[k])*len <= p2 + ) + /\ + forall(i in 1..len)( + sum(j in 0..len)(kept_r2_from_position_i[i,j]) == 1 + /\ + sum(j in 0..len)(kept_r2_from_position_i[i,j]*j) == kept_r2_from_position[i] + /\ + sum(j in 0..len)(kept_r1_from_position_i[i,j]) == 1 + /\ + sum(j in 0..len)(kept_r1_from_position_i[i,j]*j) == kept_r1_from_position[i] + ) + /\ + forall(i in 1..len)( + z[i] <= 1 + (1-kept_r2_from_position_i[i,0])*max_char + ); + +% Linearized decomposition based on the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. +predicate fzn_edit_distance_kept_general(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: insertions, + var int: deletions, + var int: substitutions) = + let { int: len = length(S1); + var 0..len: p1; + var 0..len: p2; + var 0..1: p1_greater; + var 0..len: kept_sum; + array[1..len] of var 0..len: kept_r1_from_position; + array[1..len] of var 0..len: kept_r2_from_position; + array[1..len,0..len] of var 0..1: kept_r2_from_position_i; + array[1..len,0..len] of var 0..1: kept_r1_from_position_i; + array[1..len] of var 1..max_char: z; + array[1..len] of var 0..1: kept; + array[1..len] of var 0..1: kept_equal; + array[1..len] of var 0..1: used1; + array[1..len] of var 0..1: used2; + array[1..len] of var 0..max_char: kept_char; + } in + substitutions = sum(i in 1..len)(1-kept_equal[i]) + /\ + insertions >= p2 - kept_sum + /\ + deletions >= p1 - kept_sum + /\ + forall(i in 1..len)( + S1[i] <= max_char*used1[i] + /\ + used1[i] < 1 + S1[i] + /\ + S2[i] <= max_char*used2[i] + /\ + used2[i] < 1 + S2[i] + ) + /\ + p1 = sum(i in 1..len)(used1[i]) + /\ + p2 = sum(i in 1..len)(used2[i]) + /\ + kept_sum = sum(i in 1..len)(kept[i]) + /\ + forall(k in 2..len)( + kept_r1_from_position[k] + (1-kept[k])*len > kept_r1_from_position[k-1] + /\ + kept_r2_from_position[k] + (1-kept[k])*len > kept_r2_from_position[k-1] + ) + /\ + forall(i in 1..len-1)( + kept[i+1] <= kept[i] + /\ + used1[i+1] <= used1[i] + /\ + used2[i+1] <= used2[i] + ) + /\ + forall(k in 1..len)( + kept[k]*len >= kept_r1_from_position[k] + /\ + kept[k]*len >= kept_r2_from_position[k] + /\ + kept[k] <= kept_r1_from_position[k] + /\ + kept[k] <= kept_r2_from_position[k] + /\ + kept[k] >= 1-kept_equal[k] + ) + /\ + forall(k in 1..len)( + kept_r2_from_position[k] - (1-kept[k])*len <= p1 + /\ + kept_r1_from_position[k] - (1-kept[k])*len <= p2 + ) + /\ + forall(i in 1..len)( + sum(j in 0..len)(kept_r2_from_position_i[i,j]) == 1 + /\ + sum(j in 0..len)(kept_r2_from_position_i[i,j]*j) == kept_r2_from_position[i] + /\ + sum(j in 0..len)(kept_r1_from_position_i[i,j]) == 1 + /\ + sum(j in 0..len)(kept_r1_from_position_i[i,j]*j) == kept_r1_from_position[i] + ) + /\ + forall(i in 1..len)( + z[i] <= 1 + (1-kept_r2_from_position_i[i,0])*max_char + ) + /\ + forall(i in 1..len, j in 1..len)( + z[i] - (1-kept_equal[i])*max_char <= S1[j] + (1-kept_r2_from_position_i[i,j])*max_char + /\ + z[i] + (1-kept_equal[i])*max_char >= S1[j] - (1-kept_r2_from_position_i[i,j])*max_char + ) + /\ + forall(i in 1..len, j in 1..len)( + z[i] - (1-kept_equal[i])*max_char <= S2[j] + (1-kept_r1_from_position_i[i,j])*max_char + /\ + z[i] + (1-kept_equal[i])*max_char >= S2[j] - (1-kept_r1_from_position_i[i,j])*max_char + ); + +% Based on the MIP formulation proposed in the paper: +% "Finding median and center strings for a probability distribution on a set of strings under Levenshtein distance based on integer linear programming", Hayashida and Koyano, BIOSTEC 2016. +predicate fzn_edit_distance(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: len = length(S1); + + array[int] of var int: O1 = array1d(1..len, S1); + array[int] of var int: O2 = array1d(1..len, S2); + array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); + int: max_ic = max(W_ins); + array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); + int: max_dc = max(W_del); + array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, + [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); + int: max_sc = max(W_sub_0); + + array[0..len, 0..len] of var 0..1: x; + array[0..len, 0..len] of var 0..max_dc: x_c; + array[0..len, 0..len] of var 0..1: y; + array[0..len, 0..len] of var 0..max_ic: y_c; + array[0..len, 0..len] of var 0..1: z; + + array[1..len, 1..len] of var 0..1: g; + array[1..len, 1..len] of var 0..1: h; + array[1..len, 1..len] of var 0..max_sc: h_c; + + + array[1..len] of var 0..1: used1; + array[1..len] of var 0..1: used2; + } in + % see which positions are used (i.e. positions which are not 0) + forall(i in 1..len)( + O1[i] <= max_char*used1[i] + /\ + used1[i] < 1 + O1[i] + ) + /\ + forall(i in 1..len)( + O2[i] <= max_char*used2[i] + /\ + used2[i] < 1 + O2[i] + ) + /\ + % check that strings are correctly aligned + len = len + /\ + forall(i in 1..len-1)( + used1[i+1] <= used1[i] + ) + /\ + forall(i in 1..len-1)( + used2[i+1] <= used2[i] + ) + /\ + % a1 + 1 = x[1,0] + y[1,0] + z[1,1] + /\ + % a2 + forall(i in 1..len-1)( + x[i,0] = x[i+1,0] + y[i,1] + z[i+1,1] + ) + /\ + % a3 + x[len,0] = y[len,1] + /\ + % a4 + forall(j in 1..len-1)( + y[0,j] = x[1,j] + y[0,j+1] + z[1,j+1] + ) + /\ + % a5 + y[0,len] = x[1,len] + /\ + % a6 + forall(i in 1..len-1, j in 1..len-1)( + x[i,j] + y[i,j] + z[i,j] = x[i+1,j] + y[i,j+1] + z[i+1,j+1] + ) + /\ + % a7 + forall(j in 1..len-1)( + x[len,j] + y[len,j] + z[len,j] = y[len,j+1] + ) + /\ + % a8 + forall(i in 1..len-1)( + x[i,len] + y[i,len] + z[i,len] = x[i+1,len] + ) + /\ + % a9 + x[len,len] + y[len,len] + z[len,len] = 1 + /\ + % special 0 length string constraint, we need to force a variable in this case + y[0,len] * len >= len * (1-len) + /\ + % b + forall(j in 1..len)( + y[len,j] >= (1/len) * (j-len) + ) + /\ + % c1 + forall(i in 1..len, j in 1..len)( + O1[i] - O2[j] <= max_char * g[i,j] + ) + /\ + % c2 + forall(i in 1..len, j in 1..len)( + O2[j] - O1[i] <= max_char * g[i,j] + ) + /\ + % d2 + forall(i in 1..len, j in 1..len)( + h[i,j] >= z[i,j] + g[i,j] - 1 + ) + /\ + forall(i in 1..len, j in 1..len)( + h[i,j] <= 1/2 * (z[i,j] + g[i,j]) + ) + /\ + % if the empty character zero is used, we forbid substitution + forall(i in 1..len, j in 1..len)( + O1[i] >= h[i,j] + /\ + O2[j] >= h[i,j] + ) + /\ + % linearize deletion costs + forall(i in 1..len, j in 0..len)( + x_c[i,j] >= W_del_0[O1[i]] - (1-x[i,j])*max_dc + ) + /\ + % linearize insertion costs + forall(i in 0..len, j in 1..len)( + y_c[i,j] >= W_ins_0[O2[j]] - (1-y[i,j])*max_ic + ) + /\ + % linearize substitution costs + forall(i in 1..len, j in 1..len)( + h_c[i,j] >= W_sub_0[O1[i],O2[j]] - (1-h[i,j])*max_sc + ) + /\ + ED = sum(i in 1..len)(x_c[i,0]) + + sum(j in 1..len)(y_c[0,j]) + + sum(i in 1..len, j in 1..len)(x_c[i,j] + y_c[i,j] + h_c[i,j]); \ No newline at end of file diff --git a/share/minizinc/std/edit_distance.mzn b/share/minizinc/std/edit_distance.mzn index ffb36d751..268d044cd 100644 --- a/share/minizinc/std/edit_distance.mzn +++ b/share/minizinc/std/edit_distance.mzn @@ -1,3 +1,5 @@ +include "fzn_edit_distance.mzn"; + /** @group globals Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. Any character insertion or deletion has a cost of 1, whereas any character substitution has a cost of 2. @@ -6,24 +8,20 @@ Example: Let \a max_char = 2, \a S1 = [1,1,0,0,0], \a S2 = [1,2,1,0,0], then \a ED = 1, as the minimum edit distance is 1 between the strings "11" and "121" (a single '2' character needs to be inserted). @param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) -@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) -@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string @param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 */ predicate edit_distance(int: max_char, array[int] of var int: S1, array[int] of var int: S2, var int: ED) = - let { - int: len1 = length(S1); - int: len2 = length(S2); - var 0..len1: p1; - var 0..len2: p2; - var 0..len1: kept_sum; - } in - ED = p1 + p2 - 2*kept_sum - /\ - edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + assert(length(S1) == length(S2), + "edit_distance: string arrays S1 and S2 need to have the same length", + assert(max_char >= 1, + "edit_distance: max_char must be >= 1", + fzn_edit_distance(max_char, S1, S2, ED) + )); /** @group globals Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. @@ -34,8 +32,8 @@ predicate edit_distance(int: max_char, @param C_ins: Costs for a single character insertion @param C_del: Costs for a single character deletion @param C_sub: Costs for a single character substitution -@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) -@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string @param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 */ predicate edit_distance(int: max_char, @@ -45,93 +43,17 @@ predicate edit_distance(int: max_char, array[int] of var int: S1, array[int] of var int: S2, var int: ED) = - let { int: len1 = length(S1); - int: len2 = length(S2); - var 0..len1: p1; - var 0..len2: p2; - var 0..len1: kept_sum; - var 0..len2: insertions; - var 0..len1: deletions; - var 0..max(len1,len2): substitutions; - } in - substitutions = min(p1,p2) - kept_sum - /\ - insertions = max(p2-p1, 0) - /\ - deletions = max(p1-p2, 0) - /\ - ED = C_ins * insertions + C_sub * substitutions + C_del * deletions - /\ - edit_distance_kept(max_char, S1, S2, p1, p2, kept_sum); + assert(length(S1) == length(S2), + "edit_distance: string arrays S1 and S2 need to have the same length", + assert(max_char >= 1, + "edit_distance: max_char must be >= 1", + assert(C_sub <= C_ins + C_del, + "edit_distance: The following inequality must hold C_sub <= C_ins + C_del", + assert(C_sub >= 1 /\ C_ins >= 1 /\ C_del >= 1, + "edit_distance: C_sub, C_ins, C_del all must be >= 1", + fzn_edit_distance(max_char, C_ins, C_del, C_sub, S1, S2, ED) + )))); -% Decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. -predicate edit_distance_kept(int: max_char, - array[int] of var int: S1, - array[int] of var int: S2, - var int: p1, - var int: p2, - var int: kept_sum) = - let { int: l1 = min(index_set(S1)); - int: l2 = min(index_set(S2)); - int: u1 = max(index_set(S1)); - int: u2 = max(index_set(S2)); - int: len1 = length(S1); - int: len2 = length(S2); - var 0..len2: insertions; - var 0..len1: deletions; - var 0..max(len1,len2): substitutions; - array[l1..u1] of var 0..len2: kept_r1_from_position; - array[l2..u2] of var 0..len1: kept_r2_from_position; - } in - p1 = sum(i in l1..u1 where S1[i] != 0)(1) - /\ - p2 = sum(i in l2..u2 where S2[i] != 0)(1) - /\ - kept_sum = sum(i in l1..u1 where kept_r1_from_position[i] != 0)(1) - /\ - forall(i in l1..u1-1)( - S1[i] == 0 -> S1[i+1] == 0 - ) - /\ - forall(i in l2..u2-1)( - S2[i] == 0 -> S2[i+1] == 0 - ) - /\ - forall(i in l1+1..u1 where kept_r1_from_position[i] != 0) ( - kept_r1_from_position[i] > kept_r1_from_position[i-1] - ) - /\ - forall(i in l2+1..u2 where kept_r2_from_position[i] != 0) ( - kept_r2_from_position[i] > kept_r2_from_position[i-1] - ) - /\ - forall(i in l1..u1) ( - kept_r1_from_position[i] > 0 <-> kept_r2_from_position[i] > 0 - ) - /\ - forall(j in l2..u2-1) ( - (kept_r1_from_position[j] = 0) -> (kept_r1_from_position[j+1] = 0) - ) - /\ - forall(j in l1..u1-1) ( - (kept_r2_from_position[j] = 0) -> (kept_r2_from_position[j+1] = 0) - ) - /\ - forall(i in l2..u2 where kept_r2_from_position[i] != 0) ( - kept_r2_from_position[i] <= p1 - ) - /\ - forall(i in l1..u1 where kept_r1_from_position[i] != 0) ( - kept_r1_from_position[i] <= p2 - ) - /\ - forall(i in l1..u1 where kept_r1_from_position[i] != 0) ( - S1[kept_r2_from_position[i]] = S2[kept_r1_from_position[i]] - ); - - -% Dynamic programming routine decomposition for insertion/deletion/substition cost defined by character -% DP routine first proposed in the paper "The String-to-String Correction Problem", Wagner and Fischer, Journal of the ACM, 1978. /** @group globals Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. This constraint functions in the same way as the constraint \a edit_distance(max_char, S1, S2, ED), however it additionally specifies custom character insertion, deletion and substitution costs for each individual combination of characters. @@ -142,8 +64,8 @@ predicate edit_distance_kept(int: max_char, @param W_ins: Integer array containing the costs for single character insertions @param W_del: Integer array containing the costs for single character deletions @param W_sub: Integer array containing the costs for single character substitutions -@param S1: Integer array that represents the first string, the index set has to be 1..length(\a S1 ) -@param S2: Integer array that represents the second string, the index set has to be 1..length(\a S2 ) +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string @param ED: Variable that is constrained to be >= than the minimum edit distance between \a S1 to \a S2 */ predicate edit_distance(int: max_char, @@ -153,54 +75,21 @@ predicate edit_distance(int: max_char, array[int] of var int: S1, array[int] of var int: S2, var int: ED) = - let { int: l1 = min(index_set(S1)) - 1; - int: l2 = min(index_set(S2)) - 1; - int: u1 = max(index_set(S1)); - int: u2 = max(index_set(S2)); - array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); - array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); - array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, - [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); - array[l1..u1,l2..u2] of var int: T; - } in - forall(i in l1+1..u1-1)( - S1[i] == 0 -> S1[i+1] == 0 - ) - /\ - forall(i in l2+1..u2-1)( - S2[i] == 0 -> S2[i+1] == 0 - ) - /\ - T[l1,l2] = 0 /\ - T[u1,u2] = ED /\ - forall(i in l1+1..u1, j in l2+1..u2)( - T[i,j] >= 0 /\ T[i,j] <= sum(i2 in 1..i)(W_del_0[S1[i2]]) + sum(j2 in 1..j)(W_ins_0[S2[j2]]) - ) - /\ - forall(j in l2+1..u2)( - T[l1,j] = sum(j2 in 1..j)(W_ins_0[S2[j2]]) - ) - /\ - forall(i in l1+1..u1)( - T[i,l2] = sum(i2 in 1..i)(W_del_0[S1[i2]]) - ) - /\ - forall(i in l1+1..u1, j in l2+1..u2)( - T[i,j] = - if S1[i] = S2[j] then - T[i-1,j-1] - else - if S1[i] = 0 then - T[i-1,j] - else - if S2[j] = 0 then - T[i,j-1] - else - min([T[i-1,j]+W_del_0[S1[i]], - T[i,j-1]+W_ins_0[S2[j]], - T[i-1,j-1]+W_sub_0[S1[i], S2[j]] - ]) - endif - endif - endif - ); \ No newline at end of file + assert(length(S1) == length(S2), + "edit_distance: string arrays S1 and S2 need to have the same length", + assert(index_set(W_ins) == 1..max_char /\ index_set(W_del) == 1..max_char, + "edit_distance: index sets of W_ins and W_del must be 1..max_char", + assert(index_set_1of2(W_sub) == 1..max_char /\ index_set_2of2(W_sub) == 1..max_char, + "edit_distance: both index sets of W_sub must be 1..max_char", + assert(max_char >= 1, + "edit_distance: max_char must be >= 1", + assert(forall(c1 in 1..max_char, c2 in 1..max_char)(W_sub[c1,c2] <= W_ins[c2] + W_del[c1]), + "edit_distance: The following inequality must hold for all c1, c2 in 1..max_char: W_sub[c1,c2] <= W_ins[c2] + W_del[c1]", + assert(forall(c in 1..max_char)(W_ins[c] >= 1 /\ W_del[c] >= 1), + "edit_distance: for all c in 1..max_char: W_ins[c] and W_del[c] must be >= 1", + assert(forall(c1 in 1..max_char, c2 in 1..max_char where c1 != c2)(W_sub[c1,c2] >= 1), + "edit_distance: for all c1, c2 in 1..max_char where c1 != c2: W_sub[c1,c2] must be >= 1", + assert(forall(c in 1..max_char)(W_sub[c,c] == 0), + "edit_distance: for all c in 1..max_char: W_sub[c,c] must be == 0", + fzn_edit_distance(max_char, W_ins, W_del, W_sub, S1, S2, ED) + )))))))); \ No newline at end of file diff --git a/share/minizinc/std/fzn_edit_distance.mzn b/share/minizinc/std/fzn_edit_distance.mzn new file mode 100644 index 000000000..6e0f13067 --- /dev/null +++ b/share/minizinc/std/fzn_edit_distance.mzn @@ -0,0 +1,182 @@ +predicate fzn_edit_distance(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { + int: len = length(S1); + var 0..len: p1; + var 0..len: p2; + var 0..len: kept_sum; + int: l1 = min(index_set(S1)); + int: l2 = min(index_set(S2)); + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + int: na1 = l1-1; + int: na2 = l2-1; + array[1..len] of var na2..u2: kept_r1_from_position; + array[1..len] of var na1..u1: kept_r2_from_position; + } in + ED = p1 + p2 - 2 * kept_sum + /\ + forall(i in 1..len where kept_r1_from_position[i] != na2) ( + S1[kept_r2_from_position[i]] = S2[kept_r1_from_position[i]] + ) + /\ + fzn_edit_distance_kept(max_char, S1, S2, p1, p2, kept_r1_from_position, kept_r2_from_position, kept_sum); + +predicate fzn_edit_distance(int: max_char, + int: C_ins, + int: C_del, + int: C_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: len = length(S1); + var 0..len: p1; + var 0..len: p2; + var 0..len: kept_sum; + var 0..len: insertions; + var 0..len: deletions; + var 0..len: substitutions; + int: l1 = min(index_set(S1)); + int: l2 = min(index_set(S2)); + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + int: na1 = l1-1; + int: na2 = l2-1; + array[1..len] of var na2..u2: kept_r1_from_position; + array[1..len] of var na1..u1: kept_r2_from_position; + } in + substitutions = count(i in 1..len)(kept_r1_from_position[i] != na2 /\ S1[kept_r2_from_position[i]] != S2[kept_r1_from_position[i]]) + /\ + insertions = p2 - kept_sum + /\ + deletions = p1 - kept_sum + /\ + ED = C_ins * insertions + C_sub * substitutions + C_del * deletions + /\ + fzn_edit_distance_kept(max_char, S1, S2, p1, p2, kept_r1_from_position, kept_r2_from_position, kept_sum); + +% Decomposition from the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. +predicate fzn_edit_distance_kept(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: p1, + var int: p2, + array[int] of var int: kept_r1_from_position, + array[int] of var int: kept_r2_from_position, + var int: kept_sum) = + let { int: l1 = min(index_set(S1)); + int: l2 = min(index_set(S2)); + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + int: len = length(S1); + int: na1 = l1-1; + int: na2 = l2-1; + var 0..len: insertions; + var 0..len: deletions; + var 0..len: substitutions; + } in + p1 = sum(i in l1..u1 where S1[i] != 0)(1) + /\ + p2 = sum(i in l2..u2 where S2[i] != 0)(1) + /\ + kept_sum = sum(i in 1..len where kept_r1_from_position[i] != na2)(1) + /\ + forall(i in l1..u1-1)( + S1[i] == 0 -> S1[i+1] == 0 + ) + /\ + forall(i in l2..u2-1)( + S2[i] == 0 -> S2[i+1] == 0 + ) + /\ + forall(i in 2..len where kept_r1_from_position[i] != na2) ( + kept_r1_from_position[i] > kept_r1_from_position[i-1] + ) + /\ + forall(i in 2..len where kept_r2_from_position[i] != na1) ( + kept_r2_from_position[i] > kept_r2_from_position[i-1] + ) + /\ + forall(i in 1..len) ( + kept_r1_from_position[i] > na2 <-> kept_r2_from_position[i] > na1 + ) + /\ + forall(i in 1..len-1) ( + (kept_r1_from_position[i] = na2) -> (kept_r1_from_position[i+1] = na2) + ) + /\ + forall(i in 1..len-1) ( + (kept_r2_from_position[i] = na1) -> (kept_r2_from_position[i+1] = na1) + ) + /\ + forall(i in 1..len where kept_r2_from_position[i] != na1) ( + kept_r2_from_position[i] <= l1-1 + p1 + ) + /\ + forall(i in 1..len where kept_r1_from_position[i] != na2) ( + kept_r1_from_position[i] <= l2-1 + p2 + ); + + +% Dynamic programming routine decomposition for insertion/deletion/substition cost defined by character +% DP routine first proposed in the paper "The String-to-String Correction Problem", Wagner and Fischer, Journal of the ACM, 1978. +predicate fzn_edit_distance(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: l1 = min(index_set(S1)) - 1; + int: l2 = min(index_set(S2)) - 1; + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); + array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); + array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, + [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); + array[l1..u1,l2..u2] of var int: T; + } in + forall(i in l1+1..u1-1)( + S1[i] == 0 -> S1[i+1] == 0 + ) + /\ + forall(i in l2+1..u2-1)( + S2[i] == 0 -> S2[i+1] == 0 + ) + /\ + T[l1,l2] = 0 /\ + T[u1,u2] = ED /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] >= 0 /\ T[i,j] <= sum(i2 in 1..i)(W_del_0[S1[i2]]) + sum(j2 in 1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(j in l2+1..u2)( + T[l1,j] = sum(j2 in 1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(i in l1+1..u1)( + T[i,l2] = sum(i2 in 1..i)(W_del_0[S1[i2]]) + ) + /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] = + if S1[i] = S2[j] then + T[i-1,j-1] + else + if S1[i] = 0 then + T[i-1,j] + else + if S2[j] = 0 then + T[i,j-1] + else + min([T[i-1,j]+W_del_0[S1[i]], + T[i,j-1]+W_ins_0[S2[j]], + T[i-1,j-1]+W_sub_0[S1[i], S2[j]] + ]) + endif + endif + endif + ); \ No newline at end of file From 2213313bc3510b94605259cb54ee9827bc7ecf2a Mon Sep 17 00:00:00 2001 From: "Winter, Felix" Date: Thu, 19 Nov 2020 17:20:43 +0100 Subject: [PATCH 3/4] Added exact version of the constraint, fixed problem with unequal index sets for most general version --- share/minizinc/std/edit_distance.mzn | 86 +++++++++++++++++++ share/minizinc/std/fzn_edit_distance.mzn | 57 +----------- .../minizinc/std/fzn_edit_distance_exact.mzn | 60 +++++++++++++ 3 files changed, 149 insertions(+), 54 deletions(-) create mode 100644 share/minizinc/std/fzn_edit_distance_exact.mzn diff --git a/share/minizinc/std/edit_distance.mzn b/share/minizinc/std/edit_distance.mzn index 268d044cd..20d4a0aec 100644 --- a/share/minizinc/std/edit_distance.mzn +++ b/share/minizinc/std/edit_distance.mzn @@ -1,4 +1,5 @@ include "fzn_edit_distance.mzn"; +include "fzn_edit_distance_exact.mzn"; /** @group globals Constrains \a ED to be greater than or equal to the minimum edit distance between two strings \a S1 and \a S2. @@ -92,4 +93,89 @@ predicate edit_distance(int: max_char, assert(forall(c in 1..max_char)(W_sub[c,c] == 0), "edit_distance: for all c in 1..max_char: W_sub[c,c] must be == 0", fzn_edit_distance(max_char, W_ins, W_del, W_sub, S1, S2, ED) + )))))))); + +/** @group globals + Constrains \a ED to be equal to the minimum edit distance between two strings \a S1 and \a S2 (Note that this constraint is likely to be less efficient than its "edit_distance" version when you are aiming to minimize the edit distance). + Any character insertion or deletion has a cost of 1, whereas any character substitution has a cost of 2. + All characters need to be numbered from 1 to \a max_char. + The integer arrays \a S1 and \a S2 need to have the same length, but may contain zeros at the end to indicate an "empty" character. + Example: Let \a max_char = 2, \a S1 = [1,1,0,0,0], \a S2 = [1,2,1,0,0], then \a ED = 1, as the minimum edit distance is 1 between the strings "11" and "121" (a single '2' character needs to be inserted). + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string +@param ED: Variable that is constrained to be equal to the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance_exact(int: max_char, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + edit_distance_exact(max_char, 1, 1, 2, S1, S2, ED); + +/** @group globals + Constrains \a ED to be equal to the minimum edit distance between two strings \a S1 and \a S2 (Note that this constraint is likely to be less efficient than its "edit_distance" version when you are aiming to minimize the edit distance). + This constraint functions in the same way as the constraint \a edit_distance_exact(max_char, S1, S2, ED), however it additionally specifies custom character insertion, deletion and substitution costs. + All costs should be >= 1 and the following inequality must hold: C_sub <= C_ins + C_del + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param C_ins: Costs for a single character insertion +@param C_del: Costs for a single character deletion +@param C_sub: Costs for a single character substitution +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string +@param ED: Variable that is constrained to be equal to the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance_exact(int: max_char, + int: C_ins, + int: C_del, + int: C_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + assert(C_sub <= C_ins + C_del, + "edit_distance: The following inequality must hold C_sub <= C_ins + C_del", + assert(C_sub >= 1 /\ C_ins >= 1 /\ C_del >= 1, + "edit_distance: C_sub, C_ins, C_del all must be >= 1", + edit_distance_exact(max_char, [C_ins | i in 1..max_char], [C_del | i in 1..max_char], array2d(1..max_char, 1..max_char, [if i == j then 0 else C_sub endif | i in 1..max_char, j in 1..max_char]), S1, S2, ED) + )); + +/** @group globals + Constrains \a ED to be equal to the minimum edit distance between two strings \a S1 and \a S2 (Note that this constraint is likely to be less efficient than its "edit_distance" version when you are aiming to minimize the edit distance). + This constraint functions in the same way as the constraint \a edit_distance_exact(max_char, S1, S2, ED), however it additionally specifies custom character insertion, deletion and substitution costs for each individual combination of characters. + All costs should be >= 1, except the substitution costs of a character with itself which should be 0 (W_sub[c,c] = 0). + Furthermore, the following inequality must hold for all pairs of characters c1 and c2: \a W_sub[c1,c2] <= \a W_del[c1] + \a W_ins[c2] + +@param max_char: The maximum character that can occur in a string (all characters should be numbered from 1..max_char, 0 represents an empty character) +@param W_ins: Integer array containing the costs for single character insertions +@param W_del: Integer array containing the costs for single character deletions +@param W_sub: Integer array containing the costs for single character substitutions +@param S1: Integer array that represents the first string +@param S2: Integer array that represents the second string +@param ED: Variable that is constrained to be equal to the minimum edit distance between \a S1 to \a S2 +*/ +predicate edit_distance_exact(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + assert(length(S1) == length(S2), + "edit_distance: string arrays S1 and S2 need to have the same length", + assert(index_set(W_ins) == 1..max_char /\ index_set(W_del) == 1..max_char, + "edit_distance: index sets of W_ins and W_del must be 1..max_char", + assert(index_set_1of2(W_sub) == 1..max_char /\ index_set_2of2(W_sub) == 1..max_char, + "edit_distance: both index sets of W_sub must be 1..max_char", + assert(max_char >= 1, + "edit_distance: max_char must be >= 1", + assert(forall(c1 in 1..max_char, c2 in 1..max_char)(W_sub[c1,c2] <= W_ins[c2] + W_del[c1]), + "edit_distance: The following inequality must hold for all c1, c2 in 1..max_char: W_sub[c1,c2] <= W_ins[c2] + W_del[c1]", + assert(forall(c in 1..max_char)(W_ins[c] >= 1 /\ W_del[c] >= 1), + "edit_distance: for all c in 1..max_char: W_ins[c] and W_del[c] must be >= 1", + assert(forall(c1 in 1..max_char, c2 in 1..max_char where c1 != c2)(W_sub[c1,c2] >= 1), + "edit_distance: for all c1, c2 in 1..max_char where c1 != c2: W_sub[c1,c2] must be >= 1", + assert(forall(c in 1..max_char)(W_sub[c,c] == 0), + "edit_distance: for all c in 1..max_char: W_sub[c,c] must be == 0", + fzn_edit_distance_exact(max_char, W_ins, W_del, W_sub, S1, S2, ED) )))))))); \ No newline at end of file diff --git a/share/minizinc/std/fzn_edit_distance.mzn b/share/minizinc/std/fzn_edit_distance.mzn index 6e0f13067..dff4778f7 100644 --- a/share/minizinc/std/fzn_edit_distance.mzn +++ b/share/minizinc/std/fzn_edit_distance.mzn @@ -1,3 +1,5 @@ +include "fzn_edit_distance_exact.mzn"; + predicate fzn_edit_distance(int: max_char, array[int] of var int: S1, array[int] of var int: S2, @@ -119,9 +121,6 @@ predicate fzn_edit_distance_kept(int: max_char, kept_r1_from_position[i] <= l2-1 + p2 ); - -% Dynamic programming routine decomposition for insertion/deletion/substition cost defined by character -% DP routine first proposed in the paper "The String-to-String Correction Problem", Wagner and Fischer, Journal of the ACM, 1978. predicate fzn_edit_distance(int: max_char, array[int] of int: W_ins, array[int] of int: W_del, @@ -129,54 +128,4 @@ predicate fzn_edit_distance(int: max_char, array[int] of var int: S1, array[int] of var int: S2, var int: ED) = - let { int: l1 = min(index_set(S1)) - 1; - int: l2 = min(index_set(S2)) - 1; - int: u1 = max(index_set(S1)); - int: u2 = max(index_set(S2)); - array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); - array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); - array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, - [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); - array[l1..u1,l2..u2] of var int: T; - } in - forall(i in l1+1..u1-1)( - S1[i] == 0 -> S1[i+1] == 0 - ) - /\ - forall(i in l2+1..u2-1)( - S2[i] == 0 -> S2[i+1] == 0 - ) - /\ - T[l1,l2] = 0 /\ - T[u1,u2] = ED /\ - forall(i in l1+1..u1, j in l2+1..u2)( - T[i,j] >= 0 /\ T[i,j] <= sum(i2 in 1..i)(W_del_0[S1[i2]]) + sum(j2 in 1..j)(W_ins_0[S2[j2]]) - ) - /\ - forall(j in l2+1..u2)( - T[l1,j] = sum(j2 in 1..j)(W_ins_0[S2[j2]]) - ) - /\ - forall(i in l1+1..u1)( - T[i,l2] = sum(i2 in 1..i)(W_del_0[S1[i2]]) - ) - /\ - forall(i in l1+1..u1, j in l2+1..u2)( - T[i,j] = - if S1[i] = S2[j] then - T[i-1,j-1] - else - if S1[i] = 0 then - T[i-1,j] - else - if S2[j] = 0 then - T[i,j-1] - else - min([T[i-1,j]+W_del_0[S1[i]], - T[i,j-1]+W_ins_0[S2[j]], - T[i-1,j-1]+W_sub_0[S1[i], S2[j]] - ]) - endif - endif - endif - ); \ No newline at end of file + fzn_edit_distance_exact(max_char, W_ins, W_del, W_sub, S1, S2, ED); \ No newline at end of file diff --git a/share/minizinc/std/fzn_edit_distance_exact.mzn b/share/minizinc/std/fzn_edit_distance_exact.mzn new file mode 100644 index 000000000..81c5bc615 --- /dev/null +++ b/share/minizinc/std/fzn_edit_distance_exact.mzn @@ -0,0 +1,60 @@ +% Dynamic programming routine decomposition for insertion/deletion/substition cost defined by character +% DP routine first proposed in the paper "The String-to-String Correction Problem", Wagner and Fischer, Journal of the ACM, 1978. +predicate fzn_edit_distance_exact(int: max_char, + array[int] of int: W_ins, + array[int] of int: W_del, + array[int, int] of int: W_sub, + array[int] of var int: S1, + array[int] of var int: S2, + var int: ED) = + let { int: l1 = min(index_set(S1)) - 1; + int: l2 = min(index_set(S2)) - 1; + int: u1 = max(index_set(S1)); + int: u2 = max(index_set(S2)); + array[int] of int: W_ins_0 = array1d(0..length(W_ins), [0] ++ W_ins); + array[int] of int: W_del_0 = array1d(0..length(W_del), [0] ++ W_del); + array[int, int] of int: W_sub_0 = array2d(0..max_char, 0..max_char, + [ if i == 0 then W_ins_0[j] else if j == 0 then W_del_0[i] else W_sub[i,j] endif endif | i in 0..max_char, j in 0..max_char]); + array[l1..u1,l2..u2] of var int: T; + } in + forall(i in l1+1..u1-1)( + S1[i] == 0 -> S1[i+1] == 0 + ) + /\ + forall(i in l2+1..u2-1)( + S2[i] == 0 -> S2[i+1] == 0 + ) + /\ + T[l1,l2] = 0 /\ + T[u1,u2] = ED /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] >= 0 /\ T[i,j] <= sum(i2 in l1+1..i)(W_del_0[S1[i2]]) + sum(j2 in l2+1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(j in l2+1..u2)( + T[l1,j] = sum(j2 in l2+1..j)(W_ins_0[S2[j2]]) + ) + /\ + forall(i in l1+1..u1)( + T[i,l2] = sum(i2 in l1+1..i)(W_del_0[S1[i2]]) + ) + /\ + forall(i in l1+1..u1, j in l2+1..u2)( + T[i,j] = + if S1[i] = S2[j] then + T[i-1,j-1] + else + if S1[i] = 0 then + T[i-1,j] + else + if S2[j] = 0 then + T[i,j-1] + else + min([T[i-1,j]+W_del_0[S1[i]], + T[i,j-1]+W_ins_0[S2[j]], + T[i-1,j-1]+W_sub_0[S1[i], S2[j]] + ]) + endif + endif + endif + ); \ No newline at end of file From 664c2a411cda5eae2611d635bd86a0134a523799 Mon Sep 17 00:00:00 2001 From: "Winter, Felix" Date: Thu, 18 Mar 2021 18:02:55 +0100 Subject: [PATCH 4/4] Fixed bug in linear translation for kept decomposition --- share/minizinc/linear/fzn_edit_distance.mzn | 22 ++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/share/minizinc/linear/fzn_edit_distance.mzn b/share/minizinc/linear/fzn_edit_distance.mzn index 524590ff3..ba01b9ce1 100644 --- a/share/minizinc/linear/fzn_edit_distance.mzn +++ b/share/minizinc/linear/fzn_edit_distance.mzn @@ -67,9 +67,9 @@ predicate fzn_edit_distance_kept(int: max_char, kept_sum = sum(i in 1..len)(kept[i]) /\ forall(k in 2..len)( - kept_r1_from_position[k] + (1-kept[k])*len > kept_r1_from_position[k-1] + kept_r1_from_position[k] + (1-kept[k])*(len+1) > kept_r1_from_position[k-1] /\ - kept_r2_from_position[k] + (1-kept[k])*len > kept_r2_from_position[k-1] + kept_r2_from_position[k] + (1-kept[k])*(len+1) > kept_r2_from_position[k-1] ) /\ forall(i in 1..len-1)( @@ -108,7 +108,19 @@ predicate fzn_edit_distance_kept(int: max_char, /\ forall(i in 1..len)( z[i] <= 1 + (1-kept_r2_from_position_i[i,0])*max_char - ); + ) + /\ + forall(i in 1..len, j in 1..len)( + z[i] - (1-kept[i])*max_char <= S1[j] + (1-kept_r2_from_position_i[i,j])*max_char + /\ + z[i] + (1-kept[i])*max_char >= S1[j] - (1-kept_r2_from_position_i[i,j])*max_char + ) + /\ + forall(i in 1..len, j in 1..len)( + z[i] - (1-kept[i])*max_char <= S2[j] + (1-kept_r1_from_position_i[i,j])*max_char + /\ + z[i] + (1-kept[i])*max_char >= S2[j] - (1-kept_r1_from_position_i[i,j])*max_char + ); % Linearized decomposition based on the paper "Constraint-Based Scheduling for Paint Shops in the Automotive Supply Industry", Winter and Musliu, ACM Transactions on Intelligent Systems and Technology, 2020. predicate fzn_edit_distance_kept_general(int: max_char, @@ -156,9 +168,9 @@ predicate fzn_edit_distance_kept_general(int: max_char, kept_sum = sum(i in 1..len)(kept[i]) /\ forall(k in 2..len)( - kept_r1_from_position[k] + (1-kept[k])*len > kept_r1_from_position[k-1] + kept_r1_from_position[k] + (1-kept[k])*(len+1) > kept_r1_from_position[k-1] /\ - kept_r2_from_position[k] + (1-kept[k])*len > kept_r2_from_position[k-1] + kept_r2_from_position[k] + (1-kept[k])*(len+1) > kept_r2_from_position[k-1] ) /\ forall(i in 1..len-1)(