Skip to content

Commit

Permalink
actually fix vae tile merging
Browse files (browse the repository at this point in the history)
Co-authored-by: stduhpf <[email protected]>
  • Loading branch information
2 people authored and SkutteOleg committed Aug 26, 2024
1 parent fddc6c3 commit c3d94dd
Showing 1 changed file with 20 additions and 30 deletions.
50 changes: 20 additions & 30 deletions ggml_extend.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -371,44 +371,34 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
int64_t width = input->ne[0];
int64_t height = input->ne[1];
int64_t channels = input->ne[2];

int64_t img_width = output->ne[0];
int64_t img_height = output->ne[1];

GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32);
for (int iy = 0; iy < height; iy++) {
for (int ix = 0; ix < width; ix++) {
for (int k = 0; k < channels; k++) {
float new_value = ggml_tensor_get_f32(input, ix, iy, k);
if (overlap > 0) { // blend colors in overlapped area
float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k);
const bool inside_x_overlap = x > 0 && ix < overlap;
const bool inside_y_overlap = y > 0 && iy < overlap;
if (inside_x_overlap && inside_y_overlap) {
// upper left corner needs to be interpolated in both directions
const float x_f = ix / float(overlap);
const float y_f = iy / float(overlap);
// TODO: try `x+y - 1`
const float f = std::min(x_f, y_f); // min of both
ggml_tensor_set_f32(
output,
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(f)),
x + ix, y + iy, k
);
continue;
} else if (inside_x_overlap) {
ggml_tensor_set_f32(
output,
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(ix / float(overlap))),
x + ix, y + iy, k
);
continue;
} else if (inside_y_overlap) {
ggml_tensor_set_f32(
output,
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(iy / float(overlap))),
x + ix, y + iy, k
);
continue;
}

const float x_f_0 = (x > 0) ? ix / float(overlap) : 1;
const float x_f_1 = (x < (img_width - width)) ? (width - ix) / float(overlap) : 1 ;
const float y_f_0 = (y > 0) ? iy / float(overlap) : 1;
const float y_f_1 = (y < (img_height - height)) ? (height - iy) / float(overlap) : 1;

const float x_f = std::min(std::min(x_f_0, x_f_1), 1.f);
const float y_f = std::min(std::min(y_f_0, y_f_1), 1.f);

ggml_tensor_set_f32(
output,
old_value + new_value * ggml_smootherstep_f32(y_f) * ggml_smootherstep_f32(x_f),
x + ix, y + iy, k
);
} else {
ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k);
}
ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k);
}
}
}
Expand Down

0 comments on commit c3d94dd

Please sign in to comment.