Skip to content

Commit

Permalink
utils: implement our own lrint() for Visual C++
Browse files Browse the repository at this point in the history
Commit cd35a4d added the -fno-math-errno flag which allows lrint() to
be inlined on GCC and Clang.

Unfortunately, Visual C++ can't inline lrint() without -fp:fast. That
flag has some nasty implications, so I think it's better to just write
our own lrint.

This change helps a bit with the video sample from libass#736. It halves the
number of dropped frames in MPC-HC on my machine (from ~180 to ~80).

Resolves: libass#806
  • Loading branch information
xenu authored and astiob committed Aug 4, 2024
1 parent c5bb87e commit b13f5d1
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 26 deletions.
10 changes: 5 additions & 5 deletions libass/ass_outline.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ bool ass_outline_transform_2d(ASS_Outline *outline, const ASS_Outline *source,
ass_outline_free(outline);
return false;
}
outline->points[i].x = lrint(v[0]);
outline->points[i].y = lrint(v[1]);
outline->points[i].x = ass_lrint(v[0]);
outline->points[i].y = ass_lrint(v[1]);
}
memcpy(outline->segments, source->segments, source->n_segments);
outline->n_points = source->n_points;
Expand Down Expand Up @@ -457,8 +457,8 @@ bool ass_outline_transform_3d(ASS_Outline *outline, const ASS_Outline *source,
ass_outline_free(outline);
return false;
}
outline->points[i].x = lrint(v[0]);
outline->points[i].y = lrint(v[1]);
outline->points[i].x = ass_lrint(v[0]);
outline->points[i].y = ass_lrint(v[1]);
}
memcpy(outline->segments, source->segments, source->n_segments);
outline->n_points = source->n_points;
Expand All @@ -478,7 +478,7 @@ void ass_outline_update_min_transformed_x(const ASS_Outline *outline,
double x = (m[0][0] * pt[i].x + m[0][1] * pt[i].y + m[0][2]) / FFMAX(z, 0.1);
if (isnan(x))
continue;
int32_t ix = lrint(FFMINMAX(x, -OUTLINE_MAX, OUTLINE_MAX));
int32_t ix = ass_lrint(FFMINMAX(x, -OUTLINE_MAX, OUTLINE_MAX));
*min_x = FFMIN(*min_x, ix);
}
}
Expand Down
2 changes: 1 addition & 1 deletion libass/ass_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,7 @@ void ass_process_karaoke_effects(RenderContext *state)
info->c[1] = tmp;
}
}
x = x_start + lrint((x_end - x_start) * dt);
x = x_start + ass_lrint((x_end - x_start) * dt);
}

for (GlyphInfo *info = start; info < end; info++) {
Expand Down
26 changes: 13 additions & 13 deletions libass/ass_render.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ static bool quantize_transform(double m[3][3], ASS_Vector *pos,
center[i] -= delta[i];
if (!(fabs(center[i]) < max_val))
return false;
qr[i] = lrint(center[i]);
qr[i] = ass_lrint(center[i]);
}

// Minimal bounding box z coordinate
Expand Down Expand Up @@ -602,7 +602,7 @@ static bool quantize_transform(double m[3][3], ASS_Vector *pos,
double val = m[i][j] * mul[j];
if (!(fabs(val) < max_val))
return false;
qm[i][j] = lrint(val);
qm[i][j] = ass_lrint(val);
}

// x_lim = |m_xx| * dx + |m_xy| * dy
Expand All @@ -629,7 +629,7 @@ static bool quantize_transform(double m[3][3], ASS_Vector *pos,
double val = m[2][j] * mul[j];
if (!(fabs(val) < max_val))
return false;
qm[2][j] = lrint(val);
qm[2][j] = ass_lrint(val);
}

if (first && offset) {
Expand Down Expand Up @@ -1215,17 +1215,17 @@ get_outline_glyph(RenderContext *state, GlyphInfo *info)
info->transform.scale = scale;
info->transform.offset = offset;

info->bbox.x_min = lrint(val->cbox.x_min * scale.x + offset.x);
info->bbox.y_min = lrint(val->cbox.y_min * scale.y + offset.y);
info->bbox.x_max = lrint(val->cbox.x_max * scale.x + offset.x);
info->bbox.y_max = lrint(val->cbox.y_max * scale.y + offset.y);
info->bbox.x_min = ass_lrint(val->cbox.x_min * scale.x + offset.x);
info->bbox.y_min = ass_lrint(val->cbox.y_min * scale.y + offset.y);
info->bbox.x_max = ass_lrint(val->cbox.x_max * scale.x + offset.x);
info->bbox.y_max = ass_lrint(val->cbox.y_max * scale.y + offset.y);

if (info->drawing_text.str || priv->settings.shaper == ASS_SHAPING_SIMPLE) {
info->cluster_advance.x = info->advance.x = lrint(val->advance * scale.x);
info->cluster_advance.x = info->advance.x = ass_lrint(val->advance * scale.x);
info->cluster_advance.y = info->advance.y = 0;
}
info->asc = lrint(asc * scale.y);
info->desc = lrint(desc * scale.y);
info->asc = ass_lrint(asc * scale.y);
info->desc = ass_lrint(desc * scale.y);
}

size_t ass_outline_construct(void *key, void *value, void *priv)
Expand Down Expand Up @@ -1511,8 +1511,8 @@ get_bitmap_glyph(RenderContext *state, GlyphInfo *info,
bord_y = ldexp(bord_y, k->scale_ord_y);
if (!(bord_x < OUTLINE_MAX && bord_y < OUTLINE_MAX))
return;
k->border.x = lrint(bord_x / STROKER_PRECISION);
k->border.y = lrint(bord_y / STROKER_PRECISION);
k->border.x = ass_lrint(bord_x / STROKER_PRECISION);
k->border.y = ass_lrint(bord_y / STROKER_PRECISION);
if (!k->border.x && !k->border.y) {
info->bm_o = info->bm;
return;
Expand Down Expand Up @@ -2458,7 +2458,7 @@ static int quantize_blur(double radius, int32_t *shadow_mask)
// floor(log2(x)) = frexp(x) - 1 = frexp(x / 2).
frexp((1 + radius) * (POSITION_PRECISION / 2), &ord);
*shadow_mask = ((uint32_t) 1 << ord) - 1;
return lrint(log1p(radius) / BLUR_PRECISION);
return ass_lrint(log1p(radius) / BLUR_PRECISION);
}

static double restore_blur(int qblur)
Expand Down
8 changes: 4 additions & 4 deletions libass/ass_shaper.c
Original file line number Diff line number Diff line change
Expand Up @@ -641,10 +641,10 @@ shape_harfbuzz_process_run(GlyphInfo *glyphs, hb_buffer_t *buf, int offset)
// set position and advance
info->skip = false;
info->glyph_index = glyph_info[j].codepoint;
info->offset.x = lrint(pos[j].x_offset * info->scale_x);
info->offset.y = lrint(-pos[j].y_offset * info->scale_y);
info->advance.x = lrint(pos[j].x_advance * info->scale_x);
info->advance.y = lrint(-pos[j].y_advance * info->scale_y);
info->offset.x = ass_lrint(pos[j].x_offset * info->scale_x);
info->offset.y = ass_lrint(-pos[j].y_offset * info->scale_y);
info->advance.x = ass_lrint(pos[j].x_advance * info->scale_x);
info->advance.y = ass_lrint(-pos[j].y_advance * info->scale_y);

// accumulate advance in the root glyph
root->cluster_advance.x += info->advance.x;
Expand Down
21 changes: 18 additions & 3 deletions libass/ass_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include <limits.h>
#include <math.h>

#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
#endif

#include "ass.h"

#ifndef SIZE_MAX
Expand Down Expand Up @@ -159,6 +163,17 @@ static inline uint32_t ass_bswap32(uint32_t x)
#endif
}

static inline long ass_lrint(double x)
{
#if defined(_MSC_VER) && !defined(__clang__) && !defined(_M_ARM64EC) && (defined(_M_X64) || _M_IX86_FP == 2)
// Unlike GCC and Clang, Visual C++ can't inline lrint() without -fp:fast, so
// we provide our own implementation
return _mm_cvtsd_si32(_mm_set_sd(x));
#else
return lrint(x);
#endif
}

static inline int d6_to_int(int x)
{
return (x + 32) >> 6;
Expand Down Expand Up @@ -189,23 +204,23 @@ static inline double d6_to_double(int x)
}
// Converts a double to an integer with 6 fractional bits: x is scaled by
// 64 (2^6) and the product is rounded to an integer.
// This is a diff rendering: the first return is the line the commit removes
// (plain lrint), the second is the line it adds (ass_lrint).
static inline int double_to_d6(double x)
{
return lrint(x * 64);
return ass_lrint(x * 64);
}
// Converts an integer with 16 fractional bits to a double by dividing
// it by 0x10000 (2^16).
static inline double d16_to_double(int x)
{
    const double one = 0x10000;
    return x / one;
}
// Converts a double to an integer with 16 fractional bits: x is scaled by
// 0x10000 (2^16) and the product is rounded to an integer.
// This is a diff rendering: the first return is the line the commit removes
// (plain lrint), the second is the line it adds (ass_lrint).
static inline int double_to_d16(double x)
{
return lrint(x * 0x10000);
return ass_lrint(x * 0x10000);
}
// Converts an integer with 22 fractional bits to a double by dividing
// it by 0x400000 (2^22).
static inline double d22_to_double(int x)
{
    const double one = 0x400000;
    return x / one;
}
// Converts a double to an integer with 22 fractional bits: x is scaled by
// 0x400000 (2^22) and the product is rounded to an integer.
// This is a diff rendering: the first return is the line the commit removes
// (plain lrint), the second is the line it adds (ass_lrint).
static inline int double_to_d22(double x)
{
return lrint(x * 0x400000);
return ass_lrint(x * 0x400000);
}

static inline int32_t lshiftwrapi(int32_t i, int32_t shift)
Expand Down

0 comments on commit b13f5d1

Please sign in to comment.