common: replace round() with nearbyint()

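In our local tests, nearbyint() was about 2x faster than round(). Note that nearbyint() rounds according to the current floating-point rounding mode (ties-to-even by default), whereas round()/lround() round halfway cases away from zero, so results may differ on exact .5 inputs; the SVG HSL-to-RGB conversion also changes from ceil() to nearbyint().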
2025-06-20 15:03:25 +00:00 · 2024-07-10 19:08:10 +09:00 · 2024-07-10 19:08:10 +09:00 · 318c76119a
commit 318c76119a
parent 74f5928e84
9 changed files with 33 additions and 33 deletions
--- a/src/loaders/lottie/tvgLottieLoader.cpp
+++ b/src/loaders/lottie/tvgLottieLoader.cpp
@ -316,7 +316,7 @@ bool LottieLoader::frame(float no)
    //This ensures that the target frame number is reached.
    frameNo *= 10000.0f;
-    frameNo = roundf(frameNo);
+    frameNo = nearbyintf(frameNo);
    frameNo *= 0.0001f;
    //Skip update if frame diff is too small.
--- a/src/loaders/lottie/tvgLottieModel.cpp
+++ b/src/loaders/lottie/tvgLottieModel.cpp
@ -160,32 +160,32 @@ uint32_t LottieGradient::populate(ColorStop& color)
        if (cidx == clast || aidx == color.input->count) break;
        if ((*color.input)[cidx] == (*color.input)[aidx]) {
            cs.offset = (*color.input)[cidx];
-            cs.r = lroundf((*color.input)[cidx + 1] * 255.0f);
+            cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
-            cs.g = lroundf((*color.input)[cidx + 2] * 255.0f);
+            cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
-            cs.b = lroundf((*color.input)[cidx + 3] * 255.0f);
+            cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
-            cs.a = lroundf((*color.input)[aidx + 1] * 255.0f);
+            cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
            cidx += 4;
            aidx += 2;
        } else if ((*color.input)[cidx] < (*color.input)[aidx]) {
            cs.offset = (*color.input)[cidx];
-            cs.r = lroundf((*color.input)[cidx + 1] * 255.0f);
+            cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
-            cs.g = lroundf((*color.input)[cidx + 2] * 255.0f);
+            cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
-            cs.b = lroundf((*color.input)[cidx + 3] * 255.0f);
+            cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
            //generate alpha value
            if (output.count > 0) {
                auto p = ((*color.input)[cidx] - output.last().offset) / ((*color.input)[aidx] - output.last().offset);
-                cs.a = mathLerp<uint8_t>(output.last().a, lroundf((*color.input)[aidx + 1] * 255.0f), p);
+                cs.a = mathLerp<uint8_t>(output.last().a, (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f), p);
            } else cs.a = 255;
            cidx += 4;
        } else {
            cs.offset = (*color.input)[aidx];
-            cs.a = lroundf((*color.input)[aidx + 1] * 255.0f);
+            cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
            //generate color value
            if (output.count > 0) {
                auto p = ((*color.input)[aidx] - output.last().offset) / ((*color.input)[cidx] - output.last().offset);
-                cs.r = mathLerp<uint8_t>(output.last().r, lroundf((*color.input)[cidx + 1] * 255.0f), p);
+                cs.r = mathLerp<uint8_t>(output.last().r, (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f), p);
-                cs.g = mathLerp<uint8_t>(output.last().g, lroundf((*color.input)[cidx + 2] * 255.0f), p);
+                cs.g = mathLerp<uint8_t>(output.last().g, (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f), p);
-                cs.b = mathLerp<uint8_t>(output.last().b, lroundf((*color.input)[cidx + 3] * 255.0f), p);
+                cs.b = mathLerp<uint8_t>(output.last().b, (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f), p);
            } else cs.r = cs.g = cs.b = 255;
            aidx += 2;
        }
@ -195,9 +195,9 @@ uint32_t LottieGradient::populate(ColorStop& color)
    //color remains
    while (cidx + 3 < clast) {
        cs.offset = (*color.input)[cidx];
-        cs.r = lroundf((*color.input)[cidx + 1] * 255.0f);
+        cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
-        cs.g = lroundf((*color.input)[cidx + 2] * 255.0f);
+        cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
-        cs.b = lroundf((*color.input)[cidx + 3] * 255.0f);
+        cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
        cs.a = (output.count > 0) ? output.last().a : 255;
        output.push(cs);
        cidx += 4;
@ -206,7 +206,7 @@ uint32_t LottieGradient::populate(ColorStop& color)
    //alpha remains
    while (aidx < color.input->count) {
        cs.offset = (*color.input)[aidx];
-        cs.a = lroundf((*color.input)[aidx + 1] * 255.0f);
+        cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
        if (output.count > 0) {
            cs.r = output.last().r;
            cs.g = output.last().g;
--- a/src/loaders/lottie/tvgLottieParser.cpp
+++ b/src/loaders/lottie/tvgLottieParser.cpp
@ -348,7 +348,7 @@ void LottieParser::getValue(RGB24& color)
    while (nextArrayValue()) {
        auto val = getFloat();
-        if (i < 3) color.rgb[i++] = int32_t(lroundf(val * 255.0f));
+        if (i < 3) color.rgb[i++] = (int32_t)nearbyint(val * 255.0f);
    }
    //TODO: color filter?
--- a/src/loaders/lottie/tvgLottieProperty.h
+++ b/src/loaders/lottie/tvgLottieProperty.h
@ -96,7 +96,7 @@ static inline RGB24 operator+(const RGB24& lhs, const RGB24& rhs)
 static inline RGB24 operator*(const RGB24& lhs, float rhs)
 {
-    return {(int32_t)lroundf(lhs.rgb[0] * rhs), (int32_t)lroundf(lhs.rgb[1] * rhs), (int32_t)lroundf(lhs.rgb[2] * rhs)};
+    return {(int32_t)nearbyint(lhs.rgb[0] * rhs), (int32_t)nearbyint(lhs.rgb[1] * rhs), (int32_t)nearbyint(lhs.rgb[2] * rhs)};
 }
--- a/src/loaders/svg/tvgSvgLoader.cpp
+++ b/src/loaders/svg/tvgSvgLoader.cpp
@ -647,9 +647,9 @@ static bool _hslToRgb(float hue, float saturation, float brightness, uint8_t* re
        }
    }
-    *red = static_cast<uint8_t>(ceil(_red * 255.0f));
+    *red = (uint8_t)nearbyint(_red * 255.0f);
-    *green = static_cast<uint8_t>(ceil(_green * 255.0f));
+    *green = (uint8_t)nearbyint(_green * 255.0f);
-    *blue = static_cast<uint8_t>(ceil(_blue * 255.0f));
+    *blue = (uint8_t)nearbyint(_blue * 255.0f);
    return true;
 }
--- a/src/renderer/gl_engine/tvgGlTessellator.cpp
+++ b/src/renderer/gl_engine/tvgGlTessellator.cpp
@ -455,8 +455,8 @@ bool Edge::intersect(Edge *other, GlPoint *point)
    double scale = 1.0 / denom;
-    point->x = std::round(static_cast<float>(top->point.x - s_number * le_b * scale));
+    point->x = nearbyintf(static_cast<float>(top->point.x - s_number * le_b * scale));
-    point->y = std::round(static_cast<float>(top->point.y + s_number * le_a * scale));
+    point->y = nearbyintf(static_cast<float>(top->point.y + s_number * le_a * scale));
    if (std::isinf(point->x) || std::isinf(point->y)) {
        return false;
--- a/src/renderer/sw_engine/tvgSwImage.cpp
+++ b/src/renderer/sw_engine/tvgSwImage.cpp
@ -114,8 +114,8 @@ bool imagePrepare(SwImage* image, const RenderMesh* mesh, const Matrix* transfor
    //Fast track: Non-transformed image but just shifted.
    if (image->direct) {
-        image->ox = -static_cast<int32_t>(round(transform->e13));
+        image->ox = -static_cast<int32_t>(nearbyint(transform->e13));
-        image->oy = -static_cast<int32_t>(round(transform->e23));
+        image->oy = -static_cast<int32_t>(nearbyint(transform->e23));
    //Figure out the scale factor by transform matrix
    } else {
        auto scaleX = sqrtf((transform->e11 * transform->e11) + (transform->e21 * transform->e21));
--- a/src/renderer/sw_engine/tvgSwMath.cpp
+++ b/src/renderer/sw_engine/tvgSwMath.cpp
@ -164,8 +164,8 @@ void mathRotate(SwPoint& pt, SwFixed angle)
    auto cosv = cosf(radian);
    auto sinv = sinf(radian);
-    pt.x = SwCoord(roundf((v.x * cosv - v.y * sinv) * 64.0f));
+    pt.x = SwCoord(nearbyint((v.x * cosv - v.y * sinv) * 64.0f));
-    pt.y = SwCoord(roundf((v.x * sinv + v.y * cosv) * 64.0f));
+    pt.y = SwCoord(nearbyint((v.x * sinv + v.y * cosv) * 64.0f));
 }
@ -309,10 +309,10 @@ bool mathUpdateOutlineBBox(const SwOutline* outline, const SwBBox& clipRegion, S
    //the rasterization region has to be rearranged.
    //https://github.com/Samsung/thorvg/issues/916
    if (fastTrack) {
-        renderRegion.min.x = static_cast<SwCoord>(round(xMin / 64.0f));
+        renderRegion.min.x = static_cast<SwCoord>(nearbyint(xMin / 64.0f));
-        renderRegion.max.x = static_cast<SwCoord>(round(xMax / 64.0f));
+        renderRegion.max.x = static_cast<SwCoord>(nearbyint(xMax / 64.0f));
-        renderRegion.min.y = static_cast<SwCoord>(round(yMin / 64.0f));
+        renderRegion.min.y = static_cast<SwCoord>(nearbyint(yMin / 64.0f));
-        renderRegion.max.y = static_cast<SwCoord>(round(yMax / 64.0f));
+        renderRegion.max.y = static_cast<SwCoord>(nearbyint(yMax / 64.0f));
    } else {
        renderRegion.min.x = xMin >> 6;
        renderRegion.max.x = (xMax + 63) >> 6;
--- a/src/renderer/sw_engine/tvgSwRaster.cpp
+++ b/src/renderer/sw_engine/tvgSwRaster.cpp
@ -675,7 +675,7 @@ static bool _rasterRle(SwSurface* surface, SwRleData* rle, uint8_t r, uint8_t g,
    auto sy = (y) * itransform->e22 + itransform->e23 - 0.49f; \
    if (sy <= -0.5f || (uint32_t)(sy + 0.5f) >= image->h) continue; \
    if (scaleMethod == _interpDownScaler) { \
-        auto my = (int32_t)round(sy); \
+        auto my = (int32_t)nearbyint(sy); \
        miny = my - (int32_t)sampleSize; \
        if (miny < 0) miny = 0; \
        maxy = my + (int32_t)sampleSize; \