common: replace round() with nearbyint()

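In our local tests, nearbyint() is 2x faster than round(). Note that nearbyint() rounds according to the current floating-point rounding mode (round-half-to-even by default), whereas round() always rounds halfway cases away from zero, so results can differ by one for exact .5 inputs.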
This commit is contained in:
Hermet Park 2024-07-10 19:08:10 +09:00
parent 74f5928e84
commit 318c76119a
9 changed files with 33 additions and 33 deletions

View file

@ -316,7 +316,7 @@ bool LottieLoader::frame(float no)
//This ensures that the target frame number is reached. //This ensures that the target frame number is reached.
frameNo *= 10000.0f; frameNo *= 10000.0f;
frameNo = roundf(frameNo); frameNo = nearbyintf(frameNo);
frameNo *= 0.0001f; frameNo *= 0.0001f;
//Skip update if frame diff is too small. //Skip update if frame diff is too small.

View file

@ -160,32 +160,32 @@ uint32_t LottieGradient::populate(ColorStop& color)
if (cidx == clast || aidx == color.input->count) break; if (cidx == clast || aidx == color.input->count) break;
if ((*color.input)[cidx] == (*color.input)[aidx]) { if ((*color.input)[cidx] == (*color.input)[aidx]) {
cs.offset = (*color.input)[cidx]; cs.offset = (*color.input)[cidx];
cs.r = lroundf((*color.input)[cidx + 1] * 255.0f); cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
cs.g = lroundf((*color.input)[cidx + 2] * 255.0f); cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
cs.b = lroundf((*color.input)[cidx + 3] * 255.0f); cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
cs.a = lroundf((*color.input)[aidx + 1] * 255.0f); cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
cidx += 4; cidx += 4;
aidx += 2; aidx += 2;
} else if ((*color.input)[cidx] < (*color.input)[aidx]) { } else if ((*color.input)[cidx] < (*color.input)[aidx]) {
cs.offset = (*color.input)[cidx]; cs.offset = (*color.input)[cidx];
cs.r = lroundf((*color.input)[cidx + 1] * 255.0f); cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
cs.g = lroundf((*color.input)[cidx + 2] * 255.0f); cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
cs.b = lroundf((*color.input)[cidx + 3] * 255.0f); cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
//generate alpha value //generate alpha value
if (output.count > 0) { if (output.count > 0) {
auto p = ((*color.input)[cidx] - output.last().offset) / ((*color.input)[aidx] - output.last().offset); auto p = ((*color.input)[cidx] - output.last().offset) / ((*color.input)[aidx] - output.last().offset);
cs.a = mathLerp<uint8_t>(output.last().a, lroundf((*color.input)[aidx + 1] * 255.0f), p); cs.a = mathLerp<uint8_t>(output.last().a, (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f), p);
} else cs.a = 255; } else cs.a = 255;
cidx += 4; cidx += 4;
} else { } else {
cs.offset = (*color.input)[aidx]; cs.offset = (*color.input)[aidx];
cs.a = lroundf((*color.input)[aidx + 1] * 255.0f); cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
//generate color value //generate color value
if (output.count > 0) { if (output.count > 0) {
auto p = ((*color.input)[aidx] - output.last().offset) / ((*color.input)[cidx] - output.last().offset); auto p = ((*color.input)[aidx] - output.last().offset) / ((*color.input)[cidx] - output.last().offset);
cs.r = mathLerp<uint8_t>(output.last().r, lroundf((*color.input)[cidx + 1] * 255.0f), p); cs.r = mathLerp<uint8_t>(output.last().r, (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f), p);
cs.g = mathLerp<uint8_t>(output.last().g, lroundf((*color.input)[cidx + 2] * 255.0f), p); cs.g = mathLerp<uint8_t>(output.last().g, (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f), p);
cs.b = mathLerp<uint8_t>(output.last().b, lroundf((*color.input)[cidx + 3] * 255.0f), p); cs.b = mathLerp<uint8_t>(output.last().b, (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f), p);
} else cs.r = cs.g = cs.b = 255; } else cs.r = cs.g = cs.b = 255;
aidx += 2; aidx += 2;
} }
@ -195,9 +195,9 @@ uint32_t LottieGradient::populate(ColorStop& color)
//color remains //color remains
while (cidx + 3 < clast) { while (cidx + 3 < clast) {
cs.offset = (*color.input)[cidx]; cs.offset = (*color.input)[cidx];
cs.r = lroundf((*color.input)[cidx + 1] * 255.0f); cs.r = (uint8_t)nearbyint((*color.input)[cidx + 1] * 255.0f);
cs.g = lroundf((*color.input)[cidx + 2] * 255.0f); cs.g = (uint8_t)nearbyint((*color.input)[cidx + 2] * 255.0f);
cs.b = lroundf((*color.input)[cidx + 3] * 255.0f); cs.b = (uint8_t)nearbyint((*color.input)[cidx + 3] * 255.0f);
cs.a = (output.count > 0) ? output.last().a : 255; cs.a = (output.count > 0) ? output.last().a : 255;
output.push(cs); output.push(cs);
cidx += 4; cidx += 4;
@ -206,7 +206,7 @@ uint32_t LottieGradient::populate(ColorStop& color)
//alpha remains //alpha remains
while (aidx < color.input->count) { while (aidx < color.input->count) {
cs.offset = (*color.input)[aidx]; cs.offset = (*color.input)[aidx];
cs.a = lroundf((*color.input)[aidx + 1] * 255.0f); cs.a = (uint8_t)nearbyint((*color.input)[aidx + 1] * 255.0f);
if (output.count > 0) { if (output.count > 0) {
cs.r = output.last().r; cs.r = output.last().r;
cs.g = output.last().g; cs.g = output.last().g;

View file

@ -348,7 +348,7 @@ void LottieParser::getValue(RGB24& color)
while (nextArrayValue()) { while (nextArrayValue()) {
auto val = getFloat(); auto val = getFloat();
if (i < 3) color.rgb[i++] = int32_t(lroundf(val * 255.0f)); if (i < 3) color.rgb[i++] = (int32_t)nearbyint(val * 255.0f);
} }
//TODO: color filter? //TODO: color filter?

View file

@ -96,7 +96,7 @@ static inline RGB24 operator+(const RGB24& lhs, const RGB24& rhs)
static inline RGB24 operator*(const RGB24& lhs, float rhs) static inline RGB24 operator*(const RGB24& lhs, float rhs)
{ {
return {(int32_t)lroundf(lhs.rgb[0] * rhs), (int32_t)lroundf(lhs.rgb[1] * rhs), (int32_t)lroundf(lhs.rgb[2] * rhs)}; return {(int32_t)nearbyint(lhs.rgb[0] * rhs), (int32_t)nearbyint(lhs.rgb[1] * rhs), (int32_t)nearbyint(lhs.rgb[2] * rhs)};
} }

View file

@ -647,9 +647,9 @@ static bool _hslToRgb(float hue, float saturation, float brightness, uint8_t* re
} }
} }
*red = static_cast<uint8_t>(ceil(_red * 255.0f)); *red = (uint8_t)nearbyint(_red * 255.0f);
*green = static_cast<uint8_t>(ceil(_green * 255.0f)); *green = (uint8_t)nearbyint(_green * 255.0f);
*blue = static_cast<uint8_t>(ceil(_blue * 255.0f)); *blue = (uint8_t)nearbyint(_blue * 255.0f);
return true; return true;
} }

View file

@ -455,8 +455,8 @@ bool Edge::intersect(Edge *other, GlPoint *point)
double scale = 1.0 / denom; double scale = 1.0 / denom;
point->x = std::round(static_cast<float>(top->point.x - s_number * le_b * scale)); point->x = nearbyintf(static_cast<float>(top->point.x - s_number * le_b * scale));
point->y = std::round(static_cast<float>(top->point.y + s_number * le_a * scale)); point->y = nearbyintf(static_cast<float>(top->point.y + s_number * le_a * scale));
if (std::isinf(point->x) || std::isinf(point->y)) { if (std::isinf(point->x) || std::isinf(point->y)) {
return false; return false;

View file

@ -114,8 +114,8 @@ bool imagePrepare(SwImage* image, const RenderMesh* mesh, const Matrix* transfor
//Fast track: Non-transformed image but just shifted. //Fast track: Non-transformed image but just shifted.
if (image->direct) { if (image->direct) {
image->ox = -static_cast<int32_t>(round(transform->e13)); image->ox = -static_cast<int32_t>(nearbyint(transform->e13));
image->oy = -static_cast<int32_t>(round(transform->e23)); image->oy = -static_cast<int32_t>(nearbyint(transform->e23));
//Figure out the scale factor by transform matrix //Figure out the scale factor by transform matrix
} else { } else {
auto scaleX = sqrtf((transform->e11 * transform->e11) + (transform->e21 * transform->e21)); auto scaleX = sqrtf((transform->e11 * transform->e11) + (transform->e21 * transform->e21));

View file

@ -164,8 +164,8 @@ void mathRotate(SwPoint& pt, SwFixed angle)
auto cosv = cosf(radian); auto cosv = cosf(radian);
auto sinv = sinf(radian); auto sinv = sinf(radian);
pt.x = SwCoord(roundf((v.x * cosv - v.y * sinv) * 64.0f)); pt.x = SwCoord(nearbyint((v.x * cosv - v.y * sinv) * 64.0f));
pt.y = SwCoord(roundf((v.x * sinv + v.y * cosv) * 64.0f)); pt.y = SwCoord(nearbyint((v.x * sinv + v.y * cosv) * 64.0f));
} }
@ -309,10 +309,10 @@ bool mathUpdateOutlineBBox(const SwOutline* outline, const SwBBox& clipRegion, S
//the rasterization region has to be rearranged. //the rasterization region has to be rearranged.
//https://github.com/Samsung/thorvg/issues/916 //https://github.com/Samsung/thorvg/issues/916
if (fastTrack) { if (fastTrack) {
renderRegion.min.x = static_cast<SwCoord>(round(xMin / 64.0f)); renderRegion.min.x = static_cast<SwCoord>(nearbyint(xMin / 64.0f));
renderRegion.max.x = static_cast<SwCoord>(round(xMax / 64.0f)); renderRegion.max.x = static_cast<SwCoord>(nearbyint(xMax / 64.0f));
renderRegion.min.y = static_cast<SwCoord>(round(yMin / 64.0f)); renderRegion.min.y = static_cast<SwCoord>(nearbyint(yMin / 64.0f));
renderRegion.max.y = static_cast<SwCoord>(round(yMax / 64.0f)); renderRegion.max.y = static_cast<SwCoord>(nearbyint(yMax / 64.0f));
} else { } else {
renderRegion.min.x = xMin >> 6; renderRegion.min.x = xMin >> 6;
renderRegion.max.x = (xMax + 63) >> 6; renderRegion.max.x = (xMax + 63) >> 6;

View file

@ -675,7 +675,7 @@ static bool _rasterRle(SwSurface* surface, SwRleData* rle, uint8_t r, uint8_t g,
auto sy = (y) * itransform->e22 + itransform->e23 - 0.49f; \ auto sy = (y) * itransform->e22 + itransform->e23 - 0.49f; \
if (sy <= -0.5f || (uint32_t)(sy + 0.5f) >= image->h) continue; \ if (sy <= -0.5f || (uint32_t)(sy + 0.5f) >= image->h) continue; \
if (scaleMethod == _interpDownScaler) { \ if (scaleMethod == _interpDownScaler) { \
auto my = (int32_t)round(sy); \ auto my = (int32_t)nearbyint(sy); \
miny = my - (int32_t)sampleSize; \ miny = my - (int32_t)sampleSize; \
if (miny < 0) miny = 0; \ if (miny < 0) miny = 0; \
maxy = my + (int32_t)sampleSize; \ maxy = my + (int32_t)sampleSize; \