Skip to content

Commit

Permalink
Remove excessive floating-point divides
Browse files Browse the repository at this point in the history
Hoist the loop-invariant divide out of the hot loop, and/or
precompute the reciprocal of the divisor so that each FDIV becomes an FMUL.
  • Loading branch information
heshpdx committed Sep 3, 2024
1 parent 027ad18 commit 41faf69
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
13 changes: 8 additions & 5 deletions src/lstm/networkio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, T
int target_width = stride_map_.Size(FD_WIDTH);
int num_features = NumFeatures();
bool color = num_features == 3;
float inv_contrast = 1.0/contrast;
if (width > target_width) {
width = target_width;
}
Expand All @@ -236,11 +237,11 @@ void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, T
int f = 0;
for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
int pixel = GET_DATA_BYTE(line + x, c);
SetPixel(t, f++, pixel, black, contrast);
SetPixel(t, f++, pixel, black, inv_contrast);
}
} else {
int pixel = GET_DATA_BYTE(line, x);
SetPixel(t, 0, pixel, black, contrast);
SetPixel(t, 0, pixel, black, inv_contrast);
}
}
}
Expand All @@ -264,6 +265,7 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
index.AddOffset(batch, FD_BATCH);
int t = index.t();
int target_width = stride_map_.Size(FD_WIDTH);
float inv_contrast = 1.0/contrast;
if (width > target_width) {
width = target_width;
}
Expand All @@ -272,7 +274,7 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
for (int y = 0; y < height; ++y) {
uint32_t *line = pixGetData(pix) + wpl * y;
int pixel = GET_DATA_BYTE(line, x);
SetPixel(t, y, pixel, black, contrast);
SetPixel(t, y, pixel, black, inv_contrast);
}
}
for (; x < target_width; ++x) {
Expand All @@ -287,8 +289,9 @@ void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contras
// pixel: the value of the pixel from the image (in one channel)
// black: the pixel value to map to the lowest of the range of *this
// contrast: the range of pixel values to stretch to half the range of *this.
void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
float float_pixel = (pixel - black) / contrast - 1.0f;
// inv_contrast: the reciprocal of contrast (1.0 / contrast), precomputed by
//               the caller so the per-pixel divide becomes a multiply.
void NetworkIO::SetPixel(int t, int f, int pixel, float black, float inv_contrast) {
float float_pixel = (pixel - black) * inv_contrast - 1.0f;
if (int_mode_) {
i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX);
} else {
Expand Down
9 changes: 6 additions & 3 deletions src/textord/pithsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void FPCUTPT::assign( // constructor
// half of pitch
int16_t half_pitch = pitch / 2 - 1;
uint32_t lead_flag; // new flag
float inv_projection_scale = 1.0/projection_scale;

if (half_pitch > 31) {
half_pitch = 31;
Expand Down Expand Up @@ -166,7 +167,7 @@ void FPCUTPT::assign( // constructor
}
}
balance_count =
static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
static_cast<int16_t>(balance_count * textord_balance_factor * inv_projection_scale);
}
r_index = segpt->region_index + 1;
total = segpt->mean_sum + dist;
Expand Down Expand Up @@ -221,6 +222,7 @@ void FPCUTPT::assign_cheap( // constructor
// half of pitch
int16_t half_pitch = pitch / 2 - 1;
uint32_t lead_flag; // new flag
float inv_projection_scale = 1.0/projection_scale;

if (half_pitch > 31) {
half_pitch = 31;
Expand Down Expand Up @@ -260,7 +262,7 @@ void FPCUTPT::assign_cheap( // constructor
lead_flag &= lead_flag - 1;
}
balance_count =
static_cast<int16_t>(balance_count * textord_balance_factor / projection_scale);
static_cast<int16_t>(balance_count * textord_balance_factor * inv_projection_scale);
}
r_index = segpt->region_index + 1;
total = segpt->mean_sum + dist;
Expand Down Expand Up @@ -511,6 +513,7 @@ double check_pitch_sync3( // find segmentation
int16_t best_fake; // best fake level
int16_t best_count; // no of cuts
FPSEGPT_IT seg_it = seg_list; // output iterator
float inv_projection_scale = 1.0/projection_scale;

end = (end - start) % pitch;
if (pitch < 3) {
Expand Down Expand Up @@ -597,7 +600,7 @@ double check_pitch_sync3( // find segmentation
offset = projection->pile_count(x);
faking = true;
} else {
projection_offset = static_cast<int16_t>(projection->pile_count(x) / projection_scale);
projection_offset = static_cast<int16_t>(projection->pile_count(x) * inv_projection_scale);
if (projection_offset > offset) {
offset = projection_offset;
}
Expand Down

0 comments on commit 41faf69

Please sign in to comment.