Commit: feat: any target resolution support (#6)
feat: any target resolution support (#6)
Nikaidou-Shinku committed Dec 26, 2023
1 parent 399962b commit 00f8bf5
Showing 4 changed files with 69 additions and 7 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -28,6 +28,8 @@ Options:
-d, --denoise-level <DENOISE> Denoise level (-1/0/3), -1 for conservative model [default: 0]
-l, --lossless Output lossless encoded image
-t, --tile-size <TILE> Tile size, smaller value may reduce memory usage
-W, --width <WIDTH> After Real-CUGAN, resample to target width
-H, --height <HEIGHT> After Real-CUGAN, resample to target height
    --no-cache             Disable cache, which increases runtime but reduces memory usage
-C, --use-cpu Use CPU instead of GPU for inference
-a, --alpha <ALPHA> Please check the documentation for this option [default: 1.0]
@@ -42,13 +44,15 @@ Supported image formats: BMP, JPEG, PNG, WebP.
- Currently only the pro model is supported.
- Currently GPU inference only supports NVIDIA graphics cards through CUDA and cuDNN.
- Considering the encoding speed, WebP outputs lossy compressed images by default. If you need lossless compression, please add `--lossless` or `-l`.
- Explanation of _the alpha option_: the larger the value, the lighter the AI restoration, the fewer the artifacts, and the blurrier the result; the smaller the value, the stronger the processing, the sharper the result, and the larger the color shift (contrast and saturation boost). The default of 1.0 applies no adjustment; the recommended tuning range is (0.7, 1.3).
- Explanation of _the tile size option_: After specifying tile size through `--tile-size` or `-t`, the image will be divided into small blocks with a length not exceeding the tile size for inference.
- This will **significantly reduce the memory usage**. Generally, the smaller the tile size, the smaller the memory usage will be, but at the same time **the inference time will become longer**.
- Note that the tile size should not be too small, and it is generally recommended not to be less than 32.
- When tile size is not specified, the entire image will be used directly for inference.
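
The tiling rule above can be sketched as follows. This is a hypothetical illustration, not this project's actual implementation: `tile_starts` lists the starting offsets of the blocks along one image axis.

```rust
// Hypothetical sketch of the tiling described above: split one image axis
// into blocks whose length does not exceed `tile_size`.
fn tile_starts(len: usize, tile_size: usize) -> Vec<usize> {
    (0..len).step_by(tile_size).collect()
}

fn main() {
    // A 100x60 image with tile size 32 is covered by 4 x 2 = 8 blocks.
    let cols = tile_starts(100, 32);
    let rows = tile_starts(60, 32);
    assert_eq!(cols, vec![0, 32, 64, 96]);
    assert_eq!(rows, vec![0, 32]);
    println!("{} blocks", cols.len() * rows.len());
}
```

A smaller tile size means more, smaller blocks: each inference call needs less memory, but there are more calls, which is why runtime grows as memory usage shrinks.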
- Explanation of the _width option_ and _height option_: if width and height are specified, the image is first upscaled with Real-CUGAN and then resampled to the target resolution with Lanczos3.
- If only one of width and height is specified, the other one will be calculated based on the aspect ratio of the original image.
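
The selection rules above mirror the logic added to `main.rs` in this commit; here is a standalone sketch (the helper name `resolve_target` is illustrative, not part of the project):

```rust
// Pick the output resolution: explicit width/height win, a single given side
// derives the other from the source aspect ratio, and with neither given the
// model's integer scale factor is used.
fn resolve_target(
    src_w: usize,
    src_h: usize,
    width: Option<usize>,
    height: Option<usize>,
    scale: usize,
) -> (usize, usize) {
    match (width, height) {
        (Some(w), Some(h)) => (w, h),
        (Some(w), None) => (w, ((w * src_h) as f64 / src_w as f64).round() as usize),
        (None, Some(h)) => (((h * src_w) as f64 / src_h as f64).round() as usize, h),
        (None, None) => (src_w * scale, src_h * scale),
    }
}

fn main() {
    // 1920x1080 source with only width 1280 requested: height follows as 720.
    assert_eq!(resolve_target(1920, 1080, Some(1280), None, 2), (1280, 720));
    // Neither side requested: fall back to the model scale (2x here).
    assert_eq!(resolve_target(1920, 1080, None, None, 2), (3840, 2160));
    println!("ok");
}
```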
- Explanation on _cache_: If the memory is still insufficient after adjusting the tile size, you can consider disabling the cache through `--no-cache`.
- This will **significantly reduce the memory usage**. After disabling caching, as long as the tile size is small enough, generally 1.5GiB of video memory can handle images of any resolution.
- Disabling caching will **significantly increase inference time**, typically to 2 to 3 times that with caching enabled.
- This option is ignored when tile size is not specified.
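
The memory/time trade-off described above can be illustrated with a toy memoization sketch. This is purely hypothetical and not this project's cache: with caching on, an expensive per-tile intermediate is computed once and kept in memory; with it off, the intermediate is recomputed on every access.

```rust
use std::collections::HashMap;

// Stand-in for an expensive per-tile intermediate computation.
fn expensive_intermediate(tile_id: u32) -> Vec<f32> {
    vec![tile_id as f32; 4]
}

struct TileCache {
    enabled: bool,
    store: HashMap<u32, Vec<f32>>,
    computations: u32, // counts how often the expensive step actually runs
}

impl TileCache {
    fn get(&mut self, tile_id: u32) -> Vec<f32> {
        if self.enabled {
            if !self.store.contains_key(&tile_id) {
                self.computations += 1;
                let v = expensive_intermediate(tile_id);
                self.store.insert(tile_id, v); // memory held until the run ends
            }
            self.store[&tile_id].clone()
        } else {
            self.computations += 1; // recomputed on every access: slower, no storage
            expensive_intermediate(tile_id)
        }
    }
}

fn main() {
    let mut cached = TileCache { enabled: true, store: HashMap::new(), computations: 0 };
    cached.get(1);
    cached.get(1);
    assert_eq!(cached.computations, 1); // second access reuses the stored result

    let mut uncached = TileCache { enabled: false, store: HashMap::new(), computations: 0 };
    uncached.get(1);
    uncached.get(1);
    assert_eq!(uncached.computations, 2); // recomputed each time
    println!("ok");
}
```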
- Explanation of _the alpha option_: the larger the value, the lighter the AI restoration, the fewer the artifacts, and the blurrier the result; the smaller the value, the stronger the processing, the sharper the result, and the larger the color shift (contrast and saturation boost). The default of 1.0 applies no adjustment; the recommended tuning range is (0.7, 1.3).
- **PRs are welcome!**
6 changes: 5 additions & 1 deletion README_zh.md
@@ -28,6 +28,8 @@ Options:
-d, --denoise-level <DENOISE> Denoise level (-1/0/3), -1 for conservative model [default: 0]
-l, --lossless Output lossless encoded image
-t, --tile-size <TILE> Tile size, smaller value may reduce memory usage
-W, --width <WIDTH> After Real-CUGAN, resample to target width
-H, --height <HEIGHT> After Real-CUGAN, resample to target height
    --no-cache             Disable cache, which increases runtime but reduces memory usage
-C, --use-cpu Use CPU instead of GPU for inference
-a, --alpha <ALPHA> Please check the documentation for this option [default: 1.0]
@@ -42,13 +44,15 @@ Options:
- Currently only the pro model is supported.
- Currently GPU inference only supports NVIDIA graphics cards through CUDA and cuDNN.
- Considering the encoding speed, WebP outputs lossy compressed images by default. If you need lossless compression, please use `--lossless` or `-l`.
- Explanation of _the alpha option_: the larger the value, the lighter the AI restoration, the fewer the artifacts, and the blurrier the result; the smaller the value, the stronger the processing, the sharper the result, and the larger the color shift (contrast and saturation boost). The default of 1.0 applies no adjustment; the recommended tuning range is (0.7, 1.3).
- Explanation of _the tile size option_: after specifying the tile size through `--tile-size` or `-t`, the image will be split into blocks whose sides do not exceed the tile size for inference.
- This **significantly reduces memory usage**. Generally, the smaller the tile size, the lower the memory usage, but at the same time **the inference time becomes longer**.
- Note that the tile size should not be too small; values below 32 are generally not recommended.
- When no tile size is specified, the entire image is used directly for inference.
- Explanation of the _width option_ and _height option_: if width and height are specified, the image is first upscaled with Real-CUGAN and then resampled to the target resolution with Lanczos3.
- If only one of width and height is specified, the other is calculated from the aspect ratio of the original image.
- Explanation of _cache_: if memory is still insufficient after adjusting the tile size, consider disabling the caching of intermediate results with `--no-cache`.
- This **significantly reduces memory usage**. With caching disabled, as long as the tile size is small enough, 1.5 GiB of video memory can generally handle images of any resolution.
- Disabling caching **significantly increases inference time**, typically to 2 to 3 times that with caching enabled.
- This option is ignored when no tile size is specified.
- Explanation of _the alpha option_: the larger the value, the lighter the AI restoration, the fewer the artifacts, and the blurrier the result; the smaller the value, the stronger the processing, the sharper the result, and the larger the color shift (contrast and saturation boost). The default of 1.0 applies no adjustment; the recommended tuning range is (0.7, 1.3).
- **PRs are welcome!**
12 changes: 12 additions & 0 deletions src/cli.rs
@@ -33,6 +33,18 @@ pub struct Cli {
#[arg(value_name = "TILE")]
pub tile_size: Option<usize>,

#[arg(short = 'W', long, help = "After Real-CUGAN, resample to target width")]
#[arg(value_name = "WIDTH")]
pub width: Option<usize>,

#[arg(
short = 'H',
long,
help = "After Real-CUGAN, resample to target height"
)]
#[arg(value_name = "HEIGHT")]
pub height: Option<usize>,

#[arg(
long,
  help = "Disable cache, which increases runtime but reduces memory usage"
52 changes: 47 additions & 5 deletions src/main.rs
@@ -18,10 +18,10 @@ use setup::{setup_args, setup_tracing};
use utils::{preprocess_alpha_channel, save_image};

fn main() -> Result<(), candle_core::Error> {
setup_tracing();

let args = Cli::parse();

setup_tracing();

let output_format = match setup_args(&args) {
Ok(res) => res,
Err(err) => {
@@ -121,9 +121,20 @@ fn main() -> Result<(), candle_core::Error> {
"Preprocess the image into tensor",
);

let (target_width, target_height) = {
let scale: usize = args.scale.into();
(width * scale, height * scale)
let (target_width, target_height) = match (args.width, args.height) {
(Some(w), Some(h)) => (w, h),
(Some(w), None) => {
let h = (w * height) as f64 / width as f64;
(w, h.round() as usize)
}
(None, Some(h)) => {
let w = (h * width) as f64 / height as f64;
(w.round() as usize, h)
}
_ => {
let scale: usize = args.scale.into();
(width * scale, height * scale)
}
};

let alpha = alpha.map(|alpha| {
@@ -182,6 +193,37 @@ fn main() -> Result<(), candle_core::Error> {
let res = ((res - 0.15)? * (255. / 0.7))?.round()?; // for pro model
let res = res.squeeze(0)?.permute((1, 2, 0))?;

let cur_width = res.dim(1)?;
let cur_height = res.dim(0)?;

let res = if cur_width == target_width && cur_height == target_height {
tracing::info!("Skip resampling");
res
} else {
let mut resizer = resize::new(
cur_width,
cur_height,
target_width,
target_height,
Pixel::RGBF32,
resize::Type::Lanczos3,
)
.expect("Failed to initialize the target resizer");

let src = res.flatten_all()?.to_vec1()?;
drop(res);

let mut dst = vec![0.; target_width * target_height * 3];

resizer
.resize(src.as_rgb(), dst.as_rgb_mut())
.expect("Failed to resample the target image");

tracing::info!("Image resampled to target resolution");

Tensor::from_vec(dst, (target_height, target_width, 3), &device)?
};

save_image(
target_width,
target_height,