
Commit 971606b

janeyx99 authored and pytorchmergebot committed
Add a stable TORCH_LIBRARY to C shim (pytorch#148124)
This PR adds two main parts:
- shim.h: stable C APIs into the torch::Library APIs
- a higher-level API in torch/csrc/stable/library.h that calls into this shim.h and is otherwise self-contained

Goal: custom kernel writers should be able to call the APIs in the directories above to register their library in a way that allows their custom extension to run with a different libtorch version than it was built with.

Subplots resolved:
- Do we want a whole separate StableLibrary, or do we want to freeze torch::Library and add `m.stable_impl(cstring, void (*fn)(void **, int64_t, int64_t))` into it?
  - Yes, we want a separate StableLibrary. We cannot freeze Library, and it is NOT header-only.
- Should I use uint64_t as the common denominator instead of void* to better support 32-bit architectures?
  - Yes, and done.
- Should I add a stable `def` and `fragment` when those can be done in Python?
  - I think we do want these, and now they're done.
- Where should library_stable_impl.cpp live? No longer relevant.
- I need some solid test cases to make sure everything's going OK. I've intentionally thrown a bunch of random dtypes into the signature, but I still haven't tested returning multiple things, returning nothing, complex dtypes, etc.
  - Have since tested all the torch library endpoints. The others can be tested in a follow-up, to separate the components that need to be in shim.h from those that can be added later.

Pull Request resolved: pytorch#148124
Approved by: https://github.com/albanD, https://github.com/zou3519, https://github.com/atalman
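For orientation, here is a minimal sketch of the registration flow this enables, distilled from the libtorch_agnostic example extension added below. The `my_ops` namespace, the `my_identity` schema, and `boxed_my_identity` are illustrative names for this sketch, not part of the commit:

// Minimal sketch of registering an op through the stable API added in this PR.
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
#include <torch/csrc/stable/library.h>

// Boxed calling convention: arguments arrive as StableIValues on the stack,
// and results are written back into the same stack slots.
void boxed_my_identity(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
  AtenTensorHandle t = to<AtenTensorHandle>(stack[0]);
  stack[0] = from(t);
}

// Declare the schema and register the CPU kernel through the stable macros.
STABLE_TORCH_LIBRARY(my_ops, m) {
  m.def("my_identity(Tensor t) -> Tensor");
}

STABLE_TORCH_LIBRARY_IMPL(my_ops, CPU, m) {
  m.impl("my_identity", &boxed_my_identity);
}

Per the commit message, a kernel registered this way only talks to libtorch through the C entry points in shim.h and the StableIValue stack, rather than through libtorch's C++ ABI, which is what allows it to run against a different libtorch version than it was built with.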
1 parent 4d10da7 · commit 971606b


15 files changed: +765 −9 lines changed


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ test/generated_type_hints_smoketest.py
 test/htmlcov
 test/cpp_extensions/install/
 test/cpp_extensions/open_registration_extension/install
+test/cpp_extensions/libtorch_agnostic_extension/install
 test/kernel.errors.txt
 third_party/build/
 third_party/nccl/

build_variables.bzl

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ jit_core_headers = [
     "torch/csrc/jit/frontend/schema_type_parser.h",
     "torch/csrc/jit/frontend/error_report.h",
     "torch/csrc/jit/frontend/tree.h",
+    "torch/csrc/stable/library.h",
     "torch/custom_class.h",
     "torch/custom_class_detail.h",
     "torch/library.h",

docs/cpp/source/Doxyfile

Lines changed: 1 addition & 0 deletions
@@ -67,6 +67,7 @@ INPUT = ../../../aten/src/ATen/ATen.h \
 ../../../torch/csrc/jit/runtime/custom_operator.h \
 ../../../torch/csrc/jit/serialization/import.h \
 ../../../torch/csrc/jit/api/module.h \
+../../../torch/csrc/stable/library.h \
 ../../../torch/library.h \
 ../../../torch/custom_class.h
 # Don't include .cpp files!

setup.py

Lines changed: 1 addition & 0 deletions
@@ -1274,6 +1274,7 @@ def main():
         "include/c10/xpu/impl/*.h",
         "include/torch/*.h",
         "include/torch/csrc/*.h",
+        "include/torch/csrc/stable/*.h",
         "include/torch/csrc/api/include/torch/*.h",
         "include/torch/csrc/api/include/torch/data/*.h",
         "include/torch/csrc/api/include/torch/data/dataloader/*.h",
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+import ctypes
+from pathlib import Path
+
+import torch
+
+
+so_files = list(Path(__file__).parent.glob("_C*.so"))
+assert len(so_files) == 1, f"Expected one _C*.so file, found {len(so_files)}"
+
+# use ctypes.CDLL instead of load_library to be able to test the unload logic
+# below code is reduced from the load_library code
+with torch._ops.dl_open_guard():
+    loaded_lib = ctypes.CDLL(so_files[0])
+
+from . import ops
+
+
+__all__ = [
+    "loaded_lib",
+    "ops",
+]
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
+#include <torch/csrc/inductor/aoti_torch/c/shim.h>
+#include <torch/csrc/inductor/aoti_runtime/utils.h>
+#include <torch/csrc/stable/library.h>
+
+using RAIIATH = torch::aot_inductor::RAIIAtenTensorHandle;
+
+void inline sgd_math(
+  float* param_ptr,
+  float* grad_ptr,
+  float* out_ptr,
+  const float weight_decay,
+  const double lr,
+  const bool maximize,
+  int64_t size
+){
+  int64_t d = 0;
+  for (; d < size; d++) {
+    float grad_val = grad_ptr[d];
+    if (maximize) grad_val = -grad_val;
+    if (weight_decay != 0.0){
+      grad_val += param_ptr[d] * weight_decay;
+    }
+    out_ptr[d] = param_ptr[d] - grad_val * float(lr);
+  }
+}
+
+
+RAIIATH sgd_out_of_place(
+  const RAIIATH param,
+  const RAIIATH grad,
+  const float weight_decay,
+  const double lr,
+  const bool maximize) {
+
+  int64_t param_dim;
+  aoti_torch_get_dim(param.get(), &param_dim);
+
+  int64_t *param_sizes;
+  int64_t *param_strides;
+  aoti_torch_get_sizes(param.get(), &param_sizes);
+  aoti_torch_get_strides(param.get(), &param_strides);
+
+  int32_t param_dtype;
+  aoti_torch_get_dtype(param.get(), &param_dtype);
+
+  int32_t param_device_type;
+  int32_t param_device_index;
+  aoti_torch_get_device_type(param.get(), &param_device_type);
+  aoti_torch_get_device_index(param.get(), &param_device_index);
+
+  AtenTensorHandle out;
+  aoti_torch_empty_strided(param_dim, param_sizes, param_strides, param_dtype, param_device_type, param_device_index, &out);
+
+  void* param_ptr;
+  aoti_torch_get_data_ptr(param.get(), &param_ptr);
+  void* grad_ptr;
+  aoti_torch_get_data_ptr(grad.get(), &grad_ptr);
+  void* out_ptr;
+  aoti_torch_get_data_ptr(out, &out_ptr);
+
+  auto param_fp_ptr = reinterpret_cast<float*>(param_ptr);
+  auto grad_fp_ptr = reinterpret_cast<float*>(grad_ptr);
+  auto out_fp_ptr = reinterpret_cast<float*>(out_ptr);
+
+  int64_t param_numel;
+  aoti_torch_get_numel(param.get(), &param_numel);
+
+  sgd_math(
+    param_fp_ptr,
+    grad_fp_ptr,
+    out_fp_ptr,
+    weight_decay,
+    lr,
+    maximize,
+    param_numel
+  );
+
+  return RAIIATH(out);
+}
+
+
+void boxed_sgd_out_of_place(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
+  RAIIATH param(to<AtenTensorHandle>(stack[0]));
+  RAIIATH grad(to<AtenTensorHandle>(stack[1]));
+  auto weight_decay = to<double>(stack[2]);
+  auto lr = to<double>(stack[3]);
+  auto maximize = to<bool>(stack[4]);
+
+  RAIIATH raiiath_res = sgd_out_of_place(
+    std::move(param),
+    std::move(grad),
+    float(weight_decay),
+    lr,
+    maximize);
+
+  stack[0] = from(raiiath_res.release());
+}
+
+STABLE_TORCH_LIBRARY(libtorch_agnostic, m) {
+  m.def("sgd_out_of_place(Tensor param, Tensor grad, float weight_decay, float lr, bool maximize) -> Tensor");
+}
+
+STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CPU, m) {
+  m.impl("sgd_out_of_place", &boxed_sgd_out_of_place);
+}
+
+RAIIATH identity(RAIIATH t) {
+  return std::move(t);
+}
+
+void boxed_identity(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
+  RAIIATH t(to<AtenTensorHandle>(stack[0]));
+  RAIIATH raiiath_res = identity(std::move(t));
+  stack[0] = from(raiiath_res.release());
+}
+
+STABLE_TORCH_LIBRARY_FRAGMENT(libtorch_agnostic, m) {
+  m.def("identity(Tensor t) -> Tensor");
+}
+
+STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CUDA, m) {
+  m.impl("identity", &boxed_identity);
+}
+
+STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CPU, m) {
+  m.impl("identity", &boxed_identity);
+}
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+import torch
+from torch import Tensor
+
+
+def sgd_out_of_place(param, grad, weight_decay, lr, maximize) -> Tensor:
+    """
+    Computes a single step of SGD on a single parameter Tensor with grad.
+
+    Assumes:
+    - param and grad are the same shape and are 1D.
+    - param and grad are float and on CPU
+
+    Args:
+        param: a 1D tensor of floats
+        grad: a 1D tensor of floats
+        weight_decay: a python double between 0 and 1
+        lr: a python double
+
+    Returns:
+        a 1D float Tensor the same shape as param
+
+    """
+    return torch.ops.libtorch_agnostic.sgd_out_of_place.default(
+        param, grad, weight_decay, lr, maximize
+    )
+
+
+def identity(t) -> Tensor:
+    """
+    Returns the input tensor
+
+    Args:
+        t: any Tensor
+
+    Returns:
+        a Tensor, the same as input.
+    """
+    return torch.ops.libtorch_agnostic.identity.default(t)
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+import distutils.command.clean
+import shutil
+from pathlib import Path
+
+from setuptools import find_packages, setup
+
+from torch.utils.cpp_extension import BuildExtension, CppExtension
+
+
+ROOT_DIR = Path(__file__).parent
+CSRC_DIR = ROOT_DIR / "libtorch_agnostic" / "csrc"
+
+
+class clean(distutils.command.clean.clean):
+    def run(self):
+        # Run default behavior first
+        distutils.command.clean.clean.run(self)
+
+        # Remove extension
+        for path in (ROOT_DIR / "libtorch_agnostic").glob("**/*.so"):
+            path.unlink()
+        # Remove build and dist and egg-info directories
+        dirs = [
+            ROOT_DIR / "build",
+            ROOT_DIR / "dist",
+            ROOT_DIR / "libtorch_agnostic.egg-info",
+        ]
+        for path in dirs:
+            if path.exists():
+                shutil.rmtree(str(path), ignore_errors=True)
+
+
+def get_extension():
+    extra_compile_args = {
+        "cxx": ["-fdiagnostics-color=always"],
+    }
+
+    sources = list(CSRC_DIR.glob("**/*.cpp"))
+
+    return [
+        CppExtension(
+            "libtorch_agnostic._C",
+            sources=sorted(str(s) for s in sources),
+            py_limited_api=True,
+            extra_compile_args=extra_compile_args,
+            extra_link_args=[],
+        )
+    ]
+
+
+setup(
+    name="libtorch_agnostic",
+    version="0.0",
+    author="PyTorch Core Team",
+    description="Example of libtorch agnostic extension",
+    packages=find_packages(exclude=("test",)),
+    package_data={"libtorch_agnostic": ["*.dll", "*.dylib", "*.so"]},
+    install_requires=[
+        "torch",
+    ],
+    ext_modules=get_extension(),
+    cmdclass={
+        "build_ext": BuildExtension.with_options(no_python_abi_suffix=True),
+        "clean": clean,
+    },
+    options={"bdist_wheel": {"py_limited_api": "cp39"}},
+)
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+# Owner(s): ["module: cpp"]
+
+import libtorch_agnostic  # noqa: F401
+
+import torch
+from torch.testing._internal.common_device_type import (
+    instantiate_device_type_tests,
+    onlyCPU,
+    onlyCUDA,
+)
+from torch.testing._internal.common_utils import run_tests, TestCase
+
+
+class TestLibtorchAgnostic(TestCase):
+    @onlyCPU
+    def test_slow_sgd(self, device):
+        param = torch.rand(5, device=device)
+        grad = torch.rand_like(param)
+        weight_decay = 0.01
+        lr = 0.001
+        maximize = False
+
+        new_param = libtorch_agnostic.ops.sgd_out_of_place(
+            param, grad, weight_decay, lr, maximize
+        )
+        torch._fused_sgd_(
+            (param,),
+            (grad,),
+            (),
+            weight_decay=weight_decay,
+            momentum=0.0,
+            lr=lr,
+            dampening=0.0,
+            nesterov=False,
+            maximize=maximize,
+            is_first_step=False,
+        )
+        self.assertEqual(new_param, param)
+
+    @onlyCUDA
+    def test_identity_does_not_hog_memory(self, device):
+        def _run_identity(prior_mem):
+            t = torch.rand(32, 32, device=device)
+            self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
+            identi_t = libtorch_agnostic.ops.identity(t)
+            assert identi_t is t
+
+        init_mem = torch.cuda.memory_allocated(device)
+
+        for _ in range(3):
+            _run_identity(init_mem)
+            curr_mem = torch.cuda.memory_allocated(device)
+            self.assertEqual(curr_mem, init_mem)
+
+    @onlyCUDA
+    def test_z_delete_torch_lib(self, device):
+        # Why the z + CUDA? THIS TEST MUST BE RUN LAST
+        # We are testing that unloading the library properly deletes the registrations, so running this test
+        # earlier will cause all other tests in this file to fail
+        lib = libtorch_agnostic.loaded_lib
+
+        # code for unloading a library inspired from
+        # https://stackoverflow.com/questions/19547084/can-i-explicitly-close-a-ctypes-cdll
+        lib_handle = lib._handle
+        lib.dlclose(lib_handle)
+
+        t = torch.tensor([-2.0, 0.5])
+        with self.assertRaises(RuntimeError):
+            libtorch_agnostic.ops.identity(
+                t
+            )  # errors as identity shouldn't be registered anymore
+
+
+instantiate_device_type_tests(TestLibtorchAgnostic, globals(), except_for=None)
+
+if __name__ == "__main__":
+    run_tests()
