@@ -1527,10 +1527,10 @@ MONAI_NAMESPACE_DEVICE { // cpu
1527
1527
iy0 = bound::index (bound1, iy0, src_Y);
1528
1528
iz0 = bound::index (bound2, iz0, src_Z);
1529
1529
1530
- // Offsets into source volume
1531
1530
offset_t o000, o100, o010, o001, o110, o011, o101, o111;
1532
1531
1533
1532
if (do_pull || do_grad || do_sgrad) {
1533
+ // Offsets into source volume
1534
1534
o000 = ix0 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1535
1535
o100 = ix1 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1536
1536
o010 = ix0 * src_sX + iy1 * src_sY + iz0 * src_sZ;
@@ -1539,18 +1539,20 @@ MONAI_NAMESPACE_DEVICE { // cpu
1539
1539
o011 = ix0 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1540
1540
o101 = ix1 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1541
1541
o111 = ix1 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1542
+ } else {
1543
+ // Offsets into 'push' volume
1544
+ o000 = ix0 * out_sX + iy0 * out_sY + iz0 * out_sZ;
1545
+ o100 = ix1 * out_sX + iy0 * out_sY + iz0 * out_sZ;
1546
+ o010 = ix0 * out_sX + iy1 * out_sY + iz0 * out_sZ;
1547
+ o001 = ix0 * out_sX + iy0 * out_sY + iz1 * out_sZ;
1548
+ o110 = ix1 * out_sX + iy1 * out_sY + iz0 * out_sZ;
1549
+ o011 = ix0 * out_sX + iy1 * out_sY + iz1 * out_sZ;
1550
+ o101 = ix1 * out_sX + iy0 * out_sY + iz1 * out_sZ;
1551
+ o111 = ix1 * out_sX + iy1 * out_sY + iz1 * out_sZ;
1542
1552
}
1543
1553
1544
1554
// ~~~~~~~~~~~~~~~~~~~~~~~~~~ Grid gradient ~~~~~~~~~~~~~~~~~~~~~~~~~~
1545
1555
if (do_grad) {
1546
- o000 = ix0 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1547
- o100 = ix1 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1548
- o010 = ix0 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1549
- o001 = ix0 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1550
- o110 = ix1 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1551
- o011 = ix0 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1552
- o101 = ix1 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1553
- o111 = ix1 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1554
1556
scalar_t gx = static_cast <scalar_t >(0 );
1555
1557
scalar_t gy = static_cast <scalar_t >(0 );
1556
1558
scalar_t gz = static_cast <scalar_t >(0 );
@@ -1659,14 +1661,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1659
1661
}
1660
1662
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pull ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1661
1663
if (do_pull) {
1662
- o000 = ix0 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1663
- o100 = ix1 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1664
- o010 = ix0 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1665
- o001 = ix0 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1666
- o110 = ix1 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1667
- o011 = ix0 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1668
- o101 = ix1 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1669
- o111 = ix1 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1670
1664
scalar_t * out_ptr_NCXYZ = out_ptr + n * out_sN + w * out_sX + h * out_sY + d * out_sZ;
1671
1665
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
1672
1666
for (offset_t c = 0 ; c < C; ++c, out_ptr_NCXYZ += out_sC, src_ptr_NC += src_sC) {
@@ -1678,14 +1672,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1678
1672
}
1679
1673
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SGrad ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1680
1674
else if (do_sgrad) {
1681
- o000 = ix0 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1682
- o100 = ix1 * src_sX + iy0 * src_sY + iz0 * src_sZ;
1683
- o010 = ix0 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1684
- o001 = ix0 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1685
- o110 = ix1 * src_sX + iy1 * src_sY + iz0 * src_sZ;
1686
- o011 = ix0 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1687
- o101 = ix1 * src_sX + iy0 * src_sY + iz1 * src_sZ;
1688
- o111 = ix1 * src_sX + iy1 * src_sY + iz1 * src_sZ;
1689
1675
scalar_t * out_ptr_NCXYZ = out_ptr + n * out_sN + w * out_sX + h * out_sY + d * out_sZ;
1690
1676
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
1691
1677
@@ -1758,16 +1744,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1758
1744
}
1759
1745
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1760
1746
else if (do_count) {
1761
- // Offsets into 'push' volume
1762
- o000 = ix0 * out_sX + iy0 * out_sY + iz0 * out_sZ;
1763
- o100 = ix1 * out_sX + iy0 * out_sY + iz0 * out_sZ;
1764
- o010 = ix0 * out_sX + iy1 * out_sY + iz0 * out_sZ;
1765
- o001 = ix0 * out_sX + iy0 * out_sY + iz1 * out_sZ;
1766
- o110 = ix1 * out_sX + iy1 * out_sY + iz0 * out_sZ;
1767
- o011 = ix0 * out_sX + iy1 * out_sY + iz1 * out_sZ;
1768
- o101 = ix1 * out_sX + iy0 * out_sY + iz1 * out_sZ;
1769
- o111 = ix1 * out_sX + iy1 * out_sY + iz1 * out_sZ;
1770
-
1771
1747
scalar_t * out_ptr_N = out_ptr + n * out_sN;
1772
1748
bound::add (out_ptr_N, o000, w000, s000);
1773
1749
bound::add (out_ptr_N, o100, w100, s100);
@@ -1822,21 +1798,23 @@ MONAI_NAMESPACE_DEVICE { // cpu
1822
1798
ix0 = bound::index (bound0, ix0, src_X);
1823
1799
iy0 = bound::index (bound1, iy0, src_Y);
1824
1800
1825
- // Offsets into source volume
1826
1801
offset_t o00, o10, o01, o11;
1827
1802
if (do_pull || do_grad || do_sgrad) {
1803
+ // Offsets into source volume
1828
1804
o00 = ix0 * src_sX + iy0 * src_sY;
1829
1805
o10 = ix1 * src_sX + iy0 * src_sY;
1830
1806
o01 = ix0 * src_sX + iy1 * src_sY;
1831
1807
o11 = ix1 * src_sX + iy1 * src_sY;
1808
+ } else {
1809
+ // Offsets into 'push' volume
1810
+ o00 = ix0 * out_sX + iy0 * out_sY;
1811
+ o10 = ix1 * out_sX + iy0 * out_sY;
1812
+ o01 = ix0 * out_sX + iy1 * out_sY;
1813
+ o11 = ix1 * out_sX + iy1 * out_sY;
1832
1814
}
1833
1815
1834
1816
// ~~~~~~~~~~~~~~~~~~~~~~~~~~ Grid gradient ~~~~~~~~~~~~~~~~~~~~~~~~~~
1835
1817
if (do_grad) {
1836
- o00 = ix0 * src_sX + iy0 * src_sY;
1837
- o10 = ix1 * src_sX + iy0 * src_sY;
1838
- o01 = ix0 * src_sX + iy1 * src_sY;
1839
- o11 = ix1 * src_sX + iy1 * src_sY;
1840
1818
scalar_t gx = static_cast <scalar_t >(0 );
1841
1819
scalar_t gy = static_cast <scalar_t >(0 );
1842
1820
scalar_t * trgt_ptr_NCXY = trgt_ptr + n * trgt_sN + w * trgt_sX + h * trgt_sY;
@@ -1895,10 +1873,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1895
1873
}
1896
1874
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pull ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1897
1875
if (do_pull) {
1898
- o00 = ix0 * src_sX + iy0 * src_sY;
1899
- o10 = ix1 * src_sX + iy0 * src_sY;
1900
- o01 = ix0 * src_sX + iy1 * src_sY;
1901
- o11 = ix1 * src_sX + iy1 * src_sY;
1902
1876
scalar_t * out_ptr_NCXY = out_ptr + n * out_sN + w * out_sX + h * out_sY;
1903
1877
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
1904
1878
for (offset_t c = 0 ; c < C; ++c, out_ptr_NCXY += out_sC, src_ptr_NC += src_sC) {
@@ -1908,10 +1882,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1908
1882
}
1909
1883
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SGrad ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1910
1884
else if (do_sgrad) {
1911
- o00 = ix0 * src_sX + iy0 * src_sY;
1912
- o10 = ix1 * src_sX + iy0 * src_sY;
1913
- o01 = ix0 * src_sX + iy1 * src_sY;
1914
- o11 = ix1 * src_sX + iy1 * src_sY;
1915
1885
scalar_t * out_ptr_NCXY = out_ptr + n * out_sN + w * out_sX + h * out_sY;
1916
1886
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
1917
1887
@@ -1926,11 +1896,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1926
1896
}
1927
1897
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1928
1898
else if (do_push) {
1929
- // Offsets into 'push' volume
1930
- o00 = ix0 * out_sX + iy0 * out_sY;
1931
- o10 = ix1 * out_sX + iy0 * out_sY;
1932
- o01 = ix0 * out_sX + iy1 * out_sY;
1933
- o11 = ix1 * out_sX + iy1 * out_sY;
1934
1899
scalar_t * trgt_ptr_NCXY = trgt_ptr + n * trgt_sN + w * trgt_sX + h * trgt_sY;
1935
1900
scalar_t * out_ptr_NC = out_ptr + n * out_sN;
1936
1901
if (trgt_K == 0 ) {
@@ -1960,12 +1925,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
1960
1925
}
1961
1926
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1962
1927
else if (do_count) {
1963
- // Offsets into 'push' volume
1964
- o00 = ix0 * out_sX + iy0 * out_sY;
1965
- o10 = ix1 * out_sX + iy0 * out_sY;
1966
- o01 = ix0 * out_sX + iy1 * out_sY;
1967
- o11 = ix1 * out_sX + iy1 * out_sY;
1968
-
1969
1928
scalar_t * out_ptr_N = out_ptr + n * out_sN;
1970
1929
bound::add (out_ptr_N, o00, w00, s00);
1971
1930
bound::add (out_ptr_N, o10, w10, s10);
@@ -1996,20 +1955,21 @@ MONAI_NAMESPACE_DEVICE { // cpu
1996
1955
ix1 = bound::index (bound0, ix0 + 1 , src_X);
1997
1956
ix0 = bound::index (bound0, ix0, src_X);
1998
1957
1999
- // Offsets into source volume
2000
1958
offset_t o0, o1;
2001
1959
if (do_pull || do_grad || do_sgrad) {
1960
+ // Offsets into source volume
2002
1961
o0 = ix0 * src_sX;
2003
1962
o1 = ix1 * src_sX;
1963
+ } else {
1964
+ // Offsets into 'push' volume
1965
+ o0 = ix0 * out_sX;
1966
+ o1 = ix1 * out_sX;
2004
1967
}
2005
1968
2006
1969
// ~~~~~~~~~~~~~~~~~~~~~~~~~~ Grid gradient ~~~~~~~~~~~~~~~~~~~~~~~~~~
2007
1970
if (do_grad) {
2008
1971
if (trgt_K == 0 ) {
2009
1972
// backward w.r.t. push/pull
2010
-
2011
- o0 = ix0 * src_sX;
2012
- o1 = ix1 * src_sX;
2013
1973
scalar_t gx = static_cast <scalar_t >(0 );
2014
1974
scalar_t * trgt_ptr_NCX = trgt_ptr + n * trgt_sN + w * trgt_sX;
2015
1975
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
@@ -2037,8 +1997,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
2037
1997
}
2038
1998
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pull ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2039
1999
if (do_pull) {
2040
- o0 = ix0 * src_sX;
2041
- o1 = ix1 * src_sX;
2042
2000
scalar_t * out_ptr_NCX = out_ptr + n * out_sN + w * out_sX;
2043
2001
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
2044
2002
for (offset_t c = 0 ; c < C; ++c, out_ptr_NCX += out_sC, src_ptr_NC += src_sC) {
@@ -2047,8 +2005,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
2047
2005
}
2048
2006
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SGrad ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2049
2007
else if (do_sgrad) {
2050
- o0 = ix0 * src_sX;
2051
- o1 = ix1 * src_sX;
2052
2008
scalar_t * out_ptr_NCX = out_ptr + n * out_sN + w * out_sX;
2053
2009
scalar_t * src_ptr_NC = src_ptr + n * src_sN;
2054
2010
@@ -2058,9 +2014,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
2058
2014
}
2059
2015
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2060
2016
else if (do_push) {
2061
- // Offsets into 'push' volume
2062
- o0 = ix0 * out_sX;
2063
- o1 = ix1 * out_sX;
2064
2017
scalar_t * trgt_ptr_NCX = trgt_ptr + n * trgt_sN + w * trgt_sX;
2065
2018
scalar_t * out_ptr_NC = out_ptr + n * out_sN;
2066
2019
if (trgt_K == 0 ) {
@@ -2081,10 +2034,6 @@ MONAI_NAMESPACE_DEVICE { // cpu
2081
2034
}
2082
2035
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2083
2036
else if (do_count) {
2084
- // Offsets into 'push' volume
2085
- o0 = ix0 * out_sX;
2086
- o1 = ix1 * out_sX;
2087
-
2088
2037
scalar_t * out_ptr_N = out_ptr + n * out_sN;
2089
2038
bound::add (out_ptr_N, o0, w0, s0);
2090
2039
bound::add (out_ptr_N, o1, w1, s1);
0 commit comments