1 file changed: +21 -2 lines changed
torch/_inductor/codegen/cuda
Columns: original file line number | diff line number | diff line change
@@ -114,8 +114,19 @@ def try_import_cutlass() -> bool:
114
114
return False
115
115
116
116
117
+ @functools .lru_cache (8 )
117
118
def _normalize_cuda_arch (arch : str ) -> str :
118
- if int (arch ) >= 90 :
119
+ if int (arch ) >= 100 :
120
+ log .warning (
121
+ "Detected CUDA architecture >= 100: %s. We will generate operations with "
122
+ "GenerateSM100 (if available) and GenerateSM90. Please file an "
123
+ "issue for any problems and feedback. " ,
124
+ arch ,
125
+ )
126
+
127
+ if int (arch ) >= 100 :
128
+ return "100"
129
+ elif int (arch ) >= 90 :
119
130
return "90"
120
131
elif int (arch ) >= 80 :
121
132
return "80"
@@ -186,7 +197,15 @@ def _gen_ops_cached(arch, version) -> list[Any]:
186
197
)
187
198
manifest = cutlass_manifest .Manifest (args )
188
199
189
- if arch == "90" :
200
+ if arch == "100" :
201
+ try :
202
+ from cutlass_generator import GenerateSM100 # type: ignore[import]
203
+
204
+ GenerateSM100 (manifest , args .cuda_version )
205
+ except ImportError :
206
+ log .warning ("Cannot find GenerateSM100. Only GenerateSM90 will be used. " )
207
+ cutlass_generator .GenerateSM90 (manifest , args .cuda_version )
208
+ elif arch == "90" :
190
209
cutlass_generator .GenerateSM90 (manifest , args .cuda_version )
191
210
cutlass_generator .GenerateSM80 (manifest , args .cuda_version )
192
211
else :
You can’t perform that action at this time.
0 commit comments