Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 37 additions & 21 deletions thicket/ncu.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def _match_call_trace_regex(
kernel_str = kernel_match.group(1)
else:
if debug:
print(f"\tCould not match {demangled_kernel_name}")
return None, None, None, True
print(f"\tCould not match {demangled_kernel_name}\n\tWill still attempt to match with query from kernel call trace (unsafe)")
kernel_str = None
#return None, None, None, None, True

# RAJA_CUDA/Lambda_CUDA variant
instance_pattern = r"instance (\d+)"
Expand Down Expand Up @@ -79,16 +80,23 @@ def _match_kernel_str_to_cali(
raja_lambda_cuda (bool): True if RAJA_CUDA or Lambda_CUDA, False if Base_CUDA
instance_exists (bool): True if instance number exists, False if not
"""
return [
n
for n in node_set
if kernel_str in n.frame["name"]
and (
f"#{instance_num}" in n.frame["name"]
if raja_lambda_cuda and instance_exists
else True
)
]
if kernel_str:
return [
n
for n in node_set
if kernel_str in n.frame["name"]
and (
f"#{instance_num}" in n.frame["name"]
if raja_lambda_cuda and instance_exists
else True
)
]
else:
return[
n
for n in node_set
if n.frame["type"] == "kernel"
]


def _multi_match_fallback_similarity(matched_nodes, demangled_kernel_name, debug):
Expand Down Expand Up @@ -119,7 +127,7 @@ def _multi_match_fallback_similarity(matched_nodes, demangled_kernel_name, debug
return matched_node


def _build_query_from_ncu_trace(kernel_call_trace):
def _build_query_from_ncu_trace(kernel_call_trace, debug):
"""Build QueryLanguage query from an NCU kernel call trace

Arguments:
Expand Down Expand Up @@ -151,10 +159,14 @@ def _predicate_builder(kernel, is_regex=False):
query.match(".", _predicate_builder(kernel))
elif i == len(kernel_call_trace) - 1:
query.rel("*")
query.rel(".", _predicate_builder(kernel, is_regex=True))
query.rel(".", _predicate_builder(kernel, is_regex=True)).rel("*")
else:
query.rel(".", _predicate_builder(kernel))

if debug:
print(query)
print(kernel_call_trace)

return query


Expand Down Expand Up @@ -201,10 +213,11 @@ def _read_ncu(self, thicket, ncu_report_mapping, debug=False, disable_tqdm=False
ncu_hash = profile_mapping_flipped[ncu_report_mapping[ncu_report_file]]

# Relevant for kernel matching
variant = thicket.metadata.loc[ncu_hash, "variant"]
raja_lambda_cuda = (
variant.upper() == "RAJA_CUDA" or variant.upper() == "LAMBDA_CUDA"
)
# variant = thicket.metadata.loc[ncu_hash, "variant"]
# raja_lambda_cuda = (
# variant.upper() == "RAJA_CUDA" or variant.upper() == "LAMBDA_CUDA"
# )
raja_lambda_cuda = (True)

# Load file
report = ncu_report.load_report(ncu_report_file)
Expand Down Expand Up @@ -268,16 +281,19 @@ def _read_ncu(self, thicket, ncu_report_mapping, debug=False, disable_tqdm=False
continue

# Add kernel name to the end of the trace tuple
kernel_call_trace.append(kernel_str)
if kernel_str:
kernel_call_trace.append(kernel_str)

# Match ncu kernel to thicket node
matched_node = None
if demangled_kernel_name in kernel_map:
if demangled_kernel_name in kernel_map and kernel_str:
# Skip query building
matched_node = kernel_map[demangled_kernel_name]
if debug:
print(f"\tKernel already in mapping: {demangled_kernel_name}")
else: # kernel hasn't been seen yet
# Build query
query = _build_query_from_ncu_trace(kernel_call_trace)
query = _build_query_from_ncu_trace(kernel_call_trace, debug)
# Apply the query
node_set = query.apply(thicket)
# Find the correct node. This may also get the parent so we take the last one
Expand Down
Loading