Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/Validate-GPU.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
-v "/home/data/cfs/.ccache:/root/.ccache" \
-v "/dev/shm:/dev/shm" \
-v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
-v ${{ github.workspace }}:/graphnet \
-v ${{ github.workspace }}:${{ github.workspace }} \
-e python \
-e core_index \
-e BRANCH \
Expand All @@ -73,7 +73,7 @@ jobs:
-e CACHE_DIR \
-e GITHUB_API_TOKEN \
-e CFS_DIR \
-w /graphnet --network host ${docker_image}
-w ${{ github.workspace }} --network host ${docker_image}

- name: Run check
env:
Expand Down
46 changes: 28 additions & 18 deletions graph_net/paddle/check_redundant_incrementally.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,26 @@ def main(args):
assert os.path.isdir(
args.graph_net_samples_path
), f"args.graph_net_samples_path ({args.graph_net_samples_path}) is not a directory!"

current_model_graph_hash_pathes = set()
if args.model_path:
assert os.path.isdir(
args.model_path
), f"args.model_path {args.model_path} is not a directory!"
current_model_graph_hash_pathes = set(
graph_hash_path
for model_path in get_recursively_model_pathes(args.model_path)
for graph_hash_path in [f"{model_path}/graph_hash.txt"]
)

find_redundant = False
graph_hash2graph_net_model_path = {}
for model_path in get_recursively_model_pathes(args.graph_net_samples_path):
graph_hash_path = f"{model_path}/graph_hash.txt"
if os.path.isfile(graph_hash_path):
if (
os.path.isfile(graph_hash_path)
and graph_hash_path not in current_model_graph_hash_pathes
):
graph_hash = open(graph_hash_path).read()
if graph_hash not in graph_hash2graph_net_model_path.keys():
graph_hash2graph_net_model_path[graph_hash] = [graph_hash_path]
Expand All @@ -60,29 +75,24 @@ def main(args):
print(
f"Totally {len(graph_hash2graph_net_model_path)} unique samples under {args.graph_net_samples_path}."
)
for graph_hash, graph_paths in graph_hash2graph_net_model_path.items():
if len(graph_paths) > 1:
print(f"Redundant models detected for grap_hash {graph_hash}:")
for model_path in graph_paths:
print(f" {model_path}")
assert (
not find_redundant
), f"Redundant models detected under {args.graph_net_samples_path}."

if args.model_path:
assert os.path.isdir(
args.model_path
), f"args.model_path {args.model_path} is not a directory!"
current_model_graph_hash_pathes = set(
graph_hash_path
for model_path in get_recursively_model_pathes(args.model_path)
for graph_hash_path in [f"{model_path}/graph_hash.txt"]
)
# Check whether the specified model is redundant.
for current_model_graph_hash_path in current_model_graph_hash_pathes:
graph_hash = open(current_model_graph_hash_path).read()
assert (
graph_hash not in graph_hash2graph_net_model_path
), f"Redundant models detected. old-model-path:{current_model_graph_hash_path}, new-model-path:{graph_hash2graph_net_model_path[graph_hash]}."
), f"Redundant models detected.\n\tgraph_hash:{graph_hash}, newly-added-model-path:{current_model_graph_hash_path}, existing-model-path:{graph_hash2graph_net_model_path[graph_hash]}."
else:
# Check whether there are redundant samples under samples directory.
for graph_hash, graph_paths in graph_hash2graph_net_model_path.items():
if len(graph_paths) > 1:
print(f"Redundant models detected for grap_hash {graph_hash}:")
for model_path in graph_paths:
print(f" {model_path}")
assert (
not find_redundant
), f"Redundant models detected under {args.graph_net_samples_path}."


if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion graph_net/paddle/samples_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@


def get_default_samples_directory():
    """Return the path of the ``paddle_samples`` directory beside the package.

    The parent of the ``graph_net`` package directory is obtained with two
    ``os.path.dirname`` calls rather than by appending ``/..``, so the
    returned string contains no ``..`` segment and paths derived from it can
    be compared as plain strings.

    Returns:
        str: ``<parent of the graph_net package directory>/paddle_samples``.
    """
    # dirname(__file__) is the package directory; its dirname is the root
    # that holds both the package and the samples directory.
    graph_net_root = os.path.dirname(os.path.dirname(graph_net.__file__))
    return f"{graph_net_root}/paddle_samples"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
62cc3d05adaf6e4219e2b653fec24cce7290406e2f80064a1e914ebc82570775
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"framework": "paddle",
"model_name": "ernie-search-base-dual-encoder-marco-en",
"model_name": "rocketqa-base-cross-encoder",
"num_devices_required": 1,
"num_nodes_required": 1
}
34 changes: 34 additions & 0 deletions paddle_samples/PaddleNLP/rocketqa-base-cross-encoder/input_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
class Program_weight_tensor_data_0:
    """Metadata and literal values for the input tensor named ``data_0``."""

    name = "data_0"
    # Declared shape: one row of 21 elements, matching len(data) below.
    shape = [1, 21]
    dtype = "int64"
    # Flat list of the tensor's 21 integer values.
    # NOTE(review): presumably tokenizer ids for the sample input -- confirm.
    data = [
        1,
        6368,
        30,
        3441,
        5254,
        2775,
        7208,
        42,
        1675,
        6433,
        7946,
        4640,
        31618,
        7476,
        34874,
        1662,
        4968,
        36810,
        9478,
        42,
        2,
    ]


class Program_weight_tensor_data_1:
    """Metadata and literal values for the input tensor named ``data_1``."""

    name = "data_1"
    # Declared shape: one row of 21 elements, matching len(data) below.
    shape = [1, 21]
    dtype = "int64"
    # All 21 values are zero.
    # NOTE(review): presumably segment/type ids for a single-segment input -- confirm.
    data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Loading