-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* qm7example rebased with updated capabilities from the main * qm7x example updated * qm7x example updated * coordinates added in data.x * black formatting of train.py * option to normalize energy by number of atoms added * qm7x example updated with correct variable name * energy_per_atoms boolean variable added * write only ADIOS format as default * qm7 dataset fixed * black formatting fixed --------- Co-authored-by: Massimiliano Lupo Pasini <[email protected]> Co-authored-by: Massimiliano Lupo Pasini <[email protected]> Co-authored-by: Massimiliano Lupo Pasini <[email protected]>
- Loading branch information
1 parent
89d4881
commit a1f1c43
Showing
4 changed files
with
833 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
############################################################################## | ||
# Copyright (c) 2021, Oak Ridge National Laboratory # | ||
# All rights reserved. # | ||
# # | ||
# This file is part of HydraGNN and is distributed under a BSD 3-clause # | ||
# license. For the licensing terms see the LICENSE file in the top-level # | ||
# directory. # | ||
# # | ||
# SPDX-License-Identifier: BSD-3-Clause # | ||
############################################################################## | ||
|
||
import json, os | ||
import sys | ||
import logging | ||
import pickle | ||
from tqdm import tqdm | ||
from mpi4py import MPI | ||
import argparse | ||
|
||
import torch | ||
import numpy as np | ||
|
||
import hydragnn | ||
from hydragnn.utils.time_utils import Timer | ||
from hydragnn.utils.distributed import get_device | ||
from hydragnn.utils.model import load_existing_model | ||
from hydragnn.utils.pickledataset import SimplePickleDataset | ||
from hydragnn.utils.config_utils import ( | ||
update_config, | ||
) | ||
from hydragnn.models.create import create_model_config | ||
from hydragnn.preprocess import create_dataloaders | ||
|
||
from scipy.interpolate import griddata | ||
|
||
try: | ||
from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset | ||
except ImportError: | ||
pass | ||
|
||
import matplotlib.pyplot as plt | ||
|
||
plt.rcParams.update({"font.size": 16}) | ||
|
||
|
||
def get_log_name_config(config):
    """Build the log/run name that encodes the main hyperparameters.

    The name concatenates, in order: model type, radius, number of
    convolutional layers, hidden dimension, number of epochs, learning rate,
    batch size, the input node feature indices (joined without separator) and
    the task weights (each followed by a dash).

    Args:
        config: Parsed configuration dictionary with a ``"NeuralNetwork"``
            section containing ``"Architecture"``, ``"Training"`` and
            ``"Variables_of_interest"`` sub-sections.

    Returns:
        The assembled name as a string.
    """
    network = config["NeuralNetwork"]

    architecture = network["Architecture"]
    name = architecture["model_type"]
    name += "-r-" + str(architecture["radius"])
    name += "-ncl-" + str(architecture["num_conv_layers"])
    name += "-hd-" + str(architecture["hidden_dim"])

    training = network["Training"]
    name += "-ne-" + str(training["num_epoch"])
    name += "-lr-" + str(training["Optimizer"]["learning_rate"])
    name += "-bs-" + str(training["batch_size"])

    node_features = network["Variables_of_interest"]["input_node_features"]
    name += "-node_ft-" + "".join(map(str, node_features))

    # Every weight is followed by a dash, so the name ends with a trailing "-".
    weight_tags = [str(weight) + "-" for weight in architecture["task_weights"]]
    name += "-task_weights-" + "".join(weight_tags)
    return name
|
||
|
||
def getcolordensity(xdata, ydata, nbin=20):
    """Estimate a normalized local point density for each (x, y) sample.

    The samples are binned into an ``nbin`` x ``nbin`` 2D histogram, the
    histogram is scaled so that its fullest bin equals 1, and the scaled
    counts (placed at the bin centers) are linearly interpolated back onto
    the sample locations.

    Args:
        xdata: 1D sequence of x coordinates.
        ydata: 1D sequence of y coordinates, same length as ``xdata``.
        nbin: Number of histogram bins along each axis. Defaults to 20.

    Returns:
        Array with one density value per sample. Samples for which the linear
        interpolation is undefined are assigned ``fill_value=0``.
    """
    xdata = np.asarray(xdata)
    ydata = np.asarray(ydata)

    hist2d, xbins_edge, ybins_edge = np.histogram2d(
        x=xdata, y=ydata, bins=[nbin, nbin]
    )
    xbin_cen = 0.5 * (xbins_edge[:-1] + xbins_edge[1:])
    ybin_cen = 0.5 * (ybins_edge[:-1] + ybins_edge[1:])

    # Scale to [0, 1]; skip the division for an all-zero histogram so the
    # result is not turned into NaN.
    peak = np.amax(hist2d)
    if peak > 0:
        hist2d = hist2d / peak

    # "ij" indexing keeps the grid layout aligned with hist2d, whose first
    # axis runs over x bins and second axis over y bins.
    grid_x, grid_y = np.meshgrid(xbin_cen, ybin_cen, indexing="ij")
    loc_pts = np.column_stack([grid_x.ravel(), grid_y.ravel()])

    hist2d_norm = griddata(
        loc_pts,
        hist2d.ravel(),
        (xdata, ydata),
        method="linear",
        fill_value=0,
    )
    return hist2d_norm
|
||
|
||
def info(*args, logtype="info", sep=" "):
    """Log the given values as a single message through the ``logging`` module.

    Args:
        *args: Values to log; each one is converted with ``str``.
        logtype: Name of the ``logging`` module-level function to call
            (for example ``"info"`` or ``"warning"``).
        sep: Separator placed between the stringified values.
    """
    log_fn = getattr(logging, logtype)
    message = sep.join(str(value) for value in args)
    log_fn(message)
|
||
|
||
if __name__ == "__main__":
    # Script entry point: load a trained model and the pickled test split,
    # then, for every configured output, report the test MAE and save a
    # density-colored scatter plot of true vs. predicted values.

    modelname = "qm7x"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--inputfile", help="input file", type=str, default="./logs/qm7x/config.json"
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--adios",
        help="Adios gan_dataset",
        action="store_const",
        dest="format",
        const="adios",
    )
    group.add_argument(
        "--pickle",
        help="Pickle gan_dataset",
        action="store_const",
        dest="format",
        const="pickle",
    )
    parser.set_defaults(format="pickle")

    args = parser.parse_args()

    # The config path is resolved relative to this script's directory.
    dirpwd = os.path.dirname(os.path.abspath(__file__))
    input_filename = os.path.join(dirpwd, args.inputfile)
    with open(input_filename, "r") as f:
        config = json.load(f)
    hydragnn.utils.setup_log(get_log_name_config(config))
    ##################################################################################################################
    # Always initialize for multi-rank training.
    comm_size, rank = hydragnn.utils.setup_ddp()
    ##################################################################################################################
    comm = MPI.COMM_WORLD

    datasetname = "qm7x"

    comm.Barrier()

    # NOTE(review): this timer is started but never stopped in this script.
    timer = Timer("load_data")
    timer.start()
    if args.format == "pickle":
        info("Pickle load")
        basedir = os.path.join(
            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
        )
        var_config = config["NeuralNetwork"]["Variables_of_interest"]
        trainset = SimplePickleDataset(
            basedir=basedir,
            label="trainset",
            var_config=var_config,
        )
        valset = SimplePickleDataset(
            basedir=basedir,
            label="valset",
            var_config=var_config,
        )
        testset = SimplePickleDataset(
            basedir=basedir,
            label="testset",
            var_config=var_config,
        )
        pna_deg = trainset.pna_deg
    else:
        # Only the pickle format is implemented here; --adios ends up in this branch.
        raise NotImplementedError("No supported format: %s" % (args.format))

    model = create_model_config(
        config=config["NeuralNetwork"],
        verbosity=config["Verbosity"]["level"],
    )

    model = torch.nn.parallel.DistributedDataParallel(model)

    load_existing_model(model, modelname, path="./logs/")
    model.eval()

    # Loop invariants, computed once instead of once per test sample.
    device = get_device()
    num_test_samples = len(testset)

    for variable_index, (output_name, output_type, output_dim) in enumerate(
        zip(
            config["NeuralNetwork"]["Variables_of_interest"]["output_names"],
            config["NeuralNetwork"]["Variables_of_interest"]["type"],
            config["NeuralNetwork"]["Variables_of_interest"]["output_dim"],
        )
    ):

        test_MAE = 0.0
        true_values = []
        predicted_values = []

        # Pure inference: disable autograd so no graph is built per sample.
        with torch.no_grad():
            for data in tqdm(testset):
                data = data.to(device)
                predicted = model(data)[variable_index].flatten()
                # y_loc holds the offsets that delimit each output's slice of data.y.
                start = data.y_loc[0][variable_index].item()
                end = data.y_loc[0][variable_index + 1].item()
                true = data.y[start:end, 0]
                # Per-sample summed absolute error, averaged over the test samples.
                test_MAE += (
                    torch.norm(predicted - true, p=1).item() / num_test_samples
                )
                predicted_values.extend(predicted.tolist())
                true_values.extend(true.tolist())

        hist2d_norm = getcolordensity(true_values, predicted_values)

        fig, ax = plt.subplots()
        plt.scatter(true_values, predicted_values, s=8, c=hist2d_norm, vmin=0, vmax=1)
        plt.clim(0, 1)
        # Diagonal reference line marking perfect predictions.
        ax.plot(ax.get_xlim(), ax.get_xlim(), ls="--", color="red")
        plt.colorbar()
        plt.xlabel("True values")
        plt.ylabel("Predicted values")
        plt.title(f"{output_name}")
        plt.draw()
        plt.tight_layout()
        plt.savefig(f"./{output_name}_Scatterplot" + ".png", dpi=400)
        # Release the figure; otherwise one figure per output stays open.
        plt.close(fig)

        print(f"Test MAE {output_name}: ", test_MAE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
{ | ||
"Verbosity": { | ||
"level": 2 | ||
}, | ||
"NeuralNetwork": { | ||
"Architecture": { | ||
"model_type": "EGNN", | ||
"edge_features": ["bond_length"], | ||
"equivariance": true, | ||
"max_neighbours": 20, | ||
"num_gaussians": 50, | ||
"num_filters": 50, | ||
"envelope_exponent": 5, | ||
"int_emb_size": 64, | ||
"basis_emb_size": 8, | ||
"out_emb_size": 128, | ||
"num_after_skip": 2, | ||
"num_before_skip": 1, | ||
"num_radial": 6, | ||
"num_spherical": 7, | ||
"radius": 5, | ||
"hidden_dim": 200, | ||
"num_conv_layers": 6, | ||
"output_heads": { | ||
"graph": { | ||
"num_sharedlayers": 2, | ||
"dim_sharedlayers": 200, | ||
"num_headlayers": 2, | ||
"dim_headlayers": [ | ||
1000, | ||
1000 | ||
] | ||
}, | ||
"node": { | ||
"num_headlayers": 2, | ||
"dim_headlayers": [1000,1000], | ||
"type": "mlp" | ||
} | ||
}, | ||
"task_weights": [ | ||
1, 1, 1, 1, 1 | ||
] | ||
}, | ||
"Variables_of_interest": { | ||
"input_node_features": [0, 1, 2, 3], | ||
"output_index": [ | ||
0, 1, 2, 3, 4 | ||
], | ||
"type": [ | ||
"graph", "node", "node", "node", "node" | ||
], | ||
"output_dim": [1, 3, 1, 1, 1], | ||
"output_names": ["HLGAP", "forces", "hCHG", "hVDIP", "hRAT"], | ||
"denormalize_output": false | ||
}, | ||
"Training": { | ||
"Checkpoint" : true, | ||
"num_epoch": 20, | ||
"batch_size": 32, | ||
"continue": 1, | ||
"startfrom": "/gpfs/alpine/lrn026/world-shared/HydraGNN_Max_QM7X/HydraGNN/logs/qm7x_fullx/qm7x_fullx", | ||
"Optimizer": { | ||
"learning_rate": 0.001 | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
{ | ||
"Verbosity": { | ||
"level": 2 | ||
}, | ||
"NeuralNetwork": { | ||
"Architecture": { | ||
"model_type": "EGNN", | ||
"edge_features": ["bond_length"], | ||
"max_neighbours": 20, | ||
"num_gaussians": 50, | ||
"num_filters": 50, | ||
"envelope_exponent": 5, | ||
"int_emb_size": 64, | ||
"basis_emb_size": 8, | ||
"out_emb_size": 128, | ||
"num_after_skip": 2, | ||
"num_before_skip": 1, | ||
"num_radial": 6, | ||
"num_spherical": 7, | ||
"radius": 5, | ||
"hidden_dim": 200, | ||
"num_conv_layers": 6, | ||
"output_heads": { | ||
"graph": { | ||
"num_sharedlayers": 2, | ||
"dim_sharedlayers": 200, | ||
"num_headlayers": 2, | ||
"dim_headlayers": [ | ||
1000, | ||
1000 | ||
] | ||
}, | ||
"node": { | ||
"num_headlayers": 2, | ||
"dim_headlayers": [1000,1000], | ||
"type": "mlp" | ||
} | ||
}, | ||
"task_weights": [ | ||
1 | ||
] | ||
}, | ||
"Variables_of_interest": { | ||
"input_node_features": [0, 1, 2, 3], | ||
"output_index": [ | ||
0 | ||
], | ||
"type": [ | ||
"graph" | ||
], | ||
"output_dim": [1], | ||
"output_names": ["HLGAP"], | ||
"denormalize_output": false | ||
}, | ||
"Training": { | ||
"num_epoch": 3, | ||
"batch_size": 1, | ||
"continue": 0, | ||
"startfrom": "existing_model", | ||
"Optimizer": { | ||
"learning_rate": 0.001 | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.