Skip to content

Commit

Permalink
Qm7x dataset (#216)
Browse files Browse the repository at this point in the history
* qm7x example rebased with updated capabilities from main

* qm7x example updated

* qm7x example updated

* coordinates added in data.x

* black formatting of train.py

* option to normalize energy by number of atoms added

* qm7x example updated with correct variable name

* energy_per_atoms boolean variable added

* write only ADIOS format as default

* qm7 dataset fixed

* black formatting fixed

---------

Co-authored-by: Massimiliano Lupo Pasini <[email protected]>
Co-authored-by: Massimiliano Lupo Pasini <[email protected]>
Co-authored-by: Massimiliano Lupo Pasini <[email protected]>
  • Loading branch information
4 people authored Mar 31, 2024
1 parent 89d4881 commit a1f1c43
Show file tree
Hide file tree
Showing 4 changed files with 833 additions and 0 deletions.
222 changes: 222 additions & 0 deletions examples/qm7x/inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
##############################################################################
# Copyright (c) 2021, Oak Ridge National Laboratory #
# All rights reserved. #
# #
# This file is part of HydraGNN and is distributed under a BSD 3-clause #
# license. For the licensing terms see the LICENSE file in the top-level #
# directory. #
# #
# SPDX-License-Identifier: BSD-3-Clause #
##############################################################################

import json, os
import sys
import logging
import pickle
from tqdm import tqdm
from mpi4py import MPI
import argparse

import torch
import numpy as np

import hydragnn
from hydragnn.utils.time_utils import Timer
from hydragnn.utils.distributed import get_device
from hydragnn.utils.model import load_existing_model
from hydragnn.utils.pickledataset import SimplePickleDataset
from hydragnn.utils.config_utils import (
update_config,
)
from hydragnn.models.create import create_model_config
from hydragnn.preprocess import create_dataloaders

from scipy.interpolate import griddata

try:
from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
except ImportError:
pass

import matplotlib.pyplot as plt

# Set the global matplotlib font size to 16 for every figure this script creates.
plt.rcParams.update({"font.size": 16})


def get_log_name_config(config):
    """Build the log name string that encodes the main hyperparameters.

    The name is assembled from the architecture settings (model type, radius,
    number of convolutional layers, hidden dimension), the training settings
    (number of epochs, learning rate, batch size), the concatenated input node
    feature indices and the task weights (each followed by a dash).

    Args:
        config: Parsed configuration dictionary with a "NeuralNetwork" section
            containing "Architecture", "Training" and "Variables_of_interest".

    Returns:
        The log name as a single string.
    """
    nn_config = config["NeuralNetwork"]
    arch = nn_config["Architecture"]

    # Fixed-position pieces, in the order they appear in the final name.
    # "model_type" is intentionally not wrapped in str(): it must already be
    # a string, exactly as string concatenation would require.
    pieces = [
        arch["model_type"],
        "-r-",
        str(arch["radius"]),
        "-ncl-",
        str(arch["num_conv_layers"]),
        "-hd-",
        str(arch["hidden_dim"]),
    ]

    training = nn_config["Training"]
    pieces += [
        "-ne-",
        str(training["num_epoch"]),
        "-lr-",
        str(training["Optimizer"]["learning_rate"]),
        "-bs-",
        str(training["batch_size"]),
    ]

    # Node feature indices are concatenated without any separator.
    pieces.append("-node_ft-")
    node_features = nn_config["Variables_of_interest"]["input_node_features"]
    pieces.extend(str(feature) for feature in node_features)

    # Every task weight is followed by a dash, including the last one.
    pieces.append("-task_weights-")
    for weight in arch["task_weights"]:
        pieces.append(str(weight) + "-")

    return "".join(pieces)


def getcolordensity(xdata, ydata, nbin=20):
    """Estimate a normalized point density at every (x, y) sample.

    A 2D histogram of the samples is computed on an ``nbin`` x ``nbin`` grid,
    scaled so that its largest bin equals 1, and then linearly interpolated
    from the bin centers back onto the sample locations. The result is meant
    to be used as a per-point color value for a scatter plot.

    Args:
        xdata: 1D sequence of x coordinates.
        ydata: 1D sequence of y coordinates, same length as ``xdata``.
        nbin: Number of histogram bins along each axis (default 20).

    Returns:
        1D numpy array with one density value per sample. Samples lying
        outside the convex hull of the bin centers get the fill value 0.
    """
    xdata = np.asarray(xdata, dtype=float)
    ydata = np.asarray(ydata, dtype=float)

    hist2d, xbins_edge, ybins_edge = np.histogram2d(
        x=xdata, y=ydata, bins=[nbin, nbin]
    )
    xbin_cen = 0.5 * (xbins_edge[:-1] + xbins_edge[1:])
    ybin_cen = 0.5 * (ybins_edge[:-1] + ybins_edge[1:])

    # Scale so the densest bin is 1; skip when the histogram is empty to
    # avoid producing NaNs from a 0/0 division.
    peak = np.amax(hist2d)
    if peak > 0:
        hist2d = hist2d / peak

    # indexing="ij" gives grids shaped like hist2d (first axis = x bins,
    # second axis = y bins), so flattening both keeps points and values aligned.
    grid_x, grid_y = np.meshgrid(xbin_cen, ybin_cen, indexing="ij")
    loc_pts = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    hist2d_norm = griddata(
        loc_pts,
        hist2d.ravel(),
        (xdata, ydata),
        method="linear",
        fill_value=0,
    )
    return hist2d_norm


def info(*args, logtype="info", sep=" "):
    """Log the given values as one message through the ``logging`` module.

    Args:
        *args: Values to log; each one is converted with ``str``.
        logtype: Name of the ``logging`` module-level function to call
            (for example "info", "warning" or "error").
        sep: Separator placed between the converted values.
    """
    log_fn = getattr(logging, logtype)
    message = sep.join(str(value) for value in args)
    log_fn(message)


if __name__ == "__main__":
    # Inference script: load a trained model and the pickled test set, then,
    # for every output variable, accumulate an L1 error over the test set and
    # save a density-colored scatter plot of predicted versus true values.

    # Name used for both the pickled dataset directory and the saved model.
    modelname = "qm7x"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--inputfile", help="input file", type=str, default="./logs/qm7x/config.json"
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--adios",
        help="Adios gan_dataset",
        action="store_const",
        dest="format",
        const="adios",
    )
    group.add_argument(
        "--pickle",
        help="Pickle gan_dataset",
        action="store_const",
        dest="format",
        const="pickle",
    )
    parser.set_defaults(format="pickle")

    args = parser.parse_args()

    # The configuration path is resolved relative to this script's directory.
    dirpwd = os.path.dirname(os.path.abspath(__file__))
    input_filename = os.path.join(dirpwd, args.inputfile)
    with open(input_filename, "r") as f:
        config = json.load(f)
    hydragnn.utils.setup_log(get_log_name_config(config))

    # Always initialize the distributed setup, even for a single rank: the
    # model is wrapped in DistributedDataParallel further down.
    comm_size, rank = hydragnn.utils.setup_ddp()
    comm = MPI.COMM_WORLD

    comm.Barrier()

    timer = Timer("load_data")
    timer.start()
    if args.format == "pickle":
        info("Pickle load")
        basedir = os.path.join(dirpwd, "dataset", "%s.pickle" % modelname)
        var_config = config["NeuralNetwork"]["Variables_of_interest"]
        # NOTE: only `testset` is consumed by the evaluation loop below;
        # the train/validation splits and `pna_deg` are loaded but not used
        # afterwards in this script.
        trainset = SimplePickleDataset(
            basedir=basedir,
            label="trainset",
            var_config=var_config,
        )
        valset = SimplePickleDataset(
            basedir=basedir,
            label="valset",
            var_config=var_config,
        )
        testset = SimplePickleDataset(
            basedir=basedir,
            label="testset",
            var_config=var_config,
        )
        pna_deg = trainset.pna_deg
    else:
        # Only the pickle format is handled here, even though --adios is
        # accepted on the command line.
        raise NotImplementedError("No supported format: %s" % (args.format))
    # Stop the timer that was started before loading, so the measured
    # interval covers the data loading only.
    timer.stop()

    model = create_model_config(
        config=config["NeuralNetwork"],
        verbosity=config["Verbosity"]["level"],
    )

    model = torch.nn.parallel.DistributedDataParallel(model)

    load_existing_model(model, modelname, path="./logs/")
    model.eval()

    variables = config["NeuralNetwork"]["Variables_of_interest"]
    device = get_device()
    num_test_samples = len(testset)

    # The three lists are zipped (as before) so the number of evaluated
    # outputs is bounded by the shortest of them; only the name is used.
    for variable_index, (output_name, _output_type, _output_dim) in enumerate(
        zip(
            variables["output_names"],
            variables["type"],
            variables["output_dim"],
        )
    ):
        # Sum over samples of the L1 norm of (predicted - true), divided by
        # the number of test samples. For outputs with several entries per
        # sample this is a per-sample L1 error, not a per-entry mean.
        test_MAE = 0.0
        true_values = []
        predicted_values = []

        # Gradients are not needed for evaluation; disabling autograd avoids
        # building a computation graph for every sample.
        with torch.no_grad():
            for data in tqdm(testset):
                predicted = model(data.to(device))
                predicted = predicted[variable_index].flatten()
                # data.y_loc[0] is read as the offsets delimiting each
                # output's slice inside data.y.
                start = data.y_loc[0][variable_index].item()
                end = data.y_loc[0][variable_index + 1].item()
                true = data.y[start:end, 0]
                test_MAE += (
                    torch.norm(predicted - true, p=1).item() / num_test_samples
                )
                predicted_values.extend(predicted.tolist())
                true_values.extend(true.tolist())

        hist2d_norm = getcolordensity(true_values, predicted_values)

        fig, ax = plt.subplots()
        plt.scatter(true_values, predicted_values, s=8, c=hist2d_norm, vmin=0, vmax=1)
        plt.clim(0, 1)
        # Dashed red identity line: perfect predictions fall on it.
        ax.plot(ax.get_xlim(), ax.get_xlim(), ls="--", color="red")
        plt.colorbar()
        plt.xlabel("True values")
        plt.ylabel("Predicted values")
        plt.title(f"{output_name}")
        plt.draw()
        plt.tight_layout()
        plt.savefig(f"./{output_name}_Scatterplot" + ".png", dpi=400)
        # Release the figure; otherwise one open figure per output variable
        # accumulates in pyplot's global state.
        plt.close(fig)

        print(f"Test MAE {output_name}: ", test_MAE)
67 changes: 67 additions & 0 deletions examples/qm7x/qm7x.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"Verbosity": {
"level": 2
},
"NeuralNetwork": {
"Architecture": {
"model_type": "EGNN",
"edge_features": ["bond_length"],
"equivariance": true,
"max_neighbours": 20,
"num_gaussians": 50,
"num_filters": 50,
"envelope_exponent": 5,
"int_emb_size": 64,
"basis_emb_size": 8,
"out_emb_size": 128,
"num_after_skip": 2,
"num_before_skip": 1,
"num_radial": 6,
"num_spherical": 7,
"radius": 5,
"hidden_dim": 200,
"num_conv_layers": 6,
"output_heads": {
"graph": {
"num_sharedlayers": 2,
"dim_sharedlayers": 200,
"num_headlayers": 2,
"dim_headlayers": [
1000,
1000
]
},
"node": {
"num_headlayers": 2,
"dim_headlayers": [1000,1000],
"type": "mlp"
}
},
"task_weights": [
1, 1, 1, 1, 1
]
},
"Variables_of_interest": {
"input_node_features": [0, 1, 2, 3],
"output_index": [
0, 1, 2, 3, 4
],
"type": [
"graph", "node", "node", "node", "node"
],
"output_dim": [1, 3, 1, 1, 1],
"output_names": ["HLGAP", "forces", "hCHG", "hVDIP", "hRAT"],
"denormalize_output": false
},
"Training": {
"Checkpoint" : true,
"num_epoch": 20,
"batch_size": 32,
"continue": 1,
"startfrom": "/gpfs/alpine/lrn026/world-shared/HydraGNN_Max_QM7X/HydraGNN/logs/qm7x_fullx/qm7x_fullx",
"Optimizer": {
"learning_rate": 0.001
}
}
}
}
65 changes: 65 additions & 0 deletions examples/qm7x/qm7x_single_tasking.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"Verbosity": {
"level": 2
},
"NeuralNetwork": {
"Architecture": {
"model_type": "EGNN",
"edge_features": ["bond_length"],
"max_neighbours": 20,
"num_gaussians": 50,
"num_filters": 50,
"envelope_exponent": 5,
"int_emb_size": 64,
"basis_emb_size": 8,
"out_emb_size": 128,
"num_after_skip": 2,
"num_before_skip": 1,
"num_radial": 6,
"num_spherical": 7,
"radius": 5,
"hidden_dim": 200,
"num_conv_layers": 6,
"output_heads": {
"graph": {
"num_sharedlayers": 2,
"dim_sharedlayers": 200,
"num_headlayers": 2,
"dim_headlayers": [
1000,
1000
]
},
"node": {
"num_headlayers": 2,
"dim_headlayers": [1000,1000],
"type": "mlp"
}
},
"task_weights": [
1
]
},
"Variables_of_interest": {
"input_node_features": [0, 1, 2, 3],
"output_index": [
0
],
"type": [
"graph"
],
"output_dim": [1],
"output_names": ["HLGAP"],
"denormalize_output": false
},
"Training": {
"num_epoch": 3,
"batch_size": 1,
"continue": 0,
"startfrom": "existing_model",
"Optimizer": {
"learning_rate": 0.001
}
}
}
}
Loading

0 comments on commit a1f1c43

Please sign in to comment.