
Commit eb8f191

minor
1 parent ee18a21 commit eb8f191

3 files changed: +74 -89 lines changed

NADE.jl

+11 -36

@@ -190,15 +190,15 @@ function train(
     calc_fidelity=false,
     target=nothing,
     calc_observable=false,
-    fidelity_path=nothing,
     num_samples=nothing,
     observable=nothing,
     observable_args=nothing,
-    observable_path=nothing,
     early_stopping=nothing,
     early_stopping_args=nothing
     )
 
+    return_args = []
+
     # TODO: what if train_size % batch_size != 0
     num_batches = Int(size(train_data, 1) / batch_size)
 
@@ -213,11 +213,6 @@ function train(
         observable_stats = []
     end
 
-    # TODO
-    #if calc_NLL
-    #    NLLs = zeros(epochs / log_every)
-    #end
-
     count = 1
     for ep in 1:epochs
         # shuffle training data
@@ -234,7 +229,6 @@
             println("epoch: ", ep)
 
             if calc_fidelity
-
                 fid = fidelity(space, target)
                 fidelities = vcat(fid, fidelities)
                 println("Fidelity = ",fid)
@@ -275,40 +269,21 @@
 
     end
 
-    # save NADE parameters
-    if parameter_path != nothing
-        @save parameter_path*".jld2" θ
-    else
-        @save "NADE_parameters.jld2" θ
-    end
-
-    # save metrics
-    if calc_fidelity
-        if fidelity_path != nothing
-            tmp = fidelity_path
-        else
-            tmp = "training_fidelities"
-        end
-
-        open(tmp, "w") do io
-            writedlm(io, fidelities)
-        end
+    if calc_fidelity
+        push!(return_args, fidelities)
     end
 
     if calc_observable
-        if observable_path != nothing
-            tmp = observable_path
-        else
-            tmp = "training_"*string(observable)
-        end
-
-        open(tmp, "w") do io
-            writedlm(io, observable_stats)
-        end
+        push!(return_args, observable_stats)
     end
-
+    return return_args
+
 end
 
+function save_params(path)
+    @save path θ
+end
+
 function generate_hilbert_space()
     dim = [i for i in 0:2^N-1]
     space = space = parse.(Int64, split(bitstring(dim[1])[end-N+1:end],""))
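
For orientation: after this change, `train` no longer writes metrics or parameters to disk; it collects the requested metrics in `return_args` and returns them, and parameters are saved separately via `save_params`. A minimal usage sketch, mirroring run.jl in this commit; the keyword values are illustrative, and `train_data`, `true_psi`, and the optimizer are assumed to already be set up as in run.jl.

    # Sketch only: consume the new return-value API instead of file paths.
    args = train(
        train_data,
        batch_size=100,
        opt=ADAM(0.01),
        epochs=10000,
        calc_fidelity=true,
        target=true_psi,
        log_every=100
    )

    fidelities = args[1]                  # with calc_fidelity=true, the first entry is the fidelity history
    save_params("NADE_parameters.jld2")   # new helper wrapping `@save path θ`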

Theory.md

+21 -6

@@ -2,11 +2,11 @@
 
 ## Introduction
 
-When algorithmically modeling ground (or thermal) states in computational / statistical physics, an important issue that plagues some algorithms is the equilibration time to generate uncorrelated samples from our model (e.g. Markov chain Monte Carlo). Typically, these algorithms that employ some form of Markov chain are required in order to avoid calculating the exponentially-scaling partition function. Algorithms that produce samples via a Markov chain are usually not desirable, albeit there are plenty of algorithms in existence wherein the equilibration time to produce uncorrelated samples from the model is relatively small. And sometimes, Markov chain methods are all we have. But, generally speaking, a model where samples can only be drawn in this manner is undesirable if alternatives are available.
+When algorithmically modeling ground (or thermal) states in computational / statistical physics, an important issue that plagues some algorithms is the equilibration time to generate uncorrelated samples from our model (e.g. Markov Chain Monte Carlo). Typically, algorithms that employ some form of Markov Chain are required in order to avoid calculating the exponentially-scaling partition function. Sampling via a Markov Chain is usually not desirable, although there are plenty of algorithms wherein the equilibration time to produce uncorrelated samples from the model is relatively small, and sometimes Markov Chain methods are all we have. Generally speaking, though, a model whose samples can only be drawn in this manner is undesirable if alternatives are available.
 
-In machine learning, the Restricted Boltzmann machine (RBM) is a generative model that is burdened by a Markov-chain-like procedure to produce samples called Gibbs sampling. However, even though the RBM has this undesirable property, it has many properties that physicists and people in the machine learning community find appealing. And, in some cases, the equilibration time in the Gibbs sampling procedure is quite small.
+In machine learning, the Restricted Boltzmann machine (RBM) is a generative model that is burdened by a Markov Chain-like procedure for producing samples, called Gibbs sampling. However, even though the RBM has this undesirable property, it has many properties that physicists and people in the machine learning community find very appealing. And in some cases, the equilibration time in the Gibbs sampling procedure is quite small.
 
-Algorithms wherein the partition function need not be calculated, yet the probability distribution defined by the model can be directly sampled, and therefore a Markov chain is not required, are called autoregressive. There exists generative models that have this desirable property (e.g. recurrent neural networks). In this blog post, we will go through one autoregressive generative model called a neural autoregressive distribtuions estimator (NADE). Oddly enough, its network architecture stems from an RBM.
+Algorithms wherein the partition function need not be calculated, yet the probability distribution defined by the model can be directly/exactly sampled, and therefore a Markov Chain is not required, are called autoregressive. There exist generative models that have this desirable property (e.g. recurrent neural networks). In this blog post, we will go through one autoregressive generative model called the neural autoregressive distribution estimator (NADE). Oddly enough, its network architecture stems from an RBM. This blog post is based upon Refs. [1-3].
 
 ## An RBM as a Bayesian Network
 
@@ -22,7 +22,7 @@ $$
 p(\mathbf{v}) = \frac{e^{-\sum_{\mathbf{h} \in \mathcal{H}_{\mathbf{h}}}E(\mathbf{v},h)}}{Z},
 $$
 
-where $\mathbf{v}$ and $\mathbf{h}$ denote the visible and hidden layer of the RBM, respectively. Models that are autoregressive define a probability distribution that is the product of conditional disitributions of the $i^{\text{th}}$ visible unit given all preceeding visible units.
+where $\mathbf{v}$ and $\mathbf{h}$ denote the visible and hidden layers of the RBM, respectively. Models that are autoregressive define a probability distribution that is the product of conditional distributions of the $i^{\text{th}}$ visible unit ($v_i$) given all preceding visible units ($\mathbf{v}_{<i}$).
 
 $$
 p_{\text{autoreg.}}(\mathbf{v}) = \prod_{i} p(v_i \vert \mathbf{v}_{<i})
@@ -34,7 +34,7 @@ $$
 p(\mathbf{v}) = \prod_{i} p(v_i \vert \mathbf{v}_{<i}) = \prod_{i} \frac{p(v_i, \mathbf{v}_{ \lt i})}{p(\mathbf{v}_{ \lt i})}
 $$
 
-However, this is not tractable. If we can approximate the numerator and denominator, then there may be instances where the above expression is tractable and, therefore, we've made the RBM autoregressive.
+However, neither $p(v_i, \mathbf{v}_{ \lt i})$ nor $p(\mathbf{v}_{ \lt i})$ is tractable. If we can approximate both quantities, then there might be instances where the above expression is tractable and we've made the RBM autoregressive.
 
 Consider a mean-field approach for the approximation (recall that a mean-field approximation just relates to the idea that our variables are independent, e.g. $p(a,b) = p(a)p(b)$): approximate $p(v_i \vert \mathbf{v}_{<i})$ by finding a tractable approximation for $p(v_i, \mathbf{v}_{>i}, \mathbf{h} \vert \mathbf{v}_{<i}) \approx q(v_i, \mathbf{v}_{>i}, \mathbf{h} \vert \mathbf{v}_{<i})$ such that $q(v_i \vert \mathbf{v}_{<i})$ is easily obtainable. In our mean-field approximation for $p(v_i, \mathbf{v}_{>i}, \mathbf{h} \vert \mathbf{v}_{<i})$,
 
@@ -165,4 +165,19 @@ $$
 &\qquad \delta \mathbf{a} \leftarrow \delta \mathbf{a} + \delta \mathbf{h}_i \bigodot \mathbf{h}_i \bigodot (1 - \mathbf{h}_i) \\
 &\text{return} \qquad \delta \mathbf{b}, \delta \mathbf{c}, \delta \mathbf{W}, \delta \mathbf{U}
 \end{aligned}
-$$
+$$
+
+## Try for yourself!
+
+I have open-source code for using NADEs to do quantum state reconstruction. It is relatively new and is regularly updated with more functionality. Go check it out [here](https://github.com/isaacdevlugt/GreNADE.git).
+
+## References
+
+[1] B. McNaughton, M. V. Milošević, A. Perali, and S. Pilati, arXiv:2002.04292 (2020).
+
+[2] H. Larochelle and I. Murray, AISTATS 15, 9 (2011).
+
+[3] B. Uria, M.-A. Côté, K. Gregor, I. Murray, and H. Larochelle, arXiv:1605.02226 (2016).
+
+
+
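
Aside on the Theory.md material above: the autoregressive factorization is what allows exact sampling without a Markov Chain, since each v_i is drawn from p(v_i | v_<i) in turn. Below is a minimal Julia sketch for binary units, assuming a placeholder function conditional(v, i) that returns p(v_i = 1 | v_<i); this is purely illustrative and not code from the repository.

    # Illustrative autoregressive sampler for N binary units.
    # `conditional(v, i)` stands in for the model's p(v_i = 1 | v_<i),
    # e.g. a NADE feed-forward pass that only looks at v[1:i-1].
    function sample_autoregressive(conditional, N)
        v = zeros(Int, N)
        logp = 0.0
        for i in 1:N
            p1 = conditional(v, i)             # depends only on v[1:i-1]
            v[i] = rand() < p1 ? 1 : 0
            logp += v[i] == 1 ? log(p1) : log(1 - p1)
        end
        return v, logp                         # exact sample and its log-probability
    end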

run.jl

+42 -47

@@ -4,11 +4,32 @@ using DelimitedFiles
 using Random
 using Distributions
 using LinearAlgebra
+using ArgParse
 
 include("NADE.jl")
+include("postprocess.jl")
+
+function parse_commandline()
+    s = ArgParseSettings()
+    @add_arg_table! s begin
+        "--Nh"
+            help = "number of hidden units"
+            arg_type=Int
+        "--train_path"
+            help = "training data path"
+            arg_type=String
+        "--psi_path"
+            help = "true psi path"
+            arg_type=String
+    end
+    return parse_args(s)
+end
+
+parsed_args = parse_commandline()
 
-train_path = "tfim1D_samples"
-psi_path = "tfim1D_psi"
+Nh = parsed_args["Nh"]
+train_path = parsed_args["train_path"]
+psi_path = parsed_args["psi_path"]
 
 train_data = Int.(readdlm(train_path))
 true_psi = readdlm(psi_path)[:,1]
@@ -17,8 +38,8 @@ N = size(train_data,2)
 NADE_ID = rand(0:10000)
 
 # names of files to save things to
-fidelity_path = "fidelities_N=$N"*"_ID=$NADE_ID"
-parameter_path = "parameters_N=$N"*"_ID=$NADE_ID"
+fidelity_path = "fidelities/fidelity_N=$N"*"_Nh=$Nh"*"_ID=$NADE_ID"
+parameter_path = "params/parameters_N=$N"*"_Nh=$Nh"*"_ID=$NADE_ID"
 
 function fidelity_stopping(current_fid, desired_fid)
     if current_fid >= desired_fid
@@ -28,64 +49,38 @@ function fidelity_stopping(current_fid, desired_fid)
     end
 end
 
-function observable_stopping(current_obs_stats, desired_obs)
-    if abs(current_obs_stats[1] - desired_obs[1]) / desired_obs[1] <= desired_obs[2]
-        return true
-    else
-        return false
-    end
-end
-
-function true_magnetization()
-    magnetization = 0
-    for Ket = 0:2^N-1
-        SumSz = 0.
-        for SpinIndex = 0:N-1
-            Spin1 = 2*((Ket>>SpinIndex)&1) - 1
-            SumSz += Spin1
-        end
-        magnetization += SumSz*SumSz*psi[Ket+1]^2
-    end
-    return magnetization / N
-end
-
-function spin_flip(idx, s)
-    s[idx] *= -1.0
-end
-
-function magnetization(sample)
-    sample = (sample .* 2) .- 1
-    return sum(sample)*sum(sample) / N
-end
-
 # Change these hyperparameters to your liking
-Nh = 20 # number of hidden units
-
-η = 0.001
+η = 0.01
 batch_size = 100
-epochs = 500
-log_every = 10
+epochs = 10000
+log_every = 100
 opt = ADAM(η)
 
 desired_fid = 0.995
-
-#tolerance = 0.05
-# arguments for early_stopping function
-#desired_obs = (true_magnetization(), tolerance)
-
 initialize_parameters(seed=9999)
 
-train(
+args = train(
    train_data,
    batch_size=batch_size,
    opt=opt,
    epochs=epochs,
    calc_fidelity=true,
    target=true_psi,
-   fidelity_path=fidelity_path,
    early_stopping=fidelity_stopping,
    early_stopping_args=desired_fid,
-   log_every=1
+   log_every=log_every
 )
 
+fidelities = args[1]
 
+if fidelities[size(fidelities,1)] >= desired_fid
+    println("Reached desired fidelity")
+    open(fidelity_path, "w") do io
+        writedlm(io, fidelities)
+    end
+    @save parameter_path θ
+else
+    println("Increasing Nh by 5")
+    Nh += 5
+    submit_new_job(Nh, train_path, psi_path)
+end
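
Note: `submit_new_job` is presumably provided by the newly included postprocess.jl, which is not part of this commit. As a hedged guess at its intent (relaunching run.jl with the increased Nh), it might look roughly like the sketch below; the real implementation could instead submit to a job scheduler.

    # Hypothetical sketch only; the actual submit_new_job lives in postprocess.jl.
    function submit_new_job(Nh, train_path, psi_path)
        # Relaunch this script with the larger hidden-unit count.
        run(`julia run.jl --Nh $Nh --train_path $train_path --psi_path $psi_path`)
    end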
