custom_trans.py
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Pre-norm multi-head self-attention block with a residual connection.
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Position-wise feed-forward network (kernel-size-1 convolutions) with a second residual.
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks,
                mlp_units, dropout=0, mlp_dropout=0):
    # Stack transformer encoder blocks, pool across the feature axis,
    # then apply an MLP head ending in a single linear output (regression).
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = layers.GlobalAveragePooling1D(data_format='channels_first')(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation='elu')(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1, activation='linear')(x)
    return keras.Model(inputs, outputs)
def lr_scheduler(epoch, lr, warmup_epochs=30, decay_epochs=100,
                 initial_lr=1e-6, base_lr=1e-3, min_lr=5e-5):
    # Linear warmup from initial_lr to base_lr, then linear decay down to min_lr,
    # then hold at min_lr. Intended for keras.callbacks.LearningRateScheduler.
    if epoch <= warmup_epochs:
        pct = epoch / warmup_epochs
        return ((base_lr - initial_lr) * pct) + initial_lr
    if epoch < warmup_epochs + decay_epochs:
        pct = 1 - ((epoch - warmup_epochs) / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr
    return min_lr
def shift(xs, n):
    # Shift a 1-D array by n positions, padding the vacated slots with NaN
    # (positive n shifts values forward, negative n shifts them backward).
    # The n == 0 case is handled explicitly: xs[:-0] would be an empty slice.
    e = np.empty_like(xs)
    if n > 0:
        e[:n] = np.nan
        e[n:] = xs[:-n]
    elif n < 0:
        e[n:] = np.nan
        e[:n] = xs[-n:]
    else:
        e[:] = xs
    return e
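
# Illustrative usage sketch (not part of the original file): one way these pieces
# might be wired together. The input shape and hyperparameters below are
# assumptions chosen for the example, not values taken from this repository.
if __name__ == "__main__":
    model = build_model(
        input_shape=(30, 8),          # assumed: 30 timesteps, 8 features
        head_size=64,
        num_heads=4,
        ff_dim=128,
        num_transformer_blocks=2,
        mlp_units=[64],
        dropout=0.1,
        mlp_dropout=0.2,
    )
    model.compile(optimizer=keras.optimizers.Adam(), loss="mse")
    model.summary()

    # The warmup/decay schedule plugs into Keras as a LearningRateScheduler callback:
    lr_callback = keras.callbacks.LearningRateScheduler(lr_scheduler)
    # model.fit(X_train, y_train, epochs=150, callbacks=[lr_callback])  # hypothetical data

    # shift() can be used to build lagged or lead target columns, e.g.:
    # y_lead = shift(series, -1)  # next-step values, NaN in the final slot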