# src_tgt.yaml
# Training configuration for a source -> target Transformer model.
# Corpora and vocabulary.
# Each entry under `data` is one corpus; its source/target paths are nested
# beneath it so the two corpora do not collide on `path_src` / `path_tgt`.
data:
  # Training corpus (file names suggest BPE-segmented text).
  train:
    path_src: data/src_tgt/src-train-bpe.txt
    path_tgt: data/src_tgt/tgt-train-bpe.txt
    weight: 1
  # Validation corpus.
  valid:
    path_src: data/src_tgt/src-val-bpe.txt
    path_tgt: data/src_tgt/tgt-val-bpe.txt

# Output prefix for prepared data, and the vocabulary files for each side.
save_data: data/src_tgt/
src_vocab: data/src_tgt/src.vocab
tgt_vocab: data/src_tgt/tgt.vocab
# General options

# Schedule: total training steps, validation interval, logging interval.
train_steps: 200000
valid_steps: 1000
report_every: 100

# Checkpoints: path prefix, save interval (in steps), number retained.
save_model: data/src_tgt/train/model
save_checkpoint_steps: 1000
keep_checkpoint: 5
# Batching
queue_size: 10000
bucket_size: 32768

# Devices: one process; `gpu_ranks` is written as a list, like the other
# list-valued options in this file.
world_size: 1
gpu_ranks: [0]

# Training batches are sized by `batch_type` (here: tokens).
batch_type: "tokens"
batch_size: 4096
valid_batch_size: 8
max_generator_batches: 2

# Gradient accumulation: `accum_count` values paired with the steps in
# `accum_steps` (presumably the step at which each count takes effect;
# confirm against the trainer's option reference).
accum_count: [4]
accum_steps: [0]
# Optimization

# Numeric precision and optimizer.
model_dtype: fp32
optim: adam
adam_beta2: 0.998

# Learning-rate schedule.
learning_rate: 2
decay_method: noam
warmup_steps: 8000

# Gradient handling and loss.
# NOTE(review): 0 presumably disables gradient-norm clipping; confirm.
max_grad_norm: 0
label_smoothing: 0.1
normalization: tokens

# Parameter initialization.
param_init: 0
param_init_glorot: true
# Model

# Architecture: Transformer encoder and decoder, 6 layers each.
encoder_type: transformer
decoder_type: transformer
enc_layers: 6
dec_layers: 6
position_encoding: true

# Dimensions.
heads: 8
rnn_size: 512
word_vec_size: 512
transformer_ff: 2048

# Dropout: values paired with the steps listed in `dropout_steps`.
dropout: [0.3]
attention_dropout: [0.1]
dropout_steps: [0]