-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path smoke_test.sh
More file actions
executable file
·147 lines (131 loc) · 3.47 KB
/
smoke_test.sh
File metadata and controls
executable file
·147 lines (131 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env bash
# Smoke test with a tiny amount of data to ensure everything runs correctly.
#
# Fail fast: without strict mode a failed generate step would be silently
# ignored and the following eval step would run against missing or stale
# output, making the smoke test pass vacuously.
set -euo pipefail

# full_chat tests
# embed: compute embeddings for 20 full_chat examples with a
# sentence-transformers model across 2 GPUs (-u: unbuffered, so progress
# logs stream immediately).
python -u -m scripts.generate_outputs \
  --datasets full_chat \
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
  --num-gpus 2 \
  --n-total 20 \
  --batch-size 20 \
  --seed 42 \
  --embed
# generate with flan-t5: 2 hypotheses per example, fixed seed for
# reproducibility.
python -m scripts.generate_outputs \
  --datasets full_chat \
  --model-name-or-path google/flan-t5-xl \
  --num-gpus 1 \
  --print-container-logs \
  --n-total 20 \
  --num-hypotheses 2 \
  --seed 42
# eval full_chat: score the flan-t5 generations in ./output with an
# hh-rlhf reward model as the loss function.
python -m scripts.compute_loss \
  --output-dir output \
  --datasets full_chat \
  --loss-fn weqweasdas/hh_rlhf_rm_open_llama_3b \
  --batch-size 30 \
  --eval-models google/flan-t5-xl
# red_team_chat tests
# Build each argv as an array; "${arr[@]}" expands to exactly the same
# arguments as the original flag-per-line invocations.
# embed
embed_args=(
  --datasets red_team_chat
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1
  --num-gpus 2
  --n-total 20
  --batch-size 20
  --seed 42
  --embed
)
python -u -m scripts.generate_outputs "${embed_args[@]}"
# generate with flan-t5
generate_args=(
  --datasets red_team_chat
  --model-name-or-path google/flan-t5-xl
  --num-gpus 1
  --print-container-logs
  --n-total 20
  --num-hypotheses 2
  --seed 42
)
python -m scripts.generate_outputs "${generate_args[@]}"
# eval red_team_chat
eval_args=(
  --output-dir output
  --datasets red_team_chat
  --loss-fn weqweasdas/hh_rlhf_rm_open_llama_3b
  --batch-size 30
  --eval-models google/flan-t5-xl
)
python -m scripts.compute_loss "${eval_args[@]}"
# mbpp tests
# Sampled code generation (2 hypotheses, 2 return sequences per hypothesis)
# on 20 MBPP problems with a fixed seed.
mbpp_model="codellama/CodeLlama-7b-Instruct-hf"
python -m scripts.generate_outputs \
  --datasets mbpp \
  --model-name-or-path "$mbpp_model" \
  --num-gpus 1 \
  --print-container-logs \
  --n-total 20 \
  --num-hypotheses 2 \
  --num-return-sequences 2 \
  --seed 42 \
  --do-sample
# eval mbpp: functional-correctness scoring (pass@k) of the generated code
# found under ./output.
python -m scripts.compute_loss \
  --output-dir output \
  --datasets mbpp \
  --loss-fn pass@k
# meqsum tests
# Generate 2 hypotheses per example for 20 bigbio/meqsum examples
# (presumably medical question summarization — verify against the dataset
# loader) with Falcon-7B-Instruct on one GPU, fixed seed.
python -m scripts.generate_outputs \
--datasets bigbio/meqsum \
--model-name-or-path tiiuae/falcon-7b-instruct \
--num-gpus 1 \
--print-container-logs \
--n-total 20 \
--num-hypotheses 2 \
--seed 42
# eval meqsum
# Score the generations found in ./output with ROUGE.
python -m scripts.compute_loss \
--output-dir output \
--datasets bigbio/meqsum \
--loss-fn rouge
# cnn_dailymail tests
# Argument lists are collected into arrays; expansion with "${arr[@]}"
# yields the identical argv to the original backslash-continued commands.
# embed
cnn_embed=(
  --datasets cnn_dailymail
  --model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1
  --num-gpus 2
  --n-total 20
  --batch-size 20
  --seed 42
  --embed
)
python -u -m scripts.generate_outputs "${cnn_embed[@]}"
# generate with llama 2
cnn_generate=(
  --datasets cnn_dailymail
  --model-name-or-path meta-llama/Llama-2-7b-chat-hf
  --num-gpus 1
  --print-container-logs
  --n-total 20
  --num-hypotheses 2
  --seed 42
)
python -m scripts.generate_outputs "${cnn_generate[@]}"
# eval cnn_dailymail
cnn_eval=(
  --output-dir output
  --datasets cnn_dailymail
  --loss-fn rouge
)
python -m scripts.compute_loss "${cnn_eval[@]}"
# xsum tests
# embed
# Compute embeddings for 20 xsum examples with a sentence-transformers
# model across 2 GPUs (-u streams progress logs unbuffered).
python -u -m scripts.generate_outputs \
--datasets xsum \
--model-name-or-path sentence-transformers/multi-qa-mpnet-base-dot-v1 \
--num-gpus 2 \
--n-total 20 \
--batch-size 20 \
--seed 42 \
--embed
# generate with llama 2
# 2 hypotheses per example with Llama-2-7b-chat on one GPU, fixed seed.
python -m scripts.generate_outputs \
--datasets xsum \
--model-name-or-path meta-llama/Llama-2-7b-chat-hf \
--num-gpus 1 \
--print-container-logs \
--n-total 20 \
--num-hypotheses 2 \
--seed 42
# eval xsum
# Score the generated summaries found in ./output with ROUGE.
python -m scripts.compute_loss \
--output-dir output \
--datasets xsum \
--loss-fn rouge