Skip to content

Commit 03ec570

Browse files
damien-lemoalaxboe
authored andcommitted
fio: Introduce the log_prio option
Introduce the log_prio option to expand priority logging from just a single bit information (priority high vs low) to the full value of the priority value used to execute IOs. When this option is set, the priority value is printed as a 16-bits hexadecimal value combining the I/O priority class and priority level as defined by the ioprio_value() helper. Similarly to the log_offset option, this option does not result in actual I/O priority logging when log_avg_msec is set. This patch also fixes a problem with the IO_U_F_PRIORITY flag, namely that this flag is used to indicate that the IO is being executed with a high priority on the device while at the same time indicating how to account for the IO completion latency (high_prio clat vs low_prio clat). With the introduction of the cmdprio_class and cmdprio options, these assumptions are not necesarilly compatible anymore. These problems are addressed as follows: * The priority_bit field of struct iosample is replaced with the 16-bits priority field representing the full io_u->ioprio value. When log_prio is set, the priority field value is logged as is. When log_prio is not set, 1 is logged as the entry's priority field if the sample priority class is IOPRIO_CLASS_RT, and 0 otherwise. * IO_U_F_PRIORITY is renamed to IO_U_F_HIGH_PRIO to indicate that a job IO has the highest priority within the job context and so must be accounted as such using high_prio clat. While fio final statistics only show accounting of high vs low IO completion latency statistics, the log_prio option allows a user to perform more detailed statistical analysis of a workload using multiple different IO priorities. Signed-off-by: Damien Le Moal <[email protected]> Signed-off-by: Niklas Cassel <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 1437d63 commit 03ec570

22 files changed

+170
-71
lines changed

cconv.c

+2
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
192192
o->log_hist_coarseness = le32_to_cpu(top->log_hist_coarseness);
193193
o->log_max = le32_to_cpu(top->log_max);
194194
o->log_offset = le32_to_cpu(top->log_offset);
195+
o->log_prio = le32_to_cpu(top->log_prio);
195196
o->log_gz = le32_to_cpu(top->log_gz);
196197
o->log_gz_store = le32_to_cpu(top->log_gz_store);
197198
o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch);
@@ -417,6 +418,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
417418
top->log_avg_msec = cpu_to_le32(o->log_avg_msec);
418419
top->log_max = cpu_to_le32(o->log_max);
419420
top->log_offset = cpu_to_le32(o->log_offset);
421+
top->log_prio = cpu_to_le32(o->log_prio);
420422
top->log_gz = cpu_to_le32(o->log_gz);
421423
top->log_gz_store = cpu_to_le32(o->log_gz_store);
422424
top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch);

client.c

+2
Original file line numberDiff line numberDiff line change
@@ -1679,6 +1679,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
16791679
ret->log_type = le32_to_cpu(ret->log_type);
16801680
ret->compressed = le32_to_cpu(ret->compressed);
16811681
ret->log_offset = le32_to_cpu(ret->log_offset);
1682+
ret->log_prio = le32_to_cpu(ret->log_prio);
16821683
ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
16831684

16841685
if (*store_direct)
@@ -1696,6 +1697,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
16961697
s->data.val = le64_to_cpu(s->data.val);
16971698
s->__ddir = __le32_to_cpu(s->__ddir);
16981699
s->bs = le64_to_cpu(s->bs);
1700+
s->priority = le16_to_cpu(s->priority);
16991701

17001702
if (ret->log_offset) {
17011703
struct io_sample_offset *so = (void *) s;

engines/filecreate.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static int open_file(struct thread_data *td, struct fio_file *f)
4949
uint64_t nsec;
5050

5151
nsec = ntime_since_now(&start);
52-
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0);
52+
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
5353
}
5454

5555
return 0;

engines/filedelete.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static int delete_file(struct thread_data *td, struct fio_file *f)
5151
uint64_t nsec;
5252

5353
nsec = ntime_since_now(&start);
54-
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0);
54+
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
5555
}
5656

5757
return 0;

engines/filestat.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ static int stat_file(struct thread_data *td, struct fio_file *f)
125125
uint64_t nsec;
126126

127127
nsec = ntime_since_now(&start);
128-
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0);
128+
add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
129129
}
130130

131131
return 0;

engines/io_uring.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ static void fio_ioring_prio_prep(struct thread_data *td, struct io_u *io_u)
463463
* than the priority set by "prio" and "prioclass"
464464
* options.
465465
*/
466-
io_u->flags |= IO_U_F_PRIORITY;
466+
io_u->flags |= IO_U_F_HIGH_PRIO;
467467
}
468468
} else {
469469
sqe->ioprio = td->ioprio;
@@ -474,9 +474,11 @@ static void fio_ioring_prio_prep(struct thread_data *td, struct io_u *io_u)
474474
* is higher (has a lower value) than the async IO
475475
* priority.
476476
*/
477-
io_u->flags |= IO_U_F_PRIORITY;
477+
io_u->flags |= IO_U_F_HIGH_PRIO;
478478
}
479479
}
480+
481+
io_u->ioprio = sqe->ioprio;
480482
}
481483

482484
static enum fio_q_status fio_ioring_queue(struct thread_data *td,

engines/libaio.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -215,22 +215,23 @@ static void fio_libaio_prio_prep(struct thread_data *td, struct io_u *io_u)
215215
ioprio_value(cmdprio->class[ddir], cmdprio->level[ddir]);
216216

217217
if (p && rand_between(&td->prio_state, 0, 99) < p) {
218+
io_u->ioprio = cmdprio_value;
218219
io_u->iocb.aio_reqprio = cmdprio_value;
219220
io_u->iocb.u.c.flags |= IOCB_FLAG_IOPRIO;
220221
if (!td->ioprio || cmdprio_value < td->ioprio) {
221222
/*
222223
* The async IO priority is higher (has a lower value)
223224
* than the default context priority.
224225
*/
225-
io_u->flags |= IO_U_F_PRIORITY;
226+
io_u->flags |= IO_U_F_HIGH_PRIO;
226227
}
227228
} else if (td->ioprio && td->ioprio < cmdprio_value) {
228229
/*
229230
* The IO will be executed with the default context priority,
230231
* and this priority is higher (has a lower value) than the
231232
* async IO priority.
232233
*/
233-
io_u->flags |= IO_U_F_PRIORITY;
234+
io_u->flags |= IO_U_F_HIGH_PRIO;
234235
}
235236
}
236237

eta.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ bool calc_thread_status(struct jobs_eta *je, int force)
509509
memcpy(&rate_prev_time, &now, sizeof(now));
510510
regrow_agg_logs();
511511
for_each_rw_ddir(ddir) {
512-
add_agg_sample(sample_val(je->rate[ddir]), ddir, 0, 0);
512+
add_agg_sample(sample_val(je->rate[ddir]), ddir, 0);
513513
}
514514
}
515515

fio.1

+13-2
Original file line numberDiff line numberDiff line change
@@ -3266,6 +3266,11 @@ If this is set, the iolog options will include the byte offset for the I/O
32663266
entry as well as the other data values. Defaults to 0 meaning that
32673267
offsets are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
32683268
.TP
3269+
.BI log_prio \fR=\fPbool
3270+
If this is set, the iolog options will include the I/O priority for the I/O
3271+
entry as well as the other data values. Defaults to 0 meaning that
3272+
I/O priorities are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
3273+
.TP
32693274
.BI log_compression \fR=\fPint
32703275
If this is set, fio will compress the I/O logs as it goes, to keep the
32713276
memory footprint lower. When a log reaches the specified size, that chunk is
@@ -4199,8 +4204,14 @@ The entry's `block size' is always in bytes. The `offset' is the position in byt
41994204
from the start of the file for that particular I/O. The logging of the offset can be
42004205
toggled with \fBlog_offset\fR.
42014206
.P
4202-
`Command priority` is 0 for normal priority and 1 for high priority. This is controlled
4203-
by the ioengine specific \fBcmdprio_percentage\fR.
4207+
If \fBlog_prio\fR is not set, the entry's `Command priority` is 1 for an IO executed
4208+
with the highest RT priority class (\fBprioclass\fR=1 or \fBcmdprio_class\fR=1) and 0
4209+
otherwise. This is controlled by the \fBprioclass\fR option and the ioengine specific
4210+
\fBcmdprio_percentage\fR \fBcmdprio_class\fR options. If \fBlog_prio\fR is set, the
4211+
entry's `Command priority` is the priority set for the IO, as a 16-bits hexadecimal
4212+
number with the lowest 13 bits indicating the priority value (\fBprio\fR and
4213+
\fBcmdprio\fR options) and the highest 3 bits indicating the IO priority class
4214+
(\fBprioclass\fR and \fBcmdprio_class\fR options).
42044215
.P
42054216
Fio defaults to logging every individual I/O but when windowed logging is set
42064217
through \fBlog_avg_msec\fR, either the average (by default) or the maximum

init.c

+4
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
15831583
.hist_coarseness = o->log_hist_coarseness,
15841584
.log_type = IO_LOG_TYPE_LAT,
15851585
.log_offset = o->log_offset,
1586+
.log_prio = o->log_prio,
15861587
.log_gz = o->log_gz,
15871588
.log_gz_store = o->log_gz_store,
15881589
};
@@ -1616,6 +1617,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
16161617
.hist_coarseness = o->log_hist_coarseness,
16171618
.log_type = IO_LOG_TYPE_HIST,
16181619
.log_offset = o->log_offset,
1620+
.log_prio = o->log_prio,
16191621
.log_gz = o->log_gz,
16201622
.log_gz_store = o->log_gz_store,
16211623
};
@@ -1647,6 +1649,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
16471649
.hist_coarseness = o->log_hist_coarseness,
16481650
.log_type = IO_LOG_TYPE_BW,
16491651
.log_offset = o->log_offset,
1652+
.log_prio = o->log_prio,
16501653
.log_gz = o->log_gz,
16511654
.log_gz_store = o->log_gz_store,
16521655
};
@@ -1678,6 +1681,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
16781681
.hist_coarseness = o->log_hist_coarseness,
16791682
.log_type = IO_LOG_TYPE_IOPS,
16801683
.log_offset = o->log_offset,
1684+
.log_prio = o->log_prio,
16811685
.log_gz = o->log_gz,
16821686
.log_gz_store = o->log_gz_store,
16831687
};

io_u.c

+10-4
Original file line numberDiff line numberDiff line change
@@ -1595,7 +1595,7 @@ struct io_u *__get_io_u(struct thread_data *td)
15951595
assert(io_u->flags & IO_U_F_FREE);
15961596
io_u_clear(td, io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
15971597
IO_U_F_TRIMMED | IO_U_F_BARRIER |
1598-
IO_U_F_VER_LIST | IO_U_F_PRIORITY);
1598+
IO_U_F_VER_LIST | IO_U_F_HIGH_PRIO);
15991599

16001600
io_u->error = 0;
16011601
io_u->acct_ddir = -1;
@@ -1799,6 +1799,10 @@ struct io_u *get_io_u(struct thread_data *td)
17991799
io_u->xfer_buf = io_u->buf;
18001800
io_u->xfer_buflen = io_u->buflen;
18011801

1802+
/*
1803+
* Remember the issuing context priority. The IO engine may change this.
1804+
*/
1805+
io_u->ioprio = td->ioprio;
18021806
out:
18031807
assert(io_u->file);
18041808
if (!td_io_prep(td, io_u)) {
@@ -1884,7 +1888,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
18841888
unsigned long long tnsec;
18851889

18861890
tnsec = ntime_since(&io_u->start_time, &icd->time);
1887-
add_lat_sample(td, idx, tnsec, bytes, io_u->offset, io_u_is_prio(io_u));
1891+
add_lat_sample(td, idx, tnsec, bytes, io_u->offset,
1892+
io_u->ioprio, io_u_is_high_prio(io_u));
18881893

18891894
if (td->flags & TD_F_PROFILE_OPS) {
18901895
struct prof_io_ops *ops = &td->prof_io_ops;
@@ -1905,7 +1910,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
19051910

19061911
if (ddir_rw(idx)) {
19071912
if (!td->o.disable_clat) {
1908-
add_clat_sample(td, idx, llnsec, bytes, io_u->offset, io_u_is_prio(io_u));
1913+
add_clat_sample(td, idx, llnsec, bytes, io_u->offset,
1914+
io_u->ioprio, io_u_is_high_prio(io_u));
19091915
io_u_mark_latency(td, llnsec);
19101916
}
19111917

@@ -2162,7 +2168,7 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u)
21622168
td = td->parent;
21632169

21642170
add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen,
2165-
io_u->offset, io_u_is_prio(io_u));
2171+
io_u->offset, io_u->ioprio);
21662172
}
21672173
}
21682174

io_u.h

+7-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ enum {
2121
IO_U_F_TRIMMED = 1 << 5,
2222
IO_U_F_BARRIER = 1 << 6,
2323
IO_U_F_VER_LIST = 1 << 7,
24-
IO_U_F_PRIORITY = 1 << 8,
24+
IO_U_F_HIGH_PRIO = 1 << 8,
2525
};
2626

2727
/*
@@ -46,6 +46,11 @@ struct io_u {
4646
*/
4747
unsigned short numberio;
4848

49+
/*
50+
* IO priority.
51+
*/
52+
unsigned short ioprio;
53+
4954
/*
5055
* Allocated/set buffer and length
5156
*/
@@ -188,7 +193,6 @@ static inline enum fio_ddir acct_ddir(struct io_u *io_u)
188193
td_flags_clear((td), &(io_u->flags), (val))
189194
#define io_u_set(td, io_u, val) \
190195
td_flags_set((td), &(io_u)->flags, (val))
191-
#define io_u_is_prio(io_u) \
192-
(io_u->flags & (unsigned int) IO_U_F_PRIORITY) != 0
196+
#define io_u_is_high_prio(io_u) (io_u->flags & IO_U_F_HIGH_PRIO)
193197

194198
#endif

iolog.c

+35-10
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ void setup_log(struct io_log **log, struct log_params *p,
737737
INIT_FLIST_HEAD(&l->io_logs);
738738
l->log_type = p->log_type;
739739
l->log_offset = p->log_offset;
740+
l->log_prio = p->log_prio;
740741
l->log_gz = p->log_gz;
741742
l->log_gz_store = p->log_gz_store;
742743
l->avg_msec = p->avg_msec;
@@ -769,6 +770,8 @@ void setup_log(struct io_log **log, struct log_params *p,
769770

770771
if (l->log_offset)
771772
l->log_ddir_mask = LOG_OFFSET_SAMPLE_BIT;
773+
if (l->log_prio)
774+
l->log_ddir_mask |= LOG_PRIO_SAMPLE_BIT;
772775

773776
INIT_FLIST_HEAD(&l->chunk_list);
774777

@@ -895,33 +898,55 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
895898
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
896899
{
897900
struct io_sample *s;
898-
int log_offset;
901+
int log_offset, log_prio;
899902
uint64_t i, nr_samples;
903+
unsigned int prio_val;
904+
const char *fmt;
900905

901906
if (!sample_size)
902907
return;
903908

904909
s = __get_sample(samples, 0, 0);
905910
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
911+
log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
912+
913+
if (log_offset) {
914+
if (log_prio)
915+
fmt = "%lu, %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
916+
else
917+
fmt = "%lu, %" PRId64 ", %u, %llu, %llu, %u\n";
918+
} else {
919+
if (log_prio)
920+
fmt = "%lu, %" PRId64 ", %u, %llu, 0x%04x\n";
921+
else
922+
fmt = "%lu, %" PRId64 ", %u, %llu, %u\n";
923+
}
906924

907925
nr_samples = sample_size / __log_entry_sz(log_offset);
908926

909927
for (i = 0; i < nr_samples; i++) {
910928
s = __get_sample(samples, log_offset, i);
911929

930+
if (log_prio)
931+
prio_val = s->priority;
932+
else
933+
prio_val = ioprio_value_is_class_rt(s->priority);
934+
912935
if (!log_offset) {
913-
fprintf(f, "%lu, %" PRId64 ", %u, %llu, %u\n",
914-
(unsigned long) s->time,
915-
s->data.val,
916-
io_sample_ddir(s), (unsigned long long) s->bs, s->priority_bit);
936+
fprintf(f, fmt,
937+
(unsigned long) s->time,
938+
s->data.val,
939+
io_sample_ddir(s), (unsigned long long) s->bs,
940+
prio_val);
917941
} else {
918942
struct io_sample_offset *so = (void *) s;
919943

920-
fprintf(f, "%lu, %" PRId64 ", %u, %llu, %llu, %u\n",
921-
(unsigned long) s->time,
922-
s->data.val,
923-
io_sample_ddir(s), (unsigned long long) s->bs,
924-
(unsigned long long) so->offset, s->priority_bit);
944+
fprintf(f, fmt,
945+
(unsigned long) s->time,
946+
s->data.val,
947+
io_sample_ddir(s), (unsigned long long) s->bs,
948+
(unsigned long long) so->offset,
949+
prio_val);
925950
}
926951
}
927952
}

iolog.h

+14-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct io_sample {
4242
uint64_t time;
4343
union io_sample_data data;
4444
uint32_t __ddir;
45-
uint8_t priority_bit;
45+
uint16_t priority;
4646
uint64_t bs;
4747
};
4848

@@ -104,6 +104,11 @@ struct io_log {
104104
*/
105105
unsigned int log_offset;
106106

107+
/*
108+
* Log I/O priorities
109+
*/
110+
unsigned int log_prio;
111+
107112
/*
108113
* Max size of log entries before a chunk is compressed
109114
*/
@@ -145,7 +150,13 @@ struct io_log {
145150
* If the upper bit is set, then we have the offset as well
146151
*/
147152
#define LOG_OFFSET_SAMPLE_BIT 0x80000000U
148-
#define io_sample_ddir(io) ((io)->__ddir & ~LOG_OFFSET_SAMPLE_BIT)
153+
/*
154+
* If the bit following the upper bit is set, then we have the priority
155+
*/
156+
#define LOG_PRIO_SAMPLE_BIT 0x40000000U
157+
158+
#define LOG_SAMPLE_BITS (LOG_OFFSET_SAMPLE_BIT | LOG_PRIO_SAMPLE_BIT)
159+
#define io_sample_ddir(io) ((io)->__ddir & ~LOG_SAMPLE_BITS)
149160

150161
static inline void io_sample_set_ddir(struct io_log *log,
151162
struct io_sample *io,
@@ -262,6 +273,7 @@ struct log_params {
262273
int hist_coarseness;
263274
int log_type;
264275
int log_offset;
276+
int log_prio;
265277
int log_gz;
266278
int log_gz_store;
267279
int log_compress;

options.c

+10
Original file line numberDiff line numberDiff line change
@@ -4292,6 +4292,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
42924292
.category = FIO_OPT_C_LOG,
42934293
.group = FIO_OPT_G_INVALID,
42944294
},
4295+
{
4296+
.name = "log_prio",
4297+
.lname = "Log priority of IO",
4298+
.type = FIO_OPT_BOOL,
4299+
.off1 = offsetof(struct thread_options, log_prio),
4300+
.help = "Include priority value of IO for each log entry",
4301+
.def = "0",
4302+
.category = FIO_OPT_C_LOG,
4303+
.group = FIO_OPT_G_INVALID,
4304+
},
42954305
#ifdef CONFIG_ZLIB
42964306
{
42974307
.name = "log_compression",

0 commit comments

Comments
 (0)