[PATCH] Update to libaio?
Rusty Russell
rusty at rustcorp.com.au
Tue Jan 8 06:02:14 UTC 2008
I recently tried to use libaio (0.3.106), and discovered it didn't have
eventfd support. Or preadv/pwritev support. And the testsuite didn't
compile. Or work.
Anyway, it's shipped by the distros, so I figure it's worth patching.
I'm cc'ing Ben in the hope he's still maintaining it. If not I'll
find a home somewhere for it.
(Thanks to Jeff for his feedback on the first version of this patch.)
Cheers,
Rusty.
diff -r 3a023dc4e63a ChangeLog
--- a/ChangeLog Tue Jan 08 14:38:53 2008 +1100
+++ b/ChangeLog Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,14 @@ 0.4.0
+rusty-v1
+ - Make tests compile again on modern systems (warnings + -Werror)
+ - Add 'make partcheck' and don't require manual setup for testing.
+ - Change test harness to compile against this dir, not global install
+ - Fix 5.t for archs where PROT_WRITE mappings are readable.
+ - Allow sending of SIGXFSZ on aio over limits
+ - Explicitly specify bash for runtests.sh
+ - Put deprecating comments on never-merged io_prep_poll
+ - Add io_prep_preadv and io_prep_pwritev
+ - Add eventfd support (io_set_eventfd).
+
0.4.0
- remove libredhat-kernel
- add rough outline for man pages
diff -r 3a023dc4e63a Makefile
--- a/Makefile Tue Jan 08 14:38:53 2008 +1100
+++ b/Makefile Tue Jan 08 17:00:53 2008 +1100
@@ -17,6 +17,11 @@ install:
install:
@$(MAKE) -C src install prefix=$(prefix) includedir=$(includedir) libdir=$(libdir)
+check:
+ @$(MAKE) -C harness check
+
+partcheck: all
+ @$(MAKE) -C harness partcheck
clean:
@$(MAKE) -C src clean
diff -r 3a023dc4e63a harness/Makefile
--- a/harness/Makefile Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/Makefile Tue Jan 08 17:00:53 2008 +1100
@@ -1,37 +1,56 @@
# foo.
TEST_SRCS:=$(shell find cases/ -name \*.t | sort -n -t/ -k2)
-PROGS:=$(patsubst %.t,%.p,$(TEST_SRCS))
+EXTRAPROGS:=cases/8.p cases/10.p
+PARTPROGS:=$(filter-out $(EXTRAPROGS), $(patsubst %.t,%.p,$(TEST_SRCS)))
+PROGS:=$(PARTPROGS) $(EXTRAPROGS)
HARNESS_SRCS:=main.c
# io_queue.c
-CFLAGS=-Wall -Werror -g -O -laio
+CFLAGS=-Wall -Werror -I../src -g -O
#-lpthread -lrt
all: $(PROGS)
$(PROGS): %.p: %.t $(HARNESS_SRCS)
- $(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ main.c
+ $(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ main.c ../src/libaio.a
clean:
rm -f $(PROGS) *.o runtests.out rofile wofile rwfile
.PHONY:
-testdir/rofile: .PHONY
+testdir/rofile: testdir .PHONY
rm -f $@
echo "test" >$@
chmod 400 $@
-testdir/wofile: .PHONY
+testdir/wofile: testdir .PHONY
rm -f $@
echo "test" >$@
chmod 200 $@
-testdir/rwfile: .PHONY
+testdir/rwfile: testdir .PHONY
rm -f $@
echo "test" >$@
chmod 600 $@
-check: $(PROGS) testdir/rofile testdir/rwfile testdir/wofile
- ./runtests.sh $(PROGS)
+testdir testdir.enospc testdir.ext2:
+ mkdir $@
+root: .PHONY
+ @if [ `id -u` -ne 0 ]; then echo Need root for check, try partcheck >&2; exit 1; fi
+
+partcheck: $(PARTPROGS) testdir/rofile testdir/rwfile testdir/wofile
+ ./runtests.sh $(PARTPROGS)
+
+ext2.img:
+ dd if=/dev/zero bs=1M count=10 of=$@
+ mke2fs -F -b 4096 $@
+
+extracheck: $(EXTRAPROGS) root testdir.ext2 testdir.enospc ext2.img
+ mount -o loop -t ext2 ext2-enospc.img testdir.enospc
+ ./runtests.sh cases/10.p; ret=$$?; umount testdir.enospc; exit $$ret
+ mount -o loop -t ext2 ext2.img testdir.ext2
+ ./runtests.sh cases/8.p; ret=$$?; umount testdir.ext2; exit $$ret
+
+check: partcheck extracheck
diff -r 3a023dc4e63a harness/cases/12.t
--- a/harness/cases/12.t Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/12.t Tue Jan 08 17:00:53 2008 +1100
@@ -20,11 +20,15 @@ int test_main(void)
{
int res, status;
pid_t pid;
+ sigset_t set;
if (attempt_io_submit(io_ctx, 0, NULL, 0))
return 1;
- sigblock(sigmask(SIGCHLD) | siggetmask());
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+
fflush(NULL);
pid = fork(); assert(pid != -1);
diff -r 3a023dc4e63a harness/cases/14.t
--- a/harness/cases/14.t Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/14.t Tue Jan 08 17:00:53 2008 +1100
@@ -61,11 +61,14 @@ int test_main(void)
{
int res, status;
pid_t pid;
+ sigset_t set;
if (attempt_io_submit(io_ctx, 0, NULL, 0))
return 1;
- sigblock(sigmask(SIGCHLD) | siggetmask());
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &set, NULL);
fflush(NULL);
pid = fork(); assert(pid != -1);
diff -r 3a023dc4e63a harness/cases/15.t
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harness/cases/15.t Tue Jan 08 17:00:53 2008 +1100
@@ -0,0 +1,94 @@
+/* 15.t
+- pwritev and preadv tests.
+*/
+#include "aio_setup.h"
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <errno.h>
+
+int test_main(void)
+{
+#define SIZE 512
+#define NUM_IOV 10
+ char buf[SIZE*NUM_IOV];
+ struct iovec iov[NUM_IOV];
+ int rwfd;
+ int status = 0, res, i;
+
+ rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1);
+ res = ftruncate(rwfd, sizeof(buf)); assert(res == 0);
+
+ for (i = 0; i < NUM_IOV; i++) {
+ iov[i].iov_base = buf + i*SIZE;
+ iov[i].iov_len = SIZE;
+ memset(iov[i].iov_base, i, SIZE);
+ }
+ status |= attempt_rw(rwfd, iov, NUM_IOV, 0, WRITEV, SIZE*NUM_IOV);
+ res = pread(rwfd, buf, sizeof(buf), 0); assert(res == sizeof(buf));
+ for (i = 0; i < NUM_IOV; i++) {
+ unsigned int j;
+ for (j = 0; j < SIZE; j++) {
+ if (buf[i*SIZE + j] != i) {
+ printf("Unexpected value after writev at %i\n",
+ i*SIZE + j);
+ status |= 1;
+ break;
+ }
+ }
+ }
+ if (!status)
+ printf("Checking memory: [Success]\n");
+
+ memset(buf, 0, sizeof(buf));
+ status |= attempt_rw(rwfd, iov, NUM_IOV, 0, READV, SIZE*NUM_IOV);
+ for (i = 0; i < NUM_IOV; i++) {
+ unsigned int j;
+ for (j = 0; j < SIZE; j++) {
+ if (buf[i*SIZE + j] != i) {
+ printf("Unexpected value after readv at %i\n",
+ i*SIZE + j);
+ status |= 1;
+ break;
+ }
+ }
+ }
+
+ /* Check that offset works. */
+ status |= attempt_rw(rwfd, iov+1, NUM_IOV-1, SIZE, WRITEV,
+ SIZE*(NUM_IOV-1));
+ memset(buf, 0, sizeof(buf));
+ res = pread(rwfd, buf, sizeof(buf), 0); assert(res == sizeof(buf));
+ for (i = 1; i < NUM_IOV; i++) {
+ unsigned int j;
+ for (j = 0; j < SIZE; j++) {
+ if (buf[i*SIZE + j] != i) {
+ printf("Unexpected value after offset writev at %i\n",
+ i*SIZE + j);
+ status |= 1;
+ break;
+ }
+ }
+ }
+ if (!status)
+ printf("Checking memory: [Success]\n");
+
+ memset(buf, 0, sizeof(buf));
+ status |= attempt_rw(rwfd, iov+1, NUM_IOV-1, SIZE, READV,
+ SIZE*(NUM_IOV-1));
+ for (i = 1; i < NUM_IOV; i++) {
+ unsigned int j;
+ for (j = 0; j < SIZE; j++) {
+ if (buf[i*SIZE + j] != i) {
+ printf("Unexpected value after offset readv at %i\n",
+ i*SIZE + j);
+ status |= 1;
+ break;
+ }
+ }
+ }
+ if (!status)
+ printf("Checking memory: [Success]\n");
+
+ return status;
+}
+
diff -r 3a023dc4e63a harness/cases/16.t
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harness/cases/16.t Tue Jan 08 17:00:53 2008 +1100
@@ -0,0 +1,94 @@
+/* 16.t
+- eventfd tests.
+*/
+#include <stdint.h>
+#include <err.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+
+#ifndef SYS_eventfd
+#if defined(__i386__)
+#define SYS_eventfd 323
+#elif defined(__x86_64__)
+#define SYS_eventfd 284
+#elif defined(__ia64__)
+#define SYS_eventfd 1309
+#elif defined(__PPC__)
+#define SYS_eventfd 307
+#elif defined(__s390__)
+#define SYS_eventfd 318
+#elif defined(__alpha__)
+#define SYS_eventfd 478
+#else
+#error define SYS_eventfd for your arch!
+#endif
+#endif
+
+int test_main(void)
+{
+ /* 10 MB takes long enough that we would fail if eventfd
+ * returned immediately. */
+#define SIZE 10000000
+ char *buf;
+ struct io_event io_event;
+ struct iocb iocb;
+ struct iocb *iocbs[] = { &iocb };
+ int rwfd, efd;
+ int res;
+ io_context_t io_ctx;
+ uint64_t event;
+ struct timespec notime = { .tv_sec = 0, .tv_nsec = 0 };
+
+ buf = malloc(SIZE); assert(buf);
+ efd = syscall(SYS_eventfd, 0);
+ if (efd < 0) {
+ if (errno == ENOSYS) {
+ printf("No eventfd support. [SKIPPING]\n");
+ exit(0);
+ }
+ err(1, "Failed to get eventfd");
+ }
+
+ rwfd = open("testdir/rwfile", O_RDWR); assert(rwfd != -1);
+ res = ftruncate(rwfd, 0); assert(res == 0);
+ memset(buf, 0x42, SIZE);
+
+ /* Write test. */
+ res = io_queue_init(1024, &io_ctx); assert(res == 0);
+ io_prep_pwrite(&iocb, rwfd, buf, SIZE, 0);
+ io_set_eventfd(&iocb, efd);
+ res = io_submit(io_ctx, 1, iocbs); assert(res == 1);
+
+ alarm(30);
+ res = read(efd, &event, sizeof(event)); assert(res == sizeof(event));
+ assert(event == 1);
+
+ /* This should now be ready. */
+ res = io_getevents(io_ctx, 0, 1, &io_event, &notime);
+ if (res != 1)
+ err(1, "io_getevents did not return 1 event after eventfd");
+ assert(io_event.res == SIZE);
+ printf("eventfd write test [SUCCESS]\n");
+
+ /* Read test. */
+ memset(buf, 0, SIZE);
+ io_prep_pread(&iocb, rwfd, buf, SIZE, 0);
+ io_set_eventfd(&iocb, efd);
+ res = io_submit(io_ctx, 1, iocbs); assert(res == 1);
+
+ alarm(30);
+ res = read(efd, &event, sizeof(event)); assert(res == sizeof(event));
+ assert(event == 1);
+
+ /* This should now be ready. */
+ res = io_getevents(io_ctx, 0, 1, &io_event, &notime);
+ if (res != 1)
+ err(1, "io_getevents did not return 1 event after eventfd");
+ assert(io_event.res == SIZE);
+
+ for (res = 0; res < SIZE; res++)
+ assert(buf[res] == 0x42);
+ printf("eventfd read test [SUCCESS]\n");
+
+ return 0;
+}
+
diff -r 3a023dc4e63a harness/cases/5.t
--- a/harness/cases/5.t Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/5.t Tue Jan 08 17:00:53 2008 +1100
@@ -3,6 +3,7 @@
*/
#include "aio_setup.h"
#include <sys/mman.h>
+#include <errno.h>
int test_main(void)
{
@@ -40,7 +41,13 @@ int test_main(void)
assert(buf != (char *)-1);
status |= attempt_rw(rwfd, buf, SIZE, 0, READ, SIZE);
- status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, -EFAULT);
+
+ /* Whether PROT_WRITE is readable is arch-dependent. So compare
+ * against read result. */
+ res = read(rwfd, buf, SIZE);
+ if (res < 0)
+ res = -errno;
+ status |= attempt_rw(rwfd, buf, SIZE, 0, WRITE, res);
return status;
}
diff -r 3a023dc4e63a harness/cases/7.t
--- a/harness/cases/7.t Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/7.t Tue Jan 08 17:00:53 2008 +1100
@@ -9,12 +9,15 @@
*/
#include <sys/resource.h>
+#include <signal.h>
void SET_RLIMIT(long long limit)
{
struct rlimit rlim;
int res;
+ /* Seems that we do send SIGXFSZ, but hard to fix... */
+ signal(SIGXFSZ, SIG_IGN);
rlim.rlim_cur = limit; assert(rlim.rlim_cur == limit);
rlim.rlim_max = limit; assert(rlim.rlim_max == limit);
diff -r 3a023dc4e63a harness/cases/8.t
--- a/harness/cases/8.t Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/8.t Tue Jan 08 17:00:53 2008 +1100
@@ -2,44 +2,23 @@
- Ditto for the above three tests at the offset maximum (largest
possible ext2/3 file size.) (8.t)
*/
-#include <sys/vfs.h>
-
-#define EXT2_OLD_SUPER_MAGIC 0xEF51
-#define EXT2_SUPER_MAGIC 0xEF53
+#include <sys/types.h>
+#include <unistd.h>
long long get_fs_limit(int fd)
{
- struct statfs s;
- int res;
- long long lim = 0;
+ long long min = 0, max = 9223372036854775807LL;
+ char c = 0;
- res = fstatfs(fd, &s); assert(res == 0);
-
- switch(s.f_type) {
- case EXT2_OLD_SUPER_MAGIC:
- case EXT2_SUPER_MAGIC:
-#if 0
- {
- long long tmp;
- tmp = s.f_bsize / 4;
- /* 12 direct + indirect block + dind + tind */
- lim = 12 + tmp + tmp * tmp + tmp * tmp * tmp;
- lim *= s.f_bsize;
- printf("limit(%ld) = %Ld\n", (long)s.f_bsize, lim);
+ while (max - min > 1) {
+ if (pwrite64(fd, &c, 1, (min + max) / 2) == -1)
+ max = (min + max) / 2;
+ else {
+ ftruncate(fd, 0);
+ min = (min + max) / 2;
+ }
}
-#endif
- switch(s.f_bsize) {
- case 4096: lim = 2199023251456; break;
- default:
- printf("unknown ext2 blocksize %ld\n", (long)s.f_bsize);
- exit(3);
- }
- break;
- default:
- printf("unknown filesystem 0x%08lx\n", (long)s.f_type);
- exit(3);
- }
- return lim;
+ return max;
}
#define SET_RLIMIT(x) do ; while (0)
diff -r 3a023dc4e63a harness/cases/aio_setup.h
--- a/harness/cases/aio_setup.h Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/aio_setup.h Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,4 @@ io_context_t io_ctx;
+#include <time.h>
io_context_t io_ctx;
#define BAD_CTX ((io_context_t)-1)
@@ -58,6 +59,9 @@ int sync_submit(struct iocb *iocb)
#define WRITE 'w'
#define READ_SILENT 'R'
#define WRITE_SILENT 'W'
+#define READV '<'
+#define WRITEV '>'
+
int attempt_rw(int fd, void *buf, int count, long long pos, int rw, int expect)
{
struct iocb iocb;
@@ -74,6 +78,12 @@ int attempt_rw(int fd, void *buf, int co
silent = 1;
case WRITE:
io_prep_pwrite(&iocb, fd, buf, count, pos);
+ break;
+ case WRITEV:
+ io_prep_pwritev(&iocb, fd, buf, count, pos);
+ break;
+ case READV:
+ io_prep_preadv(&iocb, fd, buf, count, pos);
break;
}
diff -r 3a023dc4e63a harness/cases/common-7-8.h
--- a/harness/cases/common-7-8.h Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/common-7-8.h Tue Jan 08 17:00:53 2008 +1100
@@ -2,6 +2,7 @@
*/
#include "aio_setup.h"
+#include <errno.h>
#include <unistd.h>
#define SIZE 512
@@ -13,7 +14,7 @@ int test_main(void)
int status = 0, res;
long long limit;
- rwfd = open(FILENAME, O_RDWR); assert(rwfd != -1);
+ rwfd = open(FILENAME, O_RDWR|O_CREAT, 0600); assert(rwfd != -1);
res = ftruncate(rwfd, 0); assert(res == 0);
buf = malloc(SIZE); assert(buf != NULL);
memset(buf, 0, SIZE);
diff -r 3a023dc4e63a harness/main.c
--- a/harness/main.c Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/main.c Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
#include <stdio.h>
#include <errno.h>
#include <assert.h>
diff -r 3a023dc4e63a harness/runtests.sh
--- a/harness/runtests.sh Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/runtests.sh Tue Jan 08 17:00:53 2008 +1100
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
passes=0
fails=0
diff -r 3a023dc4e63a src/libaio.h
--- a/src/libaio.h Tue Jan 08 14:38:53 2008 +1100
+++ b/src/libaio.h Tue Jan 08 17:00:53 2008 +1100
@@ -33,7 +33,6 @@ struct timespec;
struct timespec;
struct sockaddr;
struct iovec;
-struct iocb;
typedef struct io_context *io_context_t;
@@ -44,8 +43,10 @@ typedef enum io_iocb_cmd {
IO_CMD_FSYNC = 2,
IO_CMD_FDSYNC = 3,
- IO_CMD_POLL = 5,
+ IO_CMD_POLL = 5, /* Never implemented in mainline, see io_prep_poll */
IO_CMD_NOOP = 6,
+ IO_CMD_PREADV = 7,
+ IO_CMD_PWRITEV = 8,
} io_iocb_cmd_t;
#if defined(__i386__) /* little endian, 32 bits */
@@ -89,7 +90,9 @@ struct io_iocb_common {
PADDEDptr(void *buf, __pad1);
PADDEDul(nbytes, __pad2);
long long offset;
- long long __pad3, __pad4;
+ long long __pad3;
+ unsigned flags;
+ unsigned resfd;
}; /* result code is the amount read or -'ve errno */
struct io_iocb_vector {
@@ -169,20 +172,44 @@ static inline void io_prep_pwrite(struct
iocb->u.c.offset = offset;
}
-static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+static inline void io_prep_preadv(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
{
memset(iocb, 0, sizeof(*iocb));
iocb->aio_fildes = fd;
- iocb->aio_lio_opcode = IO_CMD_POLL;
+ iocb->aio_lio_opcode = IO_CMD_PREADV;
iocb->aio_reqprio = 0;
- iocb->u.poll.events = events;
+ iocb->u.c.buf = (void *)iov;
+ iocb->u.c.nbytes = iovcnt;
+ iocb->u.c.offset = offset;
+}
+
+static inline void io_prep_pwritev(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
+{
+ memset(iocb, 0, sizeof(*iocb));
+ iocb->aio_fildes = fd;
+ iocb->aio_lio_opcode = IO_CMD_PWRITEV;
+ iocb->aio_reqprio = 0;
+ iocb->u.c.buf = (void *)iov;
+ iocb->u.c.nbytes = iovcnt;
+ iocb->u.c.offset = offset;
+}
+
+/* Jeff Moyer says this was implemented in Red Hat AS2.1 and RHEL3.
+ * AFAICT, it was never in mainline, and should not be used. --RR */
+static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+{
+ memset(iocb, 0, sizeof(*iocb));
+ iocb->aio_fildes = fd;
+ iocb->aio_lio_opcode = IO_CMD_POLL;
+ iocb->aio_reqprio = 0;
+ iocb->u.poll.events = events;
}
static inline int io_poll(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd, int events)
{
- io_prep_poll(iocb, fd, events);
- io_set_callback(iocb, cb);
- return io_submit(ctx, 1, &iocb);
+ io_prep_poll(iocb, fd, events);
+ io_set_callback(iocb, cb);
+ return io_submit(ctx, 1, &iocb);
}
static inline void io_prep_fsync(struct iocb *iocb, int fd)
@@ -215,6 +242,12 @@ static inline int io_fdsync(io_context_t
return io_submit(ctx, 1, &iocb);
}
+static inline void io_set_eventfd(struct iocb *iocb, int eventfd)
+{
+ iocb->u.c.flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
+ iocb->u.c.resfd = eventfd;
+}
+
#ifdef __cplusplus
}
#endif
More information about the Ubuntu-devel-discuss
mailing list