[PATCH] Update to libaio?

Rusty Russell rusty at rustcorp.com.au
Tue Jan 8 06:02:14 UTC 2008


I recently tried to use libaio (0.3.106), and discovered it didn't have
eventfd support.  Or preadv/pwritev support.  And the testsuite didn't
compile.  Or work.

Anyway, it's shipped by the distros, so I figure it's worth patching.
I'm cc'ing Ben in the hope he's still maintaining it.  If not I'll
find a home somewhere for it.

(Thanks to Jeff for his feedback on the first version of this patch.)

Cheers,
Rusty.

diff -r 3a023dc4e63a ChangeLog
--- a/ChangeLog	Tue Jan 08 14:38:53 2008 +1100
+++ b/ChangeLog	Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,14 @@ 0.4.0
+rusty-v1
+	- Make tests compile again on modern systems (warnings + -Werror)
+	- Add 'make partcheck' and don't require manual setup for testing.
+	- Change test harness to compile against this dir, not global install
+	- Fix 5.t for archs where PROT_WRITE mappings are readable.
+	- Allow sending of SIGXFSZ on aio over limits
+	- Explicitly specify bash for runtests.sh
+	- Add deprecation comments to never-merged io_prep_poll
+	- Add io_prep_preadv and io_prep_pwritev
+	- Add eventfd support (io_set_eventfd).
+
 0.4.0
 	- remove libredhat-kernel
 	- add rough outline for man pages
diff -r 3a023dc4e63a Makefile
--- a/Makefile	Tue Jan 08 14:38:53 2008 +1100
+++ b/Makefile	Tue Jan 08 17:00:53 2008 +1100
@@ -17,6 +17,11 @@ install:
 install:
 	@$(MAKE) -C src install prefix=$(prefix) includedir=$(includedir) libdir=$(libdir)
 
+check:
+	@$(MAKE) -C harness check
+
+partcheck: all
+	@$(MAKE) -C harness partcheck
 
 clean:
 	@$(MAKE) -C src clean
diff -r 3a023dc4e63a harness/Makefile
--- a/harness/Makefile	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/Makefile	Tue Jan 08 17:00:53 2008 +1100
@@ -1,37 +1,56 @@
 # foo.
 TEST_SRCS:=$(shell find cases/ -name \*.t | sort -n -t/ -k2)
-PROGS:=$(patsubst %.t,%.p,$(TEST_SRCS))
+EXTRAPROGS:=cases/8.p cases/10.p
+PARTPROGS:=$(filter-out $(EXTRAPROGS), $(patsubst %.t,%.p,$(TEST_SRCS)))
+PROGS:=$(PARTPROGS) $(EXTRAPROGS)
 HARNESS_SRCS:=main.c
 # io_queue.c
 
-CFLAGS=-Wall -Werror -g -O -laio
+CFLAGS=-Wall -Werror -I../src -g -O
 #-lpthread -lrt
 
 all: $(PROGS)
 
 $(PROGS): %.p: %.t $(HARNESS_SRCS)
-	$(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ main.c
+	$(CC) $(CFLAGS) -DTEST_NAME=\"$<\" -o $@ main.c ../src/libaio.a
 
 clean:
 	rm -f $(PROGS) *.o runtests.out rofile wofile rwfile
 
 .PHONY:
 
-testdir/rofile: .PHONY
+testdir/rofile: testdir .PHONY
 	rm -f $@
 	echo "test" >$@
 	chmod 400 $@
 
-testdir/wofile: .PHONY
+testdir/wofile: testdir .PHONY
 	rm -f $@
 	echo "test" >$@
 	chmod 200 $@
 
-testdir/rwfile: .PHONY
+testdir/rwfile: testdir .PHONY
 	rm -f $@
 	echo "test" >$@
 	chmod 600 $@
 
-check: $(PROGS) testdir/rofile testdir/rwfile testdir/wofile
-	./runtests.sh $(PROGS)
+testdir testdir.enospc testdir.ext2:
+	mkdir $@
 
+root: .PHONY
+	@if [ `id -u` -ne 0 ]; then echo Need root for check, try partcheck >&2; exit 1; fi
+
+partcheck: $(PARTPROGS) testdir/rofile testdir/rwfile testdir/wofile
+	./runtests.sh $(PARTPROGS)
+
+ext2.img ext2-enospc.img:	# scratch ext2 images, one per loop mount in extracheck
+	dd if=/dev/zero bs=1M count=10 of=$@
+	mke2fs -F -b 4096 $@
+
+extracheck: $(EXTRAPROGS) root testdir.ext2 testdir.enospc ext2.img ext2-enospc.img
+	mount -o loop -t ext2 ext2-enospc.img testdir.enospc
+	./runtests.sh cases/10.p; ret=$$?; umount testdir.enospc; exit $$ret
+	mount -o loop -t ext2 ext2.img testdir.ext2
+	./runtests.sh cases/8.p; ret=$$?; umount testdir.ext2; exit $$ret
+
+check: partcheck extracheck 
diff -r 3a023dc4e63a harness/cases/12.t
--- a/harness/cases/12.t	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/12.t	Tue Jan 08 17:00:53 2008 +1100
@@ -20,11 +20,15 @@ int test_main(void)
 {
 	int res, status;
 	pid_t pid;
+	sigset_t set;
 
 	if (attempt_io_submit(io_ctx, 0, NULL, 0))
 		return 1;
 
-	sigblock(sigmask(SIGCHLD) | siggetmask());
+	sigemptyset(&set);
+	sigaddset(&set, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &set, NULL);
+
 	fflush(NULL);
 	pid = fork();				assert(pid != -1);
 
diff -r 3a023dc4e63a harness/cases/14.t
--- a/harness/cases/14.t	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/14.t	Tue Jan 08 17:00:53 2008 +1100
@@ -61,11 +61,14 @@ int test_main(void)
 {
 	int res, status;
 	pid_t pid;
+	sigset_t set;
 
 	if (attempt_io_submit(io_ctx, 0, NULL, 0))
 		return 1;
 
-	sigblock(sigmask(SIGCHLD) | siggetmask());
+	sigemptyset(&set);
+	sigaddset(&set, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &set, NULL);
 	fflush(NULL);
 	pid = fork();				assert(pid != -1);
 
diff -r 3a023dc4e63a harness/cases/15.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/harness/cases/15.t	Tue Jan 08 17:00:53 2008 +1100
@@ -0,0 +1,94 @@
+/* 15.t
+- pwritev and preadv tests.
+*/
+#include "aio_setup.h"
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <errno.h>
+
+int test_main(void)
+{
+#define SIZE	512
+#define NUM_IOV	10
+	char buf[SIZE*NUM_IOV];
+	struct iovec iov[NUM_IOV];
+	int rwfd;
+	int	status = 0, res, i;
+
+	rwfd = open("testdir/rwfile", O_RDWR);		assert(rwfd != -1);
+	res = ftruncate(rwfd, sizeof(buf));		assert(res == 0);
+
+	for (i = 0; i < NUM_IOV; i++) {
+		iov[i].iov_base = buf + i*SIZE;
+		iov[i].iov_len = SIZE;
+		memset(iov[i].iov_base, i, SIZE);
+	}
+	status |= attempt_rw(rwfd, iov, NUM_IOV,  0, WRITEV, SIZE*NUM_IOV);
+	res = pread(rwfd, buf, sizeof(buf), 0);	assert(res == sizeof(buf));
+	for (i = 0; i < NUM_IOV; i++) {
+		unsigned int j;
+		for (j = 0; j < SIZE; j++) {
+			if (buf[i*SIZE + j] != i) {
+				printf("Unexpected value after writev at %i\n",
+				       i*SIZE + j);
+				status |= 1;
+				break;
+			}
+		}
+	}
+	if (!status)
+		printf("Checking memory: [Success]\n");
+
+	memset(buf, 0, sizeof(buf));
+	status |= attempt_rw(rwfd, iov, NUM_IOV,  0,  READV, SIZE*NUM_IOV);
+	for (i = 0; i < NUM_IOV; i++) {
+		unsigned int j;
+		for (j = 0; j < SIZE; j++) {
+			if (buf[i*SIZE + j] != i) {
+				printf("Unexpected value after readv at %i\n",
+				       i*SIZE + j);
+				status |= 1;
+				break;
+			}
+		}
+	}
+
+	/* Check that offset works. */
+	status |= attempt_rw(rwfd, iov+1, NUM_IOV-1,  SIZE, WRITEV,
+			     SIZE*(NUM_IOV-1));
+	memset(buf, 0, sizeof(buf));
+	res = pread(rwfd, buf, sizeof(buf), 0);	assert(res == sizeof(buf));
+	for (i = 1; i < NUM_IOV; i++) {
+		unsigned int j;
+		for (j = 0; j < SIZE; j++) {
+			if (buf[i*SIZE + j] != i) {
+				printf("Unexpected value after offset writev at %i\n",
+				       i*SIZE + j);
+				status |= 1;
+				break;
+			}
+		}
+	}
+	if (!status)
+		printf("Checking memory: [Success]\n");
+
+	memset(buf, 0, sizeof(buf));
+	status |= attempt_rw(rwfd, iov+1, NUM_IOV-1,  SIZE, READV,
+			     SIZE*(NUM_IOV-1));
+	for (i = 1; i < NUM_IOV; i++) {
+		unsigned int j;
+		for (j = 0; j < SIZE; j++) {
+			if (buf[i*SIZE + j] != i) {
+				printf("Unexpected value after offset readv at %i\n",
+				       i*SIZE + j);
+				status |= 1;
+				break;
+			}
+		}
+	}
+	if (!status)
+		printf("Checking memory: [Success]\n");
+
+	return status;
+}
+
diff -r 3a023dc4e63a harness/cases/16.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/harness/cases/16.t	Tue Jan 08 17:00:53 2008 +1100
@@ -0,0 +1,94 @@
+/* 16.t
+- eventfd tests.
+*/
+#include <stdint.h>
+#include <err.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+
+#ifndef SYS_eventfd
+#if defined(__i386__)
+#define SYS_eventfd 323
+#elif defined(__x86_64__)
+#define SYS_eventfd 284
+#elif defined(__ia64__)
+#define SYS_eventfd 1309
+#elif defined(__PPC__)
+#define SYS_eventfd 307
+#elif defined(__s390__)
+#define SYS_eventfd 318
+#elif defined(__alpha__)
+#define SYS_eventfd 478
+#else
+#error define SYS_eventfd for your arch!
+#endif
+#endif
+
+int test_main(void)
+{
+	/* 10 MB takes long enough that we would fail if eventfd
+	 * returned immediately. */
+#define SIZE	10000000
+	char *buf;
+	struct io_event io_event;
+	struct iocb iocb;
+	struct iocb *iocbs[] = { &iocb };
+	int rwfd, efd;
+	int res;
+	io_context_t	io_ctx;
+	uint64_t event;
+	struct timespec	notime = { .tv_sec = 0, .tv_nsec = 0 };
+
+	buf = malloc(SIZE);				assert(buf);
+	efd = syscall(SYS_eventfd, 0);
+	if (efd < 0) {
+		if (errno == ENOSYS) {
+			printf("No eventfd support.  [SKIPPING]\n");
+			exit(0);
+		}
+		err(1, "Failed to get eventfd");
+	}
+
+	rwfd = open("testdir/rwfile", O_RDWR);		assert(rwfd != -1);
+	res = ftruncate(rwfd, 0);			assert(res == 0);
+	memset(buf, 0x42, SIZE);
+
+	/* Write test. */
+	res = io_queue_init(1024, &io_ctx);		assert(res == 0);
+	io_prep_pwrite(&iocb, rwfd, buf, SIZE, 0);
+	io_set_eventfd(&iocb, efd);
+	res = io_submit(io_ctx, 1, iocbs);		assert(res == 1);
+
+	alarm(30);
+	res = read(efd, &event, sizeof(event));		assert(res == sizeof(event));
+	assert(event == 1);
+
+	/* This should now be ready. */
+	res = io_getevents(io_ctx, 0, 1, &io_event, &notime);
+	if (res != 1)
+		err(1, "io_getevents did not return 1 event after eventfd");
+	assert(io_event.res == SIZE);
+	printf("eventfd write test [SUCCESS]\n");
+
+	/* Read test. */
+	memset(buf, 0, SIZE);
+	io_prep_pread(&iocb, rwfd, buf, SIZE, 0);
+	io_set_eventfd(&iocb, efd);
+	res = io_submit(io_ctx, 1, iocbs);		assert(res == 1);
+
+	alarm(30);
+	res = read(efd, &event, sizeof(event));		assert(res == sizeof(event));
+	assert(event == 1);
+
+	/* This should now be ready. */
+	res = io_getevents(io_ctx, 0, 1, &io_event, &notime);
+	if (res != 1)
+		err(1, "io_getevents did not return 1 event after eventfd");
+	assert(io_event.res == SIZE);
+
+	for (res = 0; res < SIZE; res++)
+		assert(buf[res] == 0x42);
+	printf("eventfd read test  [SUCCESS]\n");
+
+	return 0;
+}
+
diff -r 3a023dc4e63a harness/cases/5.t
--- a/harness/cases/5.t	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/5.t	Tue Jan 08 17:00:53 2008 +1100
@@ -3,6 +3,7 @@
 */
 #include "aio_setup.h"
 #include <sys/mman.h>
+#include <errno.h>
 
 int test_main(void)
 {
@@ -40,7 +41,13 @@ int test_main(void)
 	assert(buf != (char *)-1);
 
 	status |= attempt_rw(rwfd, buf, SIZE,  0,  READ, SIZE);
-	status |= attempt_rw(rwfd, buf, SIZE,  0, WRITE, -EFAULT);
+
+	/* Whether PROT_WRITE is readable is arch-dependent.  So compare
+	 * against write() result (write() makes the kernel read from buf). */
+	res = write(rwfd, buf, SIZE);
+	if (res < 0)
+		res = -errno;
+	status |= attempt_rw(rwfd, buf, SIZE,  0, WRITE, res);
 
 	return status;
 }
diff -r 3a023dc4e63a harness/cases/7.t
--- a/harness/cases/7.t	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/7.t	Tue Jan 08 17:00:53 2008 +1100
@@ -9,12 +9,15 @@
 */
 
 #include <sys/resource.h>
+#include <signal.h>
 
 void SET_RLIMIT(long long limit)
 {
 	struct rlimit rlim;
 	int res;
 
+	/* Seems that we do send SIGXFSZ, but hard to fix... */
+	signal(SIGXFSZ, SIG_IGN);
 	rlim.rlim_cur = limit;			assert(rlim.rlim_cur == limit);
 	rlim.rlim_max = limit;			assert(rlim.rlim_max == limit);
 
diff -r 3a023dc4e63a harness/cases/8.t
--- a/harness/cases/8.t	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/8.t	Tue Jan 08 17:00:53 2008 +1100
@@ -2,44 +2,23 @@
 - Ditto for the above three tests at the offset maximum (largest
   possible ext2/3 file size.) (8.t)
  */
-#include <sys/vfs.h>
-
-#define EXT2_OLD_SUPER_MAGIC	0xEF51
-#define EXT2_SUPER_MAGIC	0xEF53
+#include <sys/types.h>
+#include <unistd.h>
 
 long long get_fs_limit(int fd)
 {
-	struct statfs s;
-	int res;
-	long long lim = 0;
+	long long min = 0, max = 9223372036854775807LL;
+	char c = 0;
 
-	res = fstatfs(fd, &s);		assert(res == 0);
-
-	switch(s.f_type) {
-	case EXT2_OLD_SUPER_MAGIC:
-	case EXT2_SUPER_MAGIC:
-#if 0
-	{
-		long long tmp;
-		tmp = s.f_bsize / 4;
-		/* 12 direct + indirect block + dind + tind */
-		lim = 12 + tmp + tmp * tmp + tmp * tmp * tmp;
-		lim *= s.f_bsize;
-		printf("limit(%ld) = %Ld\n", (long)s.f_bsize, lim);
+	while (max - min > 1) {	/* bisect on 1-byte writes; midpoint form avoids min + max overflow */
+		if (pwrite64(fd, &c, 1, min + (max - min) / 2) == -1)
+			max = min + (max - min) / 2;
+		else {
+			ftruncate(fd, 0);
+			min = min + (max - min) / 2;
+		}
 	}
-#endif
-		switch(s.f_bsize) {
-		case 4096: lim = 2199023251456; break;
-		default:
-			printf("unknown ext2 blocksize %ld\n", (long)s.f_bsize);
-			exit(3);
-		}
-		break;
-	default:
-		printf("unknown filesystem 0x%08lx\n", (long)s.f_type);
-		exit(3);
-	}
-	return lim;
+	return max;
 }
 
 #define SET_RLIMIT(x)	do ; while (0)
diff -r 3a023dc4e63a harness/cases/aio_setup.h
--- a/harness/cases/aio_setup.h	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/aio_setup.h	Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,4 @@ io_context_t	io_ctx;
+#include <time.h>
 io_context_t	io_ctx;
 #define BAD_CTX	((io_context_t)-1)
 
@@ -58,6 +59,9 @@ int sync_submit(struct iocb *iocb)
 #define WRITE		'w'
 #define READ_SILENT	'R'
 #define WRITE_SILENT	'W'
+#define READV		'<'
+#define WRITEV		'>'
+
 int attempt_rw(int fd, void *buf, int count, long long pos, int rw, int expect)
 {
 	struct iocb iocb;
@@ -74,6 +78,12 @@ int attempt_rw(int fd, void *buf, int co
 		silent = 1;
 	case WRITE:
 		io_prep_pwrite(&iocb, fd, buf, count, pos);
+		break;
+	case WRITEV:
+		io_prep_pwritev(&iocb, fd, buf, count, pos);
+		break;
+	case READV:
+		io_prep_preadv(&iocb, fd, buf, count, pos);
 		break;
 	}
 
diff -r 3a023dc4e63a harness/cases/common-7-8.h
--- a/harness/cases/common-7-8.h	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/cases/common-7-8.h	Tue Jan 08 17:00:53 2008 +1100
@@ -2,6 +2,7 @@
 */
 #include "aio_setup.h"
 
+#include <errno.h>
 #include <unistd.h>
 
 #define SIZE	512
@@ -13,7 +14,7 @@ int test_main(void)
 	int status = 0, res;
 	long long limit;
 
-	rwfd = open(FILENAME, O_RDWR);		assert(rwfd != -1);
+	rwfd = open(FILENAME, O_RDWR|O_CREAT, 0600);	assert(rwfd != -1);
 	res = ftruncate(rwfd, 0);			assert(res == 0);
 	buf = malloc(SIZE);				assert(buf != NULL);
 	memset(buf, 0, SIZE);
diff -r 3a023dc4e63a harness/main.c
--- a/harness/main.c	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/main.c	Tue Jan 08 17:00:53 2008 +1100
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <errno.h>
 #include <assert.h>
diff -r 3a023dc4e63a harness/runtests.sh
--- a/harness/runtests.sh	Tue Jan 08 14:38:53 2008 +1100
+++ b/harness/runtests.sh	Tue Jan 08 17:00:53 2008 +1100
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 passes=0
 fails=0
diff -r 3a023dc4e63a src/libaio.h
--- a/src/libaio.h	Tue Jan 08 14:38:53 2008 +1100
+++ b/src/libaio.h	Tue Jan 08 17:00:53 2008 +1100
@@ -33,7 +33,6 @@ struct timespec;
 struct timespec;
 struct sockaddr;
 struct iovec;
-struct iocb;
 
 typedef struct io_context *io_context_t;
 
@@ -44,8 +43,10 @@ typedef enum io_iocb_cmd {
 	IO_CMD_FSYNC = 2,
 	IO_CMD_FDSYNC = 3,
 
-	IO_CMD_POLL = 5,
+	IO_CMD_POLL = 5, /* Never implemented in mainline, see io_prep_poll */
 	IO_CMD_NOOP = 6,
+	IO_CMD_PREADV = 7,
+	IO_CMD_PWRITEV = 8,
 } io_iocb_cmd_t;
 
 #if defined(__i386__) /* little endian, 32 bits */
@@ -89,7 +90,9 @@ struct io_iocb_common {
 	PADDEDptr(void	*buf, __pad1);
 	PADDEDul(nbytes, __pad2);
 	long long	offset;
-	long long	__pad3, __pad4;
+	long long	__pad3;
+	unsigned	flags;
+	unsigned	resfd;
 };	/* result code is the amount read or -'ve errno */
 
 struct io_iocb_vector {
@@ -169,20 +172,44 @@ static inline void io_prep_pwrite(struct
 	iocb->u.c.offset = offset;
 }
 
-static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+static inline void io_prep_preadv(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
 {
 	memset(iocb, 0, sizeof(*iocb));
 	iocb->aio_fildes = fd;
-	iocb->aio_lio_opcode = IO_CMD_POLL;
+	iocb->aio_lio_opcode = IO_CMD_PREADV;
 	iocb->aio_reqprio = 0;
-	iocb->u.poll.events = events;
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+	iocb->u.c.offset = offset;
+}
+
+static inline void io_prep_pwritev(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
+	iocb->aio_reqprio = 0;
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+	iocb->u.c.offset = offset;
+}
+
+/* Jeff Moyer says this was implemented in Red Hat AS2.1 and RHEL3.
+ * AFAICT, it was never in mainline, and should not be used. --RR */
+static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
+{
+        memset(iocb, 0, sizeof(*iocb));
+        iocb->aio_fildes = fd;
+        iocb->aio_lio_opcode = IO_CMD_POLL;
+        iocb->aio_reqprio = 0;
+        iocb->u.poll.events = events;
 }
 
 static inline int io_poll(io_context_t ctx, struct iocb *iocb, io_callback_t cb, int fd, int events)
 {
-	io_prep_poll(iocb, fd, events);
-	io_set_callback(iocb, cb);
-	return io_submit(ctx, 1, &iocb);
+        io_prep_poll(iocb, fd, events);
+        io_set_callback(iocb, cb);
+        return io_submit(ctx, 1, &iocb);
 }
 
 static inline void io_prep_fsync(struct iocb *iocb, int fd)
@@ -215,6 +242,12 @@ static inline int io_fdsync(io_context_t
 	return io_submit(ctx, 1, &iocb);
 }
 
+static inline void io_set_eventfd(struct iocb *iocb, int eventfd)
+{
+	iocb->u.c.flags |= (1 << 0) /* IOCB_FLAG_RESFD */;
+	iocb->u.c.resfd = eventfd;
+}
+
 #ifdef __cplusplus
 }
 #endif




More information about the Ubuntu-devel-discuss mailing list