[Commits] Rev 3377: MDEV-77 - possible deadlock in XtraDB async io subsystem on Windows. in file:///H:/bzr/5.3/

Vladislav Vaintroub wlad at montyprogram.com
Sun Jan 8 22:14:10 EET 2012


At file:///H:/bzr/5.3/

------------------------------------------------------------
revno: 3377
revision-id: wlad at montyprogram.com-20120108201407-mtxueky7hicy89e1
parent: igor at askmonty.org-20120103040636-nc6o55vsxqadd1n0
committer: Vladislav Vaintroub <wlad at montyprogram.com>
branch nick: 5.3
timestamp: Sun 2012-01-08 21:14:07 +0100
message:
  MDEV-77 - possible deadlock in XtraDB async io subsystem on Windows.
  
  Split IO threads into ones that handle only read completion and ones that handle only write completion, as was originally done before it got lost with the "completion port" patch. The reason we need dedicated read threads and dedicated write threads is that the read completion routine can block waiting for write IO to complete, and in the rare case where all IO threads are handling async reads, this can deadlock.
-------------- next part --------------
=== modified file 'storage/xtradb/os/os0file.c'
--- a/storage/xtradb/os/os0file.c	2011-12-11 09:34:44 +0000
+++ b/storage/xtradb/os/os0file.c	2012-01-08 20:14:07 +0000
@@ -249,6 +249,8 @@
 #ifdef _WIN32
 /** IO completion port used by background io threads */
 static HANDLE completion_port;
+/** IO completion port used by background io READ threads */
+static HANDLE read_completion_port;
 /** Thread local storage index for the per-thread event used for synchronous IO */
 static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
 #endif
@@ -3251,9 +3253,10 @@
 
 	os_last_printout = time(NULL);
 #ifdef _WIN32
-	ut_a(completion_port == 0);
+	ut_a(completion_port == 0 && read_completion_port == 0);
 	completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
-	ut_a(completion_port);
+	read_completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+	ut_a(completion_port && read_completion_port);
 #endif
 }
 
@@ -3299,6 +3302,7 @@
 	if(completion_port)
 	{
 		PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
+		PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
 	}
 }
 #endif
@@ -3860,6 +3864,9 @@
 }
 
 #ifdef WIN_ASYNC_IO
+#define READ_SEGMENT(x) ((x) < srv_n_read_io_threads)	/* segment index x is below the read io thread count */
+#define WRITE_SEGMENT(x) (!READ_SEGMENT(x))	/* any segment that is not a read segment */
+
 /**********************************************************************//**
 This function is only used in Windows asynchronous i/o.
 Waits for an aio operation to complete. This function is used to wait the
@@ -3898,18 +3905,45 @@
 	DWORD		len;
 	BOOL		retry		= FALSE;
 	ULONG_PTR key;
-
-	ret = GetQueuedCompletionStatus(completion_port, &len, &key, 
-		(OVERLAPPED **)&slot, INFINITE);
-
-	/* If shutdown key was received, repost the shutdown message and exit */
-	if (ret && (key == IOCP_SHUTDOWN_KEY)) {
-		PostQueuedCompletionStatus(completion_port, 0, key, NULL);
-		os_thread_exit(NULL);
-	}
-
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-		os_thread_exit(NULL);
+	HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port;
+
+	for(;;) {
+		ret = GetQueuedCompletionStatus(port, &len, &key, 
+			(OVERLAPPED **)&slot, INFINITE);
+
+		/* If shutdown key was received, repost the shutdown message and exit */
+		if (ret && (key == IOCP_SHUTDOWN_KEY)) {
+			PostQueuedCompletionStatus(port, 0, key, NULL);
+			os_thread_exit(NULL);
+		}
+
+		if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+			os_thread_exit(NULL);
+		}
+
+		if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) {
+			/*
+			Redirect read completions to the dedicated completion port
+			and thread. We need to split read and write threads. If we do not
+			do that, and just allow all io threads to process all IO, it is
+			possible to get stuck in a deadlock in buffer pool code.
+
+			Currently, the problem is solved this way - "write io" threads
+			always get all completion notifications, from both async reads and
+			writes. Write completion is handled in the same thread that gets it.
+			Read completion is forwarded (via PostQueuedCompletionStatus())
+			to the second completion port dedicated solely to reads. One of the
+			"read io" threads waiting on this port will finally handle the IO.
+
+			Forwarding IO completion this way costs a context switch, and this
+			seems tolerable since asynchronous reads are by far less frequent.
+			*/
+			ut_a(PostQueuedCompletionStatus(read_completion_port, len, key,
+				&slot->control));
+		}
+		else {
+			break;
+		}
 	}
 
 



More information about the commits mailing list