/********************************************************************//** Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ UNIV_INTERN ulint fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o and ORed to OS_AIO_SIMULATED_WAKE_LATER if simulated aio and we want to post a batch of i/os; NOTE that a simulated batch may introduce hidden chances of deadlocks, because i/os are not actually handled until all have been posted: use with great caution! */ ibool sync, /*!< in: TRUE if synchronous aio is desired */ ulint space_id, /*!< in: space id */ ulint zip_size, /*!< in: compressed page size in bytes; 0 for uncompressed pages */ ulint block_offset, /*!< in: offset in number of blocks */ ulint byte_offset, /*!< in: remainder of offset in bytes; in aio this must be divisible by the OS block size */ ulint len, /*!< in: how many bytes to read or write; this must not cross a file boundary; in aio this must be a block size multiple */ void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ void* message) /*!< in: message for aio handler if non-sync aio used, else ignored */ { ulint mode; fil_space_t* space; fil_node_t* node; ulint offset_high; ulint offset_low; ibool ret; ulint is_log; ulint wake_later; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; type = type & ~OS_AIO_SIMULATED_WAKE_LATER; ut_ad(byte_offset < UNIV_PAGE_SIZE); ut_ad(!zip_size || !byte_offset); ut_ad(ut_is_2pow(zip_size)); ut_ad(buf); ut_ad(len > 0); #if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE # error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE" #endif ut_ad(fil_validate_skip()); #ifndef UNIV_HOTBACKUP # ifndef UNIV_LOG_DEBUG /* ibuf bitmap pages must be read in the sync aio mode: */ ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE) || !ibuf_bitmap_page(zip_size, block_offset) || sync || is_log); # endif /* UNIV_LOG_DEBUG */ if (sync) { mode = OS_AIO_SYNC; } else if (is_log) { mode = OS_AIO_LOG; } else if (type == OS_FILE_READ && !recv_no_ibuf_operations && ibuf_page(space_id, zip_size, block_offset, NULL)) { mode = OS_AIO_IBUF; } else { mode = OS_AIO_NORMAL; } #else /* !UNIV_HOTBACKUP */ ut_a(sync); mode = OS_AIO_SYNC; #endif /* !UNIV_HOTBACKUP */ if (type == OS_FILE_READ) { srv_data_read+= len; } else if (type == OS_FILE_WRITE) { srv_data_written+= len; } /* Reserve the fil_system mutex and make sure that we can open at least one file while holding it, if the file is not already open */ fil_mutex_enter_and_prepare_for_io(space_id); space = fil_space_get_by_id(space_id); /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. */ if (!space || (type == OS_FILE_READ && space->stop_new_ops)) { mutex_exit(&fil_system->mutex); ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: trying to do i/o" " to a tablespace which does not exist.\n" "InnoDB: i/o type %lu, space id %lu," " page no. %lu, i/o length %lu bytes\n", (ulong) type, (ulong) space_id, (ulong) block_offset, (ulong) len); return(DB_TABLESPACE_DELETED); } ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE)); node = UT_LIST_GET_FIRST(space->chain); for (;;) { if (UNIV_UNLIKELY(node == NULL)) { fil_report_invalid_page_access( block_offset, space_id, space->name, byte_offset, len, type); ut_error; } if (space->id != 0 && node->size == 0) { /* We do not know the size of a single-table tablespace before we open the file */ break; } if (node->size > block_offset) { /* Found! */ break; } else { block_offset -= node->size; node = UT_LIST_GET_NEXT(chain, node); } } /* Open file if closed */ fil_node_prepare_for_io(node, fil_system, space); /* Check that at least the start offset is within the bounds of a single-table tablespace */ if (UNIV_UNLIKELY(node->size <= block_offset) && space->id != 0 && space->purpose == FIL_TABLESPACE) { fil_report_invalid_page_access( block_offset, space_id, space->name, byte_offset, len, type); ut_error; } /* Now we have made the changes in the data structures of fil_system */ mutex_exit(&fil_system->mutex); /* Calculate the low 32 bits and the high 32 bits of the file offset */ if (!zip_size) { offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT)); offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL) + byte_offset; ut_a(node->size - block_offset >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE)); } else { ulint zip_size_shift; switch (zip_size) { case 1024: zip_size_shift = 10; break; case 2048: zip_size_shift = 11; break; case 4096: zip_size_shift = 12; break; case 8192: zip_size_shift = 13; break; case 16384: zip_size_shift = 14; break; default: ut_error; } offset_high = block_offset >> (32 - zip_size_shift); offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL) + byte_offset; ut_a(node->size - block_offset >= (len + (zip_size - 1)) / zip_size); } /* Do aio */ ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); #ifdef UNIV_HOTBACKUP /* In ibbackup do normal i/o, not aio */ if (type == OS_FILE_READ) { ret = os_file_read(node->handle, buf, offset_low, offset_high,len); //详见 } else { ret = os_file_write(node->name, node->handle, buf, offset_low, offset_high, len); } #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, offset_low, offset_high, len, node, message); #endif ut_a(ret); if (mode == OS_AIO_SYNC) { /* The i/o operation is already completed when we return from os_aio: */ mutex_enter(&fil_system->mutex); fil_node_complete_io(node, fil_system, type); mutex_exit(&fil_system->mutex); ut_ad(fil_validate_skip()); } return(DB_SUCCESS); }