函数fil_io

 

/********************************************************************//**
Reads or writes data in a tablespace or log space. Unless sync is TRUE the
request is only queued as asynchronous i/o (aio) with os_aio(). With
sync == TRUE the i/o has completed when this function returns, and the
i/o bookkeeping of the file node is finished here by calling
fil_node_complete_io(). When UNIV_HOTBACKUP is defined, sync must be TRUE
and os_file_read() / os_file_write() are called directly instead of os_aio().

The byte counters srv_data_read / srv_data_written are incremented by len
before the tablespace is looked up, i.e. also on the path that returns
DB_TABLESPACE_DELETED.

A block offset that lies outside the space, or a request for which
os_aio() / os_file_read() / os_file_write() returns a false value, is not
reported through the return value: those cases hit ut_error / ut_a()
(presumably fatal assertions -- confirm against their definitions).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist; DB_TABLESPACE_DELETED is also
returned for a read on a space whose stop_new_ops flag is set */
UNIV_INTERN
ulint
fil_io(
/*===*/
    ulint    type,        /*!< in: OS_FILE_READ or OS_FILE_WRITE,
                ORed to OS_FILE_LOG, if a log i/o
                and ORed to OS_AIO_SIMULATED_WAKE_LATER
                if simulated aio and we want to post a
                batch of i/os; NOTE that a simulated batch
                may introduce hidden chances of deadlocks,
                because i/os are not actually handled until
                all have been posted: use with great
                caution! */
    ibool    sync,        /*!< in: TRUE if synchronous aio is desired;
                asserted to be TRUE when UNIV_HOTBACKUP is
                defined */
    ulint    space_id,    /*!< in: space id */
    ulint    zip_size,    /*!< in: compressed page size in bytes;
                0 for uncompressed pages; a nonzero value
                must be one of 1024, 2048, 4096, 8192,
                16384 */
    ulint    block_offset,    /*!< in: offset in number of blocks, counted
                from the start of the space; inside the
                function it is converted to an offset
                relative to the file node that is used */
    ulint    byte_offset,    /*!< in: remainder of offset in bytes; in
                aio this must be divisible by the OS block
                size; asserted to be a multiple of
                OS_FILE_LOG_BLOCK_SIZE */
    ulint    len,        /*!< in: how many bytes to read or write; this
                must not cross a file boundary; in aio this
                must be a block size multiple; asserted to
                be a multiple of OS_FILE_LOG_BLOCK_SIZE */
    void*    buf,        /*!< in/out: buffer where to store read data
                or from where to write; in aio this must be
                appropriately aligned */
    void*    message)    /*!< in: message for aio handler if non-sync
                aio used, else ignored */
{
    ulint        mode;        /* OS_AIO_* mode chosen below */
    fil_space_t*    space;        /* space looked up by space_id */
    fil_node_t*    node;        /* file node containing block_offset */
    ulint        offset_high;    /* high 32 bits of the file offset */
    ulint        offset_low;    /* low 32 bits of the file offset */
    ibool        ret;        /* result of the os_aio / os_file_* call */
    ulint        is_log;        /* OS_FILE_LOG bit taken from type */
    ulint        wake_later;    /* OS_AIO_SIMULATED_WAKE_LATER bit taken
                    from type */

    /* Remember the two flag bits and strip them from type, so that from
    here on type can be compared directly with OS_FILE_READ and
    OS_FILE_WRITE. */
    is_log = type & OS_FILE_LOG;
    type = type & ~OS_FILE_LOG;

    wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
    type = type & ~OS_AIO_SIMULATED_WAKE_LATER;

    ut_ad(byte_offset < UNIV_PAGE_SIZE);
    /* For compressed pages no byte remainder is allowed */
    ut_ad(!zip_size || !byte_offset);
    ut_ad(ut_is_2pow(zip_size));
    ut_ad(buf);
    ut_ad(len > 0);
    /* Compile-time check: the offset arithmetic below shifts by
    UNIV_PAGE_SIZE_SHIFT and relies on it matching UNIV_PAGE_SIZE. */
#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
#endif
    ut_ad(fil_validate_skip());
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
    /* ibuf bitmap pages must be read in the sync aio mode: */
    ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
          || !ibuf_bitmap_page(zip_size, block_offset)
          || sync || is_log);
# endif /* UNIV_LOG_DEBUG */
    /* Choose the aio mode. Precedence: sync, then log i/o, then a read
    of a page for which ibuf_page() holds (only while
    recv_no_ibuf_operations is not set), otherwise normal. */
    if (sync) {
        mode = OS_AIO_SYNC;
    } else if (is_log) {
        mode = OS_AIO_LOG;
    } else if (type == OS_FILE_READ
           && !recv_no_ibuf_operations
           && ibuf_page(space_id, zip_size, block_offset, NULL)) {
        mode = OS_AIO_IBUF;
    } else {
        mode = OS_AIO_NORMAL;
    }
#else /* !UNIV_HOTBACKUP */
    ut_a(sync);
    mode = OS_AIO_SYNC;
#endif /* !UNIV_HOTBACKUP */

    /* Account the requested bytes in the global counters. This is done
    before the space lookup, so the bytes are counted even if we return
    DB_TABLESPACE_DELETED below. */
    if (type == OS_FILE_READ) {
        srv_data_read+= len;
    } else if (type == OS_FILE_WRITE) {
        srv_data_written+= len;
    }

    /* Reserve the fil_system mutex and make sure that we can open at
    least one file while holding it, if the file is not already open */

    fil_mutex_enter_and_prepare_for_io(space_id);

    space = fil_space_get_by_id(space_id);

    /* If we are deleting a tablespace we don't allow any read
    operations on that. However, we do allow write operations. */
    if (!space || (type == OS_FILE_READ && space->stop_new_ops)) {
        /* Release the mutex before printing the diagnostic */
        mutex_exit(&fil_system->mutex);

        ut_print_timestamp(stderr);
        fprintf(stderr,
            "  InnoDB: Error: trying to do i/o"
            " to a tablespace which does not exist.\n"
            "InnoDB: i/o type %lu, space id %lu,"
            " page no. %lu, i/o length %lu bytes\n",
            (ulong) type, (ulong) space_id, (ulong) block_offset,
            (ulong) len);

        return(DB_TABLESPACE_DELETED);
    }

    ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));

    /* Walk the chain of file nodes of the space to find the node that
    contains block_offset. The size of every node that is skipped is
    subtracted from block_offset, so after the loop block_offset is
    relative to the start of the chosen node. */
    node = UT_LIST_GET_FIRST(space->chain);

    for (;;) {
        if (UNIV_UNLIKELY(node == NULL)) {
            /* Ran past the last node: the offset is outside the
            space */
            fil_report_invalid_page_access(
                block_offset, space_id, space->name,
                byte_offset, len, type);

            ut_error;
        }

        if (space->id != 0 && node->size == 0) {
            /* We do not know the size of a single-table tablespace
            before we open the file */

            break;
        }

        if (node->size > block_offset) {
            /* Found! */
            break;
        } else {
            block_offset -= node->size;
            node = UT_LIST_GET_NEXT(chain, node);
        }
    }

    /* Open file if closed */
    fil_node_prepare_for_io(node, fil_system, space);

    /* Check that at least the start offset is within the bounds of a
    single-table tablespace */
    if (UNIV_UNLIKELY(node->size <= block_offset)
        && space->id != 0 && space->purpose == FIL_TABLESPACE) {

        fil_report_invalid_page_access(
            block_offset, space_id, space->name, byte_offset,
            len, type);

        ut_error;
    }

    /* Now we have made the changes in the data structures of fil_system */
    mutex_exit(&fil_system->mutex);

    /* Calculate the low 32 bits and the high 32 bits of the file offset */

    if (!zip_size) {
        /* Uncompressed: the byte position in the file is
        block_offset * UNIV_PAGE_SIZE + byte_offset, split into two
        32-bit halves */
        offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
        offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
                   & 0xFFFFFFFFUL) + byte_offset;

        /* The number of blocks touched by the request (rounded up)
        must fit between block_offset and the end of the node */
        ut_a(node->size - block_offset
             >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
             / UNIV_PAGE_SIZE));
    } else {
        ulint    zip_size_shift;
        /* Map the compressed page size to its base-2 logarithm;
        any other value ends up in ut_error */
        switch (zip_size) {
        case 1024: zip_size_shift = 10; break;
        case 2048: zip_size_shift = 11; break;
        case 4096: zip_size_shift = 12; break;
        case 8192: zip_size_shift = 13; break;
        case 16384: zip_size_shift = 14; break;
        default: ut_error;
        }
        /* Same split as above, with zip_size as the block size */
        offset_high = block_offset >> (32 - zip_size_shift);
        offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
            + byte_offset;
        ut_a(node->size - block_offset
             >= (len + (zip_size - 1)) / zip_size);
    }

    /* Do aio */

    /* Both the start remainder and the length must be multiples of
    OS_FILE_LOG_BLOCK_SIZE */
    ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
    ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);

#ifdef UNIV_HOTBACKUP
    /* In ibbackup do normal i/o, not aio */
    if (type == OS_FILE_READ) {
        ret = os_file_read(node->handle, buf, offset_low, offset_high,len); /* see os_file_read() */
    } else {
        ret = os_file_write(node->name, node->handle, buf,
                    offset_low, offset_high, len);
    }
#else
    /* Queue the aio request */
    /* The wake_later bit that was stripped from type is passed on ORed
    into the mode argument */
    ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
             offset_low, offset_high, len, node, message);
#endif
    ut_a(ret);

    if (mode == OS_AIO_SYNC) {
        /* The i/o operation is already completed when we return from
        os_aio: */

        /* NOTE(review): for the other modes fil_node_complete_io() is
        presumably called later by whoever handles the completed aio
        request -- confirm against the aio completion code. */
        mutex_enter(&fil_system->mutex);

        fil_node_complete_io(node, fil_system, type);

        mutex_exit(&fil_system->mutex);

        ut_ad(fil_validate_skip());
    }

    return(DB_SUCCESS);
}

 

你可能感兴趣的:(函数fil_io)