diff options
author | Robin H. Johnson <robbat2@gentoo.org> | 2009-07-06 01:19:45 -0700 |
---|---|---|
committer | Robin H. Johnson <robbat2@gentoo.org> | 2009-07-06 01:19:45 -0700 |
commit | 6824a6bd53ac965abb56d3880ed1fa722fd85c1e (patch) | |
tree | 79ed759295ce539214eb3283653952d431570888 /percona | |
parent | Need to use 5 digits of numbering now. LOTS of patches to track. (diff) | |
download | mysql-extras-6824a6bd53ac965abb56d3880ed1fa722fd85c1e.tar.gz mysql-extras-6824a6bd53ac965abb56d3880ed1fa722fd85c1e.tar.bz2 mysql-extras-6824a6bd53ac965abb56d3880ed1fa722fd85c1e.zip |
Start to keep percona patches in a subdir for ease of tracking.
Diffstat (limited to 'percona')
31 files changed, 32995 insertions, 0 deletions
diff --git a/percona/5.0.75-b12/innodb_check_fragmentation.patch b/percona/5.0.75-b12/innodb_check_fragmentation.patch new file mode 100644 index 0000000..4b16731 --- /dev/null +++ b/percona/5.0.75-b12/innodb_check_fragmentation.patch @@ -0,0 +1,275 @@ +diff -r 936d427a9a15 innobase/btr/btr0cur.c +--- a/innobase/btr/btr0cur.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0cur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -516,6 +516,14 @@ + == index->table->comp); + } + ++ if (level == 0) { ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ } ++ + break; + } + +@@ -663,6 +671,12 @@ + btr_cur_add_path_info(cursor, height, + root_height); + } ++ ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); + + break; + } +diff -r 936d427a9a15 innobase/btr/btr0pcur.c +--- a/innobase/btr/btr0pcur.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0pcur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -381,6 +381,7 @@ + last record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint next_page_no; + ulint space; + page_t* page; +@@ -393,11 +394,22 @@ + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + next_page_no = btr_page_get_next(page, mtr); + space = buf_frame_get_space_id(page); + + ut_ad(next_page_no != FIL_NULL); ++ ++ if (next_page_no - page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ 
page_header_get_field(page, PAGE_GARBAGE)); + + next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); +@@ -427,6 +439,7 @@ + record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint prev_page_no; + ulint space; + page_t* page; +@@ -462,9 +475,20 @@ + btr_pcur_restore_position(latch_mode2, cursor, mtr); + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + prev_page_no = btr_page_get_prev(page, mtr); + space = buf_frame_get_space_id(page); ++ ++ if (page_no - prev_page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ page_header_get_field(page, PAGE_GARBAGE)); + + if (btr_pcur_is_before_first_on_page(cursor, mtr) + && (prev_page_no != FIL_NULL)) { +diff -r 936d427a9a15 innobase/btr/btr0sea.c +--- a/innobase/btr/btr0sea.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0sea.c Mon Dec 22 00:33:11 2008 -0800 +@@ -861,6 +861,12 @@ + + buf_pool->n_page_gets++; + ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ + return(TRUE); + + /*-------------------------------------------*/ +diff -r 936d427a9a15 innobase/include/btr0cur.h +--- a/innobase/include/btr0cur.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/include/btr0cur.h Mon Dec 22 00:33:11 2008 -0800 +@@ -697,6 +697,17 @@ + extern ulint btr_cur_n_non_sea_old; + extern ulint btr_cur_n_sea_old; + ++/*--------------------------------------*/ ++/* prototypes for new functions added to ha_innodb.cc */ ++void innobase_mysql_thd_init_innodb_scan_cont(); ++void 
innobase_mysql_thd_increment_innodb_scan_cont(ulong length); ++void innobase_mysql_thd_init_innodb_scan_jump(); ++void innobase_mysql_thd_increment_innodb_scan_jump(ulong length); ++void innobase_mysql_thd_init_innodb_scan_data(); ++void innobase_mysql_thd_increment_innodb_scan_data(ulong length); ++void innobase_mysql_thd_init_innodb_scan_garbage(); ++void innobase_mysql_thd_increment_innodb_scan_garbage(ulong length); ++ + #ifndef UNIV_NONINL + #include "btr0cur.ic" + #endif +diff -r 936d427a9a15 patch_info/innodb_check_fragmentation.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_check_fragmentation.info Mon Dec 22 00:33:11 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_check_fragmentation.patch ++Name=Session status to check fragmentation of the last InnoDB scan ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=The names are Innodb_scan_* +diff -r 936d427a9a15 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -760,6 +760,102 @@ + } + + /************************************************************************* ++Initializes Innodb_scan_blocks_contiguous. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_cont() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_blocks_contiguous. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_cont(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_blocks_jumpy. 
*/ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_jump() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_blocks_jumpy. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_jump(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_data_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_data() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_data_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_data(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_garbages_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_garbage() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_garbages_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_garbage(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage+= length; ++ } ++} ++ ++/************************************************************************* + Gets the InnoDB transaction handle for a MySQL handler object, creates + an InnoDB transaction struct if the corresponding MySQL thread struct still + lacks one. 
*/ +diff -r 936d427a9a15 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -6673,6 +6673,10 @@ + {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, + #ifdef HAVE_INNOBASE_DB + {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, ++ {"Innodb_scan_pages_contiguous",(char*) offsetof(STATUS_VAR, innodb_scan_cont), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_pages_jumpy", (char*) offsetof(STATUS_VAR, innodb_scan_jump), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_data_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_data), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_garbages_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_garbage), SHOW_LONGLONG_STATUS}, + #endif /*HAVE_INNOBASE_DB*/ + {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, + {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, +diff -r 936d427a9a15 sql/sql_class.h +--- a/sql/sql_class.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/sql_class.h Mon Dec 22 00:33:11 2008 -0800 +@@ -729,6 +729,10 @@ + sense to add to the /global/ status variable counter. 
+ */ + double last_query_cost; ++ ulonglong innodb_scan_cont; ++ ulonglong innodb_scan_jump; ++ ulonglong innodb_scan_data; ++ ulonglong innodb_scan_garbage; + } STATUS_VAR; + + /* diff --git a/percona/5.0.75-b12/innodb_fsync_source.patch b/percona/5.0.75-b12/innodb_fsync_source.patch new file mode 100644 index 0000000..637a7d6 --- /dev/null +++ b/percona/5.0.75-b12/innodb_fsync_source.patch @@ -0,0 +1,594 @@ +diff -r 61031ebb48ce innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/buf/buf0flu.c Mon Nov 03 05:07:56 2008 -0800 +@@ -341,7 +341,7 @@ + + /* Now flush the doublewrite buffer data to disk */ + +- fil_flush(TRX_SYS_SPACE); ++ fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We know that the writes have been flushed to disk now + and in recovery we will find them in the doublewrite buffer +@@ -381,7 +381,7 @@ + + /* Now we flush the data to disk (for example, with fsync) */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We can now reuse the doublewrite memory buffer: */ + +@@ -501,7 +501,8 @@ + } + #else + /* Force the log to the disk before writing the modified block */ +- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_DIRTY_BUFFER); + #endif + buf_flush_init_for_writing(block->frame, block->newest_modification, + block->space, block->offset); +diff -r 61031ebb48ce innobase/fil/fil0fil.c +--- a/innobase/fil/fil0fil.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/fil/fil0fil.c Mon Nov 03 05:07:56 2008 -0800 +@@ -245,6 +245,7 @@ + request */ + UT_LIST_BASE_NODE_T(fil_space_t) space_list; + /* list of all file spaces */ ++ ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */ + }; + + /* The tablespace memory cache. 
This variable is NULL before the module is +@@ -849,7 +850,7 @@ + /* Flush tablespaces so that we can close modified files in the LRU + list */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + count++; + +@@ -1309,7 +1310,10 @@ + + UT_LIST_INIT(system->unflushed_spaces); + UT_LIST_INIT(system->space_list); +- ++ { ++ int x; ++ for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0; ++ } + return(system); + } + +@@ -1437,6 +1441,23 @@ + } + + mutex_exit(&(system->mutex)); ++} ++ ++/******************************************************************** ++Prints internal counters */ ++ ++void ++fil_print(FILE *file) ++{ ++ fprintf(file, ++ "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, " ++ "%lu log aio, %lu log sync, %lu archive\n", ++ fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER], ++ fil_system->flush_types[FLUSH_FROM_OTHER], ++ fil_system->flush_types[FLUSH_FROM_CHECKPOINT], ++ fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE], ++ fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO], ++ fil_system->flush_types[FLUSH_FROM_ARCHIVE]); + } + + /******************************************************************** +@@ -2256,7 +2277,7 @@ + + os_thread_sleep(20000); + +- fil_flush(id); ++ fil_flush(id, FLUSH_FROM_OTHER); + + goto retry; + +@@ -3574,7 +3595,7 @@ + size_after_extend, *actual_size); */ + mutex_exit(&(system->mutex)); + +- fil_flush(space_id); ++ fil_flush(space_id, FLUSH_FROM_OTHER); + + return(success); + } +@@ -4166,8 +4187,9 @@ + void + fil_flush( + /*======*/ +- ulint space_id) /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4176,7 +4198,7 @@ + ib_longlong old_mod_counter; + + mutex_enter(&(system->mutex)); +- ++ 
system->flush_types[flush_type]++; + HASH_SEARCH(hash, system->spaces, space_id, space, + space->id == space_id); + if (!space || space->is_being_deleted) { +@@ -4281,7 +4303,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4322,7 +4345,7 @@ + a non-existing space id. */ + for (i = 0; i < n_space_ids; i++) { + +- fil_flush(space_ids[i]); ++ fil_flush(space_ids[i], flush_type); + } + + mem_free(space_ids); +diff -r 61031ebb48ce innobase/include/fil0fil.h +--- a/innobase/include/fil0fil.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/fil0fil.h Mon Nov 03 05:07:56 2008 -0800 +@@ -197,6 +197,13 @@ + fil_init( + /*=====*/ + ulint max_n_open); /* in: max number of open files */ ++/******************************************************************** ++ * Prints internal counters. */ ++ ++void ++fil_print( ++ /*=====*/ ++ FILE* file); /* in: output stream */ + /*********************************************************************** + Opens all log files and system tablespace data files. They stay open until the + database server shutdown. This should be called at a server startup after the +@@ -621,14 +628,26 @@ + ulint segment); /* in: the number of the segment in the aio + array to wait for */ + /************************************************************************** ++Identifies the caller of fil_flush. */ ++typedef enum { ++ FLUSH_FROM_DIRTY_BUFFER, ++ FLUSH_FROM_OTHER, ++ FLUSH_FROM_CHECKPOINT, ++ FLUSH_FROM_LOG_IO_COMPLETE, ++ FLUSH_FROM_LOG_WRITE_UP_TO, ++ FLUSH_FROM_ARCHIVE, ++ FLUSH_FROM_NUMBER ++} flush_from_type; ++/************************************************************************** + Flushes to disk possible writes cached by the OS. 
If the space does not exist + or is being dropped, does not do anything. */ + + void + fil_flush( + /*======*/ +- ulint space_id); /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /************************************************************************** + Flushes to disk writes in file spaces of the given type possibly cached by + the OS. */ +@@ -636,7 +655,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /********************************************************************** + Checks the consistency of the tablespace cache. */ + +diff -r 61031ebb48ce innobase/include/log0log.h +--- a/innobase/include/log0log.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/log0log.h Mon Nov 03 05:07:56 2008 -0800 +@@ -146,6 +146,22 @@ + log_io_complete( + /*============*/ + log_group_t* group); /* in: log group */ ++ ++/********************************************************** ++Describes the caller of log_write_up_to. */ ++ ++typedef enum { ++ LOG_WRITE_FROM_DIRTY_BUFFER, ++ LOG_WRITE_FROM_BACKGROUND_SYNC, ++ LOG_WRITE_FROM_BACKGROUND_ASYNC, ++ LOG_WRITE_FROM_INTERNAL, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC, ++ LOG_WRITE_FROM_CHECKPOINT_ASYNC, ++ LOG_WRITE_FROM_LOG_ARCHIVE, ++ LOG_WRITE_FROM_COMMIT_SYNC, ++ LOG_WRITE_FROM_COMMIT_ASYNC, ++ LOG_WRITE_FROM_NUMBER ++} log_sync_type; + /********************************************************** + This function is called, e.g., when a transaction wants to commit. 
It checks + that the log has been written to the log file up to the last log entry written +@@ -159,14 +175,21 @@ + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk); +- /* in: TRUE if we want the written log also to be +- flushed to disk */ ++ ibool flush_to_disk, ++ /* in: TRUE if we want the written log also to be flushed to disk */ ++ log_sync_type caller);/* in: identifies the caller */ + /******************************************************************** + Does a syncronous flush of the log buffer to disk. */ + + void + log_buffer_flush_to_disk(void); ++/*==========================*/ ++/******************************************************************** ++Flushes the log buffer. Forces it to disk depending on the value of ++the configuration parameter innodb_flush_log_at_trx_commit. */ ++ ++void ++log_buffer_flush_maybe_sync(void); + /*==========================*/ + /******************************************************************** + Advances the smallest lsn for which there are unflushed dirty blocks in the +@@ -744,6 +767,12 @@ + AND flushed to disk */ + ulint n_pending_writes;/* number of currently pending flushes + or writes */ ++ ulint log_sync_callers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to */ ++ ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to when log file is sync'd */ ++ ulint n_syncs; /* number of fsyncs done for log file */ ++ ulint n_checkpoints; /* number of calls to log_checkpoint */ + /* NOTE on the 'flush' in names of the fields below: starting from + 4.0.14, we separate the write of the log file and the actual fsync() + or other method to flush it to disk. 
The names below shhould really +diff -r 61031ebb48ce innobase/log/log0log.c +--- a/innobase/log/log0log.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/log/log0log.c Mon Nov 03 05:07:56 2008 -0800 +@@ -782,6 +782,15 @@ + log_sys->written_to_all_lsn = log_sys->lsn; + + log_sys->n_pending_writes = 0; ++ { ++ int x; ++ for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) { ++ log_sys->log_sync_callers[x] = 0; ++ log_sys->log_sync_syncers[x] = 0; ++ } ++ } ++ log_sys->n_syncs = 0; ++ log_sys->n_checkpoints = 0; + + log_sys->no_flush_event = os_event_create(NULL); + +@@ -1066,7 +1075,7 @@ + if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + #ifdef UNIV_DEBUG +@@ -1088,7 +1097,7 @@ + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC + && srv_flush_log_at_trx_commit != 2) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + mutex_enter(&(log_sys->mutex)); +@@ -1303,9 +1312,10 @@ + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk) ++ ibool flush_to_disk, + /* in: TRUE if we want the written log also to be + flushed to disk */ ++ log_sync_type caller) /* in: identifies caller */ + { + log_group_t* group; + ulint start_offset; +@@ -1315,6 +1325,7 @@ + ulint loop_count; + ulint unlock; + ++ log_sys->log_sync_callers[caller]++; + if (recv_no_ibuf_operations) { + /* Recovery is running and no operations on the log files are + allowed yet (the variable name .._no_ibuf_.. 
is misleading) */ +@@ -1465,13 +1476,17 @@ + so we have also flushed to disk what we have written */ + + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + + } else if (flush_to_disk) { + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO); + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + } + + mutex_enter(&(log_sys->mutex)); +@@ -1520,7 +1535,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_BACKGROUND_SYNC); + } + + /******************************************************************** +@@ -1551,7 +1567,7 @@ + mutex_exit(&(log->mutex)); + + if (do_flush) { +- log_write_up_to(lsn, LOG_NO_WAIT, FALSE); ++ log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL); + } + } + +@@ -1921,11 +1937,11 @@ + } + + if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT); + } + + mutex_enter(&(log_sys->mutex)); +- ++ log_sys->n_checkpoints++; + oldest_lsn = log_buf_pool_get_oldest_modification(); + + mutex_exit(&(log_sys->mutex)); +@@ -1938,7 +1954,8 @@ + write-ahead-logging algorithm ensures that the log has been flushed + up to oldest_lsn. 
*/ + +- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC); + + mutex_enter(&(log_sys->mutex)); + +@@ -2566,7 +2583,7 @@ + + mutex_exit(&(log_sys->mutex)); + +- fil_flush(group->archive_space_id); ++ fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE); + + mutex_enter(&(log_sys->mutex)); + +@@ -2647,7 +2664,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_LOG_ARCHIVE); + + calc_new_limit = FALSE; + +@@ -3184,8 +3202,8 @@ + } + mutex_exit(&kernel_mutex); + +- fil_flush_file_spaces(FIL_TABLESPACE); +- fil_flush_file_spaces(FIL_LOG); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); ++ fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER); + + /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer + pool: therefore it is essential that the buffer pool has been +@@ -3218,7 +3236,7 @@ + + fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + fil_close_all_files(); + +@@ -3331,15 +3349,45 @@ + time_elapsed = 0.001 + difftime(current_time, + log_sys->last_printout_time); + fprintf(file, +- "%lu pending log writes, %lu pending chkp writes\n" +- "%lu log i/o's done, %.2f log i/o's/second\n", +- (ulong) log_sys->n_pending_writes, +- (ulong) log_sys->n_pending_checkpoint_writes, +- (ulong) log_sys->n_log_ios, +- ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); ++ "%lu pending log writes, %lu pending chkp writes\n" ++ "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n", ++ (ulong) log_sys->n_pending_writes, ++ (ulong) log_sys->n_pending_checkpoint_writes, ++ (ulong) log_sys->n_log_ios, ++ (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed, ++ log_sys->n_syncs, ++ 
log_sys->n_checkpoints); + + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = current_time; ++ ++ fprintf(file, ++ "log sync callers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]); ++ ++ fprintf(file, ++ "log sync syncers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]); + + mutex_exit(&(log_sys->mutex)); + } +diff -r 61031ebb48ce innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Nov 03 05:07:56 2008 -0800 +@@ -1638,6 +1638,12 @@ + (ulong)time_elapsed); + + fputs("----------\n" ++ "BACKGROUND THREAD\n" ++ "----------\n", file); ++ fil_print(file); ++ ++ ++ fputs("----------\n" + 
"SEMAPHORES\n" + "----------\n", file); + sync_print(file); +diff -r 61031ebb48ce innobase/trx/trx0sys.c +--- a/innobase/trx/trx0sys.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0sys.c Mon Nov 03 05:07:56 2008 -0800 +@@ -511,7 +511,7 @@ + page += UNIV_PAGE_SIZE; + } + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + leave_func: + ut_free(unaligned_read_buf); +diff -r 61031ebb48ce innobase/trx/trx0trx.c +--- a/innobase/trx/trx0trx.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0trx.c Mon Nov 03 05:07:56 2008 -0800 +@@ -916,19 +916,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1659,18 +1661,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush them to + disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ 
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1906,19 +1911,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +diff -r 61031ebb48ce patch_info/innodb_fsync_source.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_fsync_source.info Mon Nov 03 05:07:56 2008 -0800 +@@ -0,0 +1,9 @@ ++File=innodb_fsync_source.patch ++Name=Information of fsync callers in InnoDB ++Version=1.0 ++Author=Google ++License=GPL ++Comment= ++ChangeLog= ++2008-11-01 ++VT: Initial porting diff --git a/percona/5.0.75-b12/innodb_io_patches.patch b/percona/5.0.75-b12/innodb_io_patches.patch new file mode 100644 index 0000000..0b3ccef --- /dev/null +++ b/percona/5.0.75-b12/innodb_io_patches.patch @@ -0,0 +1,672 @@ +diff -ruN a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c 2008-12-19 02:19:35.000000000 +0900 ++++ b/innobase/buf/buf0flu.c 2009-01-09 15:51:10.000000000 +0900 +@@ -898,10 +898,17 @@ + + old_page_count = page_count; + ++ if (srv_flush_neighbor_pages) { + /* Try to flush also all the neighbors */ + page_count += + buf_flush_try_neighbors(space, offset, + flush_type); ++ } else { ++ /* Try to flush the page only */ ++ page_count += ++ buf_flush_try_page(space, offset, ++ flush_type); ++ } + /* 
fprintf(stderr, + "Flush type %lu, page no %lu, neighb %lu\n", + flush_type, offset, +diff -ruN a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/buf/buf0rea.c 2009-01-09 15:40:46.000000000 +0900 +@@ -189,6 +189,10 @@ + ulint err; + ulint i; + ++ if (!(srv_read_ahead & 1)) { ++ return(0); ++ } ++ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); +@@ -396,6 +400,10 @@ + ulint err; + ulint i; + ++ if (!(srv_read_ahead & 2)) { ++ return(0); ++ } ++ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); +diff -ruN a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c +--- a/innobase/ibuf/ibuf0ibuf.c 2008-12-19 02:19:35.000000000 +0900 ++++ b/innobase/ibuf/ibuf0ibuf.c 2009-01-09 15:53:18.000000000 +0900 +@@ -370,8 +370,9 @@ + grow in size, as the references on the upper levels of the tree can + change */ + +- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE +- / IBUF_POOL_SIZE_PER_MAX_SIZE; ++ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE ++ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE); ++ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE; + ibuf->meter = IBUF_THRESHOLD + 1; + + UT_LIST_INIT(ibuf->data_list); +@@ -2258,11 +2259,13 @@ + + mutex_enter(&ibuf_mutex); + ++ if (!srv_ibuf_active_contract) { + if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { + mutex_exit(&ibuf_mutex); + + return; + } ++ } + + sync = FALSE; + +diff -ruN a/innobase/include/os0file.h b/innobase/include/os0file.h +--- a/innobase/include/os0file.h 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/include/os0file.h 2009-01-09 15:40:46.000000000 +0900 +@@ -551,8 +551,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint 
n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */ ++ ulint n_write_threads, /**/ + ulint n_slots_sync); /* in: number of slots in the sync aio array */ + /*********************************************************************** + Requests an asynchronous i/o operation. */ +diff -ruN a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/include/srv0srv.h 2009-01-09 15:54:33.000000000 +0900 +@@ -89,6 +89,8 @@ + extern ulint srv_lock_table_size; + + extern ulint srv_n_file_io_threads; ++extern ulint srv_n_read_io_threads; ++extern ulint srv_n_write_io_threads; + + #ifdef UNIV_LOG_ARCHIVE + extern ibool srv_log_archive_on; +@@ -133,6 +135,14 @@ + extern ulong srv_max_purge_lag; + extern ibool srv_use_awe; + extern ibool srv_use_adaptive_hash_indexes; ++ ++extern ulint srv_io_capacity; ++extern long long srv_ibuf_max_size; ++extern ulint srv_ibuf_active_contract; ++extern ulint srv_ibuf_accel_rate; ++extern ulint srv_flush_neighbor_pages; ++extern uint srv_read_ahead; ++extern ulint srv_adaptive_checkpoint; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -ruN a/innobase/log/log0log.c b/innobase/log/log0log.c +--- a/innobase/log/log0log.c 2008-12-19 02:19:36.000000000 +0900 ++++ b/innobase/log/log0log.c 2009-01-09 15:40:46.000000000 +0900 +@@ -3326,6 +3326,15 @@ + (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn), + (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + ++ fprintf(file, ++ "Max checkpoint age %lu\n" ++ "Modified age %lu\n" ++ "Checkpoint age %lu\n", ++ (ulong) log_sys->max_checkpoint_age, ++ (ulong) ut_dulint_minus(log_sys->lsn, ++ log_buf_pool_get_oldest_modification()), ++ 
(ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn)); ++ + current_time = time(NULL); + + time_elapsed = 0.001 + difftime(current_time, +diff -ruN a/innobase/os/os0file.c b/innobase/os/os0file.c +--- a/innobase/os/os0file.c 2009-01-09 15:40:23.000000000 +0900 ++++ b/innobase/os/os0file.c 2009-01-09 15:40:46.000000000 +0900 +@@ -2877,8 +2877,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/ ++ ulint n_write_threads, /**/ + ulint n_slots_sync) /* in: number of slots in the sync aio array */ + { + ulint n_read_segs; +@@ -2888,6 +2890,8 @@ + #ifdef POSIX_ASYNC_IO + sigset_t sigset; + #endif ++ ulint n_segments = 2 + n_read_threads + n_write_threads; ++ + ut_ad(n % n_segments == 0); + ut_ad(n_segments >= 4); + +@@ -2898,8 +2902,8 @@ + } + + n_per_seg = n / n_segments; +- n_write_segs = (n_segments - 2) / 2; +- n_read_segs = n_segments - 2 - n_write_segs; ++ n_write_segs = n_write_threads; ++ n_read_segs = n_read_threads; + + /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */ + +@@ -3180,6 +3184,13 @@ + struct aiocb* control; + #endif + ulint i; ++ ulint prim_segment; ++ ulint n; ++ ++ n = array->n_slots / array->n_segments; ++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */ ++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments); ++ + loop: + os_mutex_enter(array->mutex); + +@@ -3198,6 +3209,16 @@ + goto loop; + } + ++ for (i = prim_segment * n; i < array->n_slots; i++) { ++ slot = os_aio_array_get_nth_slot(array, i); ++ ++ if (slot->reserved == FALSE) { ++ break; ++ } ++ } ++ ++ if (slot->reserved == TRUE){ ++ /* Not found after the 
intended segment. So we should search before. */ + for (i = 0;; i++) { + slot = os_aio_array_get_nth_slot(array, i); + +@@ -3205,6 +3226,7 @@ + break; + } + } ++ } + + array->n_reserved++; + +diff -ruN a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c 2009-01-09 15:40:23.000000000 +0900 ++++ b/innobase/srv/srv0srv.c 2009-01-09 15:58:36.000000000 +0900 +@@ -167,6 +167,8 @@ + ulint srv_lock_table_size = ULINT_MAX; + + ulint srv_n_file_io_threads = ULINT_MAX; ++ulint srv_n_read_io_threads = 1; ++ulint srv_n_write_io_threads = 1; + + #ifdef UNIV_LOG_ARCHIVE + ibool srv_log_archive_on = FALSE; +@@ -324,6 +326,22 @@ + ibool srv_use_awe = FALSE; + ibool srv_use_adaptive_hash_indexes = TRUE; + ++ulint srv_io_capacity = 100; ++ ++/* Returns the number of IO operations that is X percent of the capacity. ++PCT_IO(5) -> returns the number of IO operations that is 5% of the max ++where max is srv_io_capacity. */ ++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0))) ++ ++long long srv_ibuf_max_size = 0; ++ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */ ++ulint srv_ibuf_accel_rate = 100; ++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) ++ ++ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ ++ ++uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ ++ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 20; + ulong srv_n_free_tickets_to_enter = 500; +@@ -2214,6 +2232,8 @@ + ibool skip_sleep = FALSE; + ulint i; + ++ dulint oldest_lsn; ++ + #ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Master thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +@@ -2302,9 +2322,9 @@ + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { ++ if 
(n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) { + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5)); + + srv_main_thread_op_info = "flushing log"; + +@@ -2317,7 +2337,7 @@ + /* Try to keep the number of modified pages in the + buffer pool under the limit wished by the user */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + + /* If we had to do the flush, it may have taken +@@ -2326,6 +2346,49 @@ + iteration of this loop. */ + + skip_sleep = TRUE; ++ } else if (srv_adaptive_checkpoint) { ++ ++ /* Try to keep modified age not to exceed ++ max_checkpoint_age * 7/8 line */ ++ ++ mutex_enter(&(log_sys->mutex)); ++ ++ oldest_lsn = buf_pool_get_oldest_modification(); ++ if (ut_dulint_is_zero(oldest_lsn)) { ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ } else { ++ if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) { ++ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */ ++ /* We should not flush from here. 
*/ ++ mutex_exit(&(log_sys->mutex)); ++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) { ++ ++ /* 2nd defence line (max_checkpoint_age * 3/4) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age)/2 ) { ++ ++ /* 1st defence line (max_checkpoint_age * 1/2) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else { ++ mutex_exit(&(log_sys->mutex)); ++ } ++ } ++ + } + + if (srv_activity_count == old_activity_count) { +@@ -2352,10 +2415,10 @@ + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { ++ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { + + srv_main_thread_op_info = "flushing buffer pool pages"; +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2365,7 +2428,7 @@ + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5)); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2407,14 +2470,14 @@ + (> 70 %), we assume we can afford reserving the disk(s) for + the time it requires to flush 100 pages */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + } else { + /* Otherwise, we only flush a small number of pages so that + we do not 
unnecessarily use much disk i/o capacity from + other work */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), + ut_dulint_max); + } + +@@ -2503,7 +2566,7 @@ + if (srv_fast_shutdown && srv_shutdown_state > 0) { + n_bytes_merged = 0; + } else { +- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); ++ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(100)); + } + + srv_main_thread_op_info = "reserving kernel mutex"; +@@ -2520,7 +2583,7 @@ + + if (srv_fast_shutdown < 2) { + n_pages_flushed = +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + } else { + /* In the fastest shutdown we do not flush the buffer pool + to data files: we set n_pages_flushed to 0 artificially. */ +diff -ruN a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c +--- a/innobase/srv/srv0start.c 2008-12-19 02:19:37.000000000 +0900 ++++ b/innobase/srv/srv0start.c 2009-01-09 15:40:46.000000000 +0900 +@@ -1205,24 +1205,28 @@ + return(DB_ERROR); + } + ++ /* over write innodb_file_io_threads */ ++ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads; ++ + /* Restrict the maximum number of file i/o threads */ + if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { + + srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; ++ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2; + } + + if (!os_aio_use_native_aio) { + /* In simulated aio we currently have use only for 4 threads */ +- srv_n_file_io_threads = 4; ++ /*srv_n_file_io_threads = 4;*/ + + os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, +- SRV_MAX_N_PENDING_SYNC_IOS); ++ srv_n_read_io_threads, srv_n_write_io_threads, ++ SRV_MAX_N_PENDING_SYNC_IOS * 8); + } else { + os_aio_init(SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, ++ srv_n_read_io_threads, 
srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS); + } + +diff -ruN a/patch_info/innodb_io_patches.info b/patch_info/innodb_io_patches.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ b/patch_info/innodb_io_patches.info 2009-01-09 15:59:05.000000000 +0900 +@@ -0,0 +1,11 @@ ++File=innodb_io_patches.patch ++Name=Cluster of past InnoDB IO patches ++Version=1.1 ++Author=Percona ++License=GPL ++Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush) ++ChangeLog= ++2008-11-06 ++YK: Initial release ++2009-01-09 ++YK: Some parameters are added +diff -ruN a/sql/ha_innodb.cc b/sql/ha_innodb.cc +--- a/sql/ha_innodb.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/ha_innodb.cc 2009-01-09 15:40:46.000000000 +0900 +@@ -149,6 +149,7 @@ + innobase_lock_wait_timeout, innobase_force_recovery, + innobase_open_files; + ++long innobase_read_io_threads, innobase_write_io_threads; + longlong innobase_buffer_pool_size, innobase_log_file_size; + + /* The default values for the following char* start-up parameters +@@ -1403,6 +1404,8 @@ + srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; + + srv_n_file_io_threads = (ulint) innobase_file_io_threads; ++ srv_n_read_io_threads = (ulint) innobase_read_io_threads; ++ srv_n_write_io_threads = (ulint) innobase_write_io_threads; + + srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; + srv_force_recovery = (ulint) innobase_force_recovery; +diff -ruN a/sql/ha_innodb.h b/sql/ha_innodb.h +--- a/sql/ha_innodb.h 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/ha_innodb.h 2009-01-09 15:59:41.000000000 +0900 +@@ -204,6 +204,7 @@ + extern long innobase_additional_mem_pool_size; + extern long innobase_buffer_pool_awe_mem_mb; + extern long innobase_file_io_threads, innobase_lock_wait_timeout; ++extern long innobase_read_io_threads, innobase_write_io_threads; + extern long innobase_force_recovery; + extern long innobase_open_files; + extern char *innobase_data_home_dir, 
*innobase_data_file_path; +@@ -234,6 +235,13 @@ + extern ulong srv_thread_concurrency; + extern ulong srv_commit_concurrency; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_io_capacity; ++extern long long srv_ibuf_max_size; ++extern ulong srv_ibuf_active_contract; ++extern ulong srv_ibuf_accel_rate; ++extern ulong srv_flush_neighbor_pages; ++extern uint srv_read_ahead; ++extern ulong srv_adaptive_checkpoint; + } + + bool innobase_init(void); +diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc +--- a/sql/mysqld.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/mysqld.cc 2009-01-09 16:01:25.000000000 +0900 +@@ -5036,6 +5036,15 @@ + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, + OPT_KEEP_FILES_ON_CREATE, ++ OPT_INNODB_IO_CAPACITY, ++ OPT_INNODB_IBUF_MAX_SIZE, ++ OPT_INNODB_IBUF_ACTIVE_CONTRACT, ++ OPT_INNODB_IBUF_ACCEL_RATE, ++ OPT_INNODB_FLUSH_NEIGHBOR_PAGES, ++ OPT_INNODB_READ_AHEAD, ++ OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ OPT_INNODB_READ_IO_THREADS, ++ OPT_INNODB_WRITE_IO_THREADS, + OPT_INNODB_ADAPTIVE_HASH_INDEX, + OPT_FEDERATED + }; +@@ -5344,6 +5353,41 @@ + (gptr*) &global_system_variables.innodb_table_locks, + (gptr*) &global_system_variables.innodb_table_locks, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, ++ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY, ++ "Number of IO operations per second the server can do. Tunes background IO rate.", ++ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity, ++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, ++ {"innodb_ibuf_max_size", OPT_INNODB_IBUF_MAX_SIZE, ++ "The maximum size of the insert buffer. (in bytes)", ++ (gptr*) &srv_ibuf_max_size, (gptr*) &srv_ibuf_max_size, 0, ++ GET_LL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, ++ {"innodb_ibuf_active_contract", OPT_INNODB_IBUF_ACTIVE_CONTRACT, ++ "Enable/Disable active_contract of insert buffer. 
0:disable 1:enable", ++ (gptr*) &srv_ibuf_active_contract, (gptr*) &srv_ibuf_active_contract, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_ibuf_accel_rate", OPT_INNODB_IBUF_ACCEL_RATE, ++ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", ++ (gptr*) &srv_ibuf_accel_rate, (gptr*) &srv_ibuf_accel_rate, ++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, ++ {"innodb_flush_neighbor_pages", OPT_INNODB_FLUSH_NEIGHBOR_PAGES, ++ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", ++ (gptr*) &srv_flush_neighbor_pages, (gptr*) &srv_flush_neighbor_pages, ++ 0, GET_ULONG, REQUIRED_ARG, 1, 0, 1, 0, 0, 0}, ++ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD, ++ "Control read ahead activity. (none, random, linear, [both])", ++ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, ++ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ "Enable/Diasable flushing along modified age. 0:disable 1:enable", ++ (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS, ++ "Number of background read I/O threads in InnoDB.", ++ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS, ++ "Number of background write I/O threads in InnoDB.", ++ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +@@ -7637,6 +7636,22 @@ + case OPT_INNODB_LOG_ARCHIVE: + innobase_log_archive= argument ? 
test(atoi(argument)) : 1; + break; ++ case OPT_INNODB_READ_AHEAD: ++ if (argument == disabled_my_option) ++ srv_read_ahead = 0; ++ else if (! argument) ++ srv_read_ahead = 3; ++ else ++ { ++ int type; ++ if ((type=find_type(argument, &innodb_read_ahead_typelib, 2)) <= 0) ++ { ++ fprintf(stderr,"Unknown innodb_read_ahead type: %s\n",argument); ++ exit(1); ++ } ++ srv_read_ahead = (uint) ((type - 1) & 3); ++ } ++ break; + #endif /* HAVE_INNOBASE_DB */ + case OPT_MYISAM_RECOVER: + { +diff -ruN a/sql/set_var.cc b/sql/set_var.cc +--- a/sql/set_var.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/set_var.cc 2009-01-09 16:05:22.000000000 +0900 +@@ -484,6 +484,37 @@ + sys_var_long_ptr sys_innodb_flush_log_at_trx_commit( + "innodb_flush_log_at_trx_commit", + &srv_flush_log_at_trx_commit); ++sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity", ++ &srv_io_capacity); ++sys_var_long_ptr sys_innodb_ibuf_active_contract("innodb_ibuf_active_contract", ++ &srv_ibuf_active_contract); ++sys_var_long_ptr sys_innodb_ibuf_accel_rate("innodb_ibuf_accel_rate", ++ &srv_ibuf_accel_rate); ++sys_var_long_ptr sys_innodb_flush_neighbor_pages("innodb_flush_neighbor_pages", ++ &srv_flush_neighbor_pages); ++ ++const char *innodb_read_ahead_names[]= ++{ ++ "none", /* 0 */ ++ "random", ++ "linear", ++ "both", /* 3 */ ++ /* For compatibility of the older patch */ ++ "0", /* 4 ("none" + 4) */ ++ "1", ++ "2", ++ "3", /* 7 ("both" + 4) */ ++ NullS ++}; ++TYPELIB innodb_read_ahead_typelib= ++{ ++ array_elements(innodb_read_ahead_names) - 1, "innodb_read_ahead_typelib", ++ innodb_read_ahead_names, NULL ++}; ++sys_var_enum sys_innodb_read_ahead("innodb_read_ahead", &srv_read_ahead, ++ &innodb_read_ahead_typelib, fix_innodb_read_ahead); ++sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", ++ &srv_adaptive_checkpoint); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr 
sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -847,6 +859,12 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_io_capacity, ++ &sys_innodb_ibuf_active_contract, ++ &sys_innodb_ibuf_accel_rate, ++ &sys_innodb_flush_neighbor_pages, ++ &sys_innodb_read_ahead, ++ &sys_innodb_adaptive_checkpoint, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -982,6 +1000,15 @@ + {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, + {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, + {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, ++ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS}, ++ {"innodb_ibuf_max_size", (char*) &srv_ibuf_max_size, SHOW_LONGLONG}, ++ {sys_innodb_ibuf_active_contract.name, (char*) &sys_innodb_ibuf_active_contract, SHOW_SYS}, ++ {sys_innodb_ibuf_accel_rate.name, (char*) &sys_innodb_ibuf_accel_rate, SHOW_SYS}, ++ {sys_innodb_flush_neighbor_pages.name, (char*) &sys_innodb_flush_neighbor_pages, SHOW_SYS}, ++ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS}, ++ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, ++ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, ++ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, +@@ -1482,6 +1501,13 @@ + } + } + ++#ifdef HAVE_INNOBASE_DB ++extern void fix_innodb_read_ahead(THD *thd, enum_var_type type) ++{ ++ srv_read_ahead &= 3; ++} ++#endif /* HAVE_INNOBASE_DB */ ++ + static void fix_max_binlog_size(THD *thd, enum_var_type type) + { + DBUG_ENTER("fix_max_binlog_size"); +diff -ruN a/sql/set_var.h b/sql/set_var.h +--- a/sql/set_var.h 
2009-01-12 11:20:31.000000000 +0900 ++++ b/sql/set_var.h 2009-01-12 15:26:35.000000000 +0900 +@@ -31,6 +31,10 @@ + + extern TYPELIB bool_typelib, delay_key_write_typelib, sql_mode_typelib; + ++#ifdef HAVE_INNOBASE_DB ++extern TYPELIB innodb_read_ahead_typelib; ++#endif /* HAVE_INNOBASE_DB */ ++ + typedef int (*sys_check_func)(THD *, set_var *); + typedef bool (*sys_update_func)(THD *, set_var *); + typedef void (*sys_after_update_func)(THD *,enum_var_type); +@@ -1114,6 +1118,9 @@ + int sql_set_variables(THD *thd, List<set_var_base> *var_list); + bool not_all_support_one_shot(List<set_var_base> *var_list); + void fix_delay_key_write(THD *thd, enum_var_type type); ++#ifdef HAVE_INNOBASE_DB ++void fix_innodb_read_ahead(THD *thd, enum_var_type type); ++#endif /* HAVE_INNOBASE_DB */ + ulong fix_sql_mode(ulong sql_mode); + extern sys_var_const_str sys_charset_system; + extern sys_var_str sys_init_connect; diff --git a/percona/5.0.75-b12/innodb_io_pattern.patch b/percona/5.0.75-b12/innodb_io_pattern.patch new file mode 100644 index 0000000..604404f --- /dev/null +++ b/percona/5.0.75-b12/innodb_io_pattern.patch @@ -0,0 +1,688 @@ +diff -r 2bbfde0e0e70 include/mysql_com.h +--- a/include/mysql_com.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/include/mysql_com.h Mon Dec 22 00:33:48 2008 -0800 +@@ -121,6 +121,9 @@ + #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */ + #define REFRESH_DES_KEY_FILE 0x40000L + #define REFRESH_USER_RESOURCES 0x80000L ++ ++/* TRUNCATE INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++#define REFRESH_INNODB_IO_PATTERN 0x1000000L + + #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */ + #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */ +diff -r 2bbfde0e0e70 innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:48 2008 -0800 +@@ -653,6 +653,9 @@ + } + + buf_pool->page_hash = hash_create(2 * max_size); ++ buf_pool->io_counter_hash = NULL; ++ 
buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; + + buf_pool->n_pend_reads = 0; + +@@ -1966,6 +1969,9 @@ + ulint io_type; + ulint read_page_no; + ++ buf_io_counter_t* io_counter; ++ ulint fold; ++ + ut_ad(block); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); +@@ -2067,6 +2073,26 @@ + buf_pool->n_pages_read++; + + rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; ++ io_counter->n_read = 0; ++ io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_read++; ++ } ++ } + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +@@ -2082,6 +2108,26 @@ + buf_flush_write_complete(block); + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; 
++ io_counter->n_read = 0; ++ io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_write++; ++ } ++ } + + buf_pool->n_pages_written++; + +@@ -2656,3 +2702,58 @@ + return buf_pool_get_nth_block(buf_pool, i); + + } ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void) ++/*========================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (srv_io_pattern_trace) { ++ if (buf_pool->io_counter_hash == NULL) { ++ /* estimating (buf_pool * 10) */ ++ buf_pool->io_counter_hash = hash_create(20 * buf_pool->max_size); ++ buf_pool->io_counter_heap = mem_heap_create(4096 * 1024); ++ buf_pool->io_counters = 0; ++ ++ srv_io_pattern = TRUE; ++ } ++ } else { ++ if (buf_pool->io_counter_hash != NULL) { ++ srv_io_pattern = FALSE; ++ ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_free(buf_pool->io_counter_heap); ++ buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; ++ ++ hash_table_free(buf_pool->io_counter_hash); ++ buf_pool->io_counter_hash = NULL; ++ } ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} ++ ++void ++buf_io_counter_clear(void) ++/*======================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (buf_pool->io_counter_hash != NULL) { ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_empty(buf_pool->io_counter_heap); ++ buf_pool->io_counters = 0; ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} +diff -r 2bbfde0e0e70 
innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:48 2008 -0800 +@@ -709,6 +709,18 @@ + void buf_pool_dump(void); + buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void); ++/*=========================*/ ++ ++void ++buf_io_counter_clear(void); ++/*=======================*/ + + /* The buffer control block structure */ + +@@ -930,6 +942,9 @@ + ulint curr_size; /* current pool size in pages; + currently always the same as + max_size */ ++ hash_table_t* io_counter_hash; ++ mem_heap_t* io_counter_heap; ++ ulint io_counters; + hash_table_t* page_hash; /* hash table of the file pages */ + + ulint n_pend_reads; /* number of pending read operations */ +@@ -1015,6 +1030,15 @@ + locki table, are not in this list */ + }; + ++struct buf_io_counter_struct{ ++ ulint space; ++ ulint offset; ++ buf_io_counter_t* hash; ++ ulint index_id; ++ ulint n_read; ++ ulint n_write; ++}; ++ + /* States of a control block */ + #define BUF_BLOCK_NOT_USED 211 /* is in the free list */ + #define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns +diff -r 2bbfde0e0e70 innobase/include/buf0types.h +--- a/innobase/include/buf0types.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/buf0types.h Mon Dec 22 00:33:48 2008 -0800 +@@ -12,6 +12,8 @@ + typedef struct buf_block_struct buf_block_t; + typedef struct buf_pool_struct buf_pool_t; + ++typedef struct buf_io_counter_struct buf_io_counter_t; ++ + /* The 'type' used of a buffer frame */ + typedef byte buf_frame_t; + +diff -r 2bbfde0e0e70 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:33:48 2008 -0800 +@@ -141,6 
+141,11 @@ + extern ulint srv_io_capacity; + extern ulint srv_read_ahead; + extern ulint srv_adaptive_checkpoint; ++ ++extern volatile ibool srv_io_pattern; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -r 2bbfde0e0e70 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:33:48 2008 -0800 +@@ -337,6 +337,11 @@ + + ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ + ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ ++ ++volatile ibool srv_io_pattern = FALSE; ++ulint srv_io_pattern_trace = 0; ++ulint srv_io_pattern_trace_running = 0; ++ulint srv_io_pattern_size_limit = ULINT_MAX - (1024 * 1024); + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 20; + ulong srv_n_free_tickets_to_enter = 500; +diff -r 2bbfde0e0e70 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Dec 22 00:33:48 2008 -0800 +@@ -59,6 +59,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + columns_priv + db + func +@@ -742,7 +743,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-108 ++109 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -812,12 +813,13 @@ + TABLE_PRIVILEGES TABLE_NAME select + TABLE_STATISTICS TABLE_NAME select + VIEWS TABLE_NAME select ++INNODB_IO_PATTERN TABLE_NAME select + delete from mysql.user where user='mysqltest_4'; + delete from mysql.db where user='mysqltest_4'; + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 23 
++information_schema 24 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1225,6 +1227,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT t.table_name, c1.column_name + FROM information_schema.tables t + INNER JOIN +@@ -1263,6 +1266,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT MAX(table_name) FROM information_schema.tables; + MAX(table_name) + VIEWS +@@ -1337,6 +1341,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 ++INNODB_IO_PATTERN information_schema.INNODB_IO_PATTERN 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 +diff -r 2bbfde0e0e70 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:48 2008 -0800 +@@ -28,6 +28,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + show tables from INFORMATION_SCHEMA like 'T%'; + Tables_in_information_schema (T%) + TABLES +diff -r 2bbfde0e0e70 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:48 2008 -0800 +@@ -102,6 +102,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Database: INFORMATION_SCHEMA + +---------------------------------------+ +@@ -130,6 +131,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Wildcard: inf_rmation_schema + +--------------------+ +diff -r 2bbfde0e0e70 
patch_info/innodb_io_pattern.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_io_pattern.info Mon Dec 22 00:33:48 2008 -0800 +@@ -0,0 +1,8 @@ ++File=innodb_io_pattern.patch ++Name=Information schema table of InnoDB IO counts for each datafile pages ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=INFORMATION_SCHEMA.INNODB_IO_PATTERN ++2008-12-01 ++YK: fix for mysql-test +diff -r 2bbfde0e0e70 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -1569,6 +1569,8 @@ + pthread_cond_init(&commit_cond, NULL); + innodb_inited= 1; + ++ buf_io_counter_control(); ++ + /* If this is a replication slave and we needed to do a crash recovery, + set the master binlog position to what InnoDB internally knew about + how far we got transactions durable inside InnoDB. There is a +@@ -6527,6 +6529,28 @@ + } + + /**************************************************************************** ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++innodb_io_pattern_control(void) ++/*===========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_control(); ++ } ++} ++ ++void ++innodb_io_pattern_clear(void) ++/*=========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_clear(); ++ } ++} ++ ++/**************************************************************************** + Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB + Monitor to the client. 
*/ + +diff -r 2bbfde0e0e70 sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:33:48 2008 -0800 +@@ -240,6 +240,9 @@ + extern ulong srv_adaptive_checkpoint; + extern ulong srv_show_locks_held; + extern ulong srv_show_verbose_locks; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + } + + bool innobase_init(void); +@@ -266,6 +269,9 @@ + bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); ++ ++void innodb_io_pattern_control(void); ++void innodb_io_pattern_clear(void); + + void innobase_release_temporary_latches(THD *thd); + +diff -r 2bbfde0e0e70 sql/lex.h +--- a/sql/lex.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/lex.h Mon Dec 22 00:33:48 2008 -0800 +@@ -244,6 +244,7 @@ + { "INNER", SYM(INNER_SYM)}, + { "INNOBASE", SYM(INNOBASE_SYM)}, + { "INNODB", SYM(INNOBASE_SYM)}, ++ { "INNODB_IO_PATTERN", SYM(INNODB_IO_PATTERN)}, + { "INOUT", SYM(INOUT_SYM)}, + { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, + { "INSERT", SYM(INSERT)}, +diff -r 2bbfde0e0e70 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -4983,6 +4983,9 @@ + OPT_INNODB_SYNC_SPIN_LOOPS, + OPT_INNODB_CONCURRENCY_TICKETS, + OPT_INNODB_THREAD_SLEEP_DELAY, ++ OPT_INNODB_IO_PATTERN_TRACE, ++ OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ OPT_INNODB_IO_PATTERN_SIZE_LIMIT, + OPT_BDB_CACHE_SIZE, + OPT_BDB_LOG_BUFFER_SIZE, + OPT_BDB_MAX_LOCK, +@@ -5382,6 +5385,18 @@ + "Number of background write I/O threads in InnoDB.", + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE, ++ "Create/Drop the internal hash table for IO pattern tracing.", ++ (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace, ++ 0, GET_ULONG, 
REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_trace_running", OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ "Control IO pattern trace running or not.", ++ (gptr*) &srv_io_pattern_trace_running, (gptr*) &srv_io_pattern_trace_running, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_size_limit", OPT_INNODB_IO_PATTERN_SIZE_LIMIT, ++ "Set max number of counters per data pages. (0 = disable counting).", ++ (gptr*) &srv_io_pattern_size_limit, (gptr*) &srv_io_pattern_size_limit, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX - (1024 * 1024), 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +diff -r 2bbfde0e0e70 sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -501,6 +501,12 @@ + sys_var_long_ptr sys_innodb_show_verbose_locks( + "innodb_show_verbose_locks", + &srv_show_verbose_locks); ++sys_var_innodb_io_pattern_trace sys_innodb_io_pattern_trace("innodb_io_pattern_trace", ++ &srv_io_pattern_trace); ++sys_var_long_ptr sys_innodb_io_pattern_trace_running("innodb_io_pattern_trace_running", ++ &srv_io_pattern_trace_running); ++sys_var_long_ptr sys_innodb_io_pattern_size_limit("innodb_io_pattern_size_limit", ++ &srv_io_pattern_size_limit); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -870,6 +876,9 @@ + &sys_innodb_adaptive_checkpoint, + &sys_innodb_show_locks_held, + &sys_innodb_show_verbose_locks, ++ &sys_innodb_io_pattern_trace, ++ &sys_innodb_io_pattern_trace_running, ++ &sys_innodb_io_pattern_size_limit, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -1012,6 +1021,9 @@ + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, + 
{"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, ++ {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, SHOW_SYS}, ++ {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS}, ++ {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, +@@ -3117,6 +3129,19 @@ + thd->variables.lc_time_names= global_system_variables.lc_time_names; + } + ++#ifdef HAVE_INNOBASE_DB ++bool sys_var_innodb_io_pattern_trace::update(THD *thd, set_var *var) ++{ ++ bool ret; ++ ++ ret = sys_var_long_ptr_global::update(thd, var); ++ ++ innodb_io_pattern_control(); ++ ++ return ret; ++} ++#endif /* HAVE_INNOBASE_DB */ ++ + /* + Functions to update thd->options bits + */ +diff -r 2bbfde0e0e70 sql/set_var.h +--- a/sql/set_var.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/set_var.h Mon Dec 22 00:33:48 2008 -0800 +@@ -985,6 +985,17 @@ + virtual void set_default(THD *thd, enum_var_type type); + }; + ++#ifdef HAVE_INNOBASE_DB ++/* sys_var_innodb_io_pattern_trace */ ++class sys_var_innodb_io_pattern_trace :public sys_var_long_ptr ++{ ++public: ++ sys_var_innodb_io_pattern_trace(const char *name_arg, ulong *value_ptr_arg) ++ :sys_var_long_ptr(name_arg,value_ptr_arg) {} ++ bool update(THD *thd, set_var *var); ++}; ++#endif /* HAVE_INNOBASE_DB */ ++ + /**************************************************************************** + Classes for parsing of the SET command + ****************************************************************************/ +diff -r 2bbfde0e0e70 sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -7998,6 +7998,13 @@ + } + 
pthread_mutex_unlock(&LOCK_global_user_client_stats); + } ++#ifdef HAVE_INNOBASE_DB ++ if (options & REFRESH_INNODB_IO_PATTERN) ++ { ++ tmp_write_to_binlog= 0; ++ innodb_io_pattern_clear(); ++ } ++#endif /* HAVE_INNOBASE_DB */ + *write_to_binlog= tmp_write_to_binlog; + return result; + } +diff -r 2bbfde0e0e70 sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -32,6 +32,17 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++ ++#ifdef HAVE_INNOBASE_DB ++#define INSIDE_HA_INNOBASE_CC ++extern "C" { ++#include "srv0srv.h" ++#include "buf0buf.h" ++#include "dict0dict.h" ++} ++/* We need to undef it in InnoDB */ ++#undef byte ++#endif /* HAVE_INNOBASE_DB */ + + #ifndef NO_EMBEDDED_ACCESS_CHECKS + static const char *grant_names[]={ +@@ -4074,6 +4085,67 @@ + DBUG_RETURN(res); + } + ++int innodb_io_pattern_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ TABLE *table= (TABLE *) tables->table; ++ ++ buf_io_counter_t* io_counter; ++ dict_index_t* index; ++ ++ DBUG_ENTER("innodb_io_pattern_fill_table"); ++ int returnable= 0; ++ ++ /* We cannot use inline functions of InnoDB here */ ++ ++ /* !!!!!ATTENTION!!!!!: This function is not protected by mutex for performance. */ ++ /* Don't use "DROP TABLE innodb_io_pattern" and INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++ /* at the same time as possible. 
*/ ++ ++ if (srv_io_pattern) { ++ for (ulint n=0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ io_counter = (buf_io_counter_t*)(buf_pool->io_counter_hash->array + n)->node; ++ while (io_counter) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ if (dict_sys != NULL) { ++ dulint id; ++ id.high = 0; ++ id.low = io_counter->index_id; ++ index = dict_index_find_on_id_low(id); ++ } else { ++ index = NULL; ++ } ++ ++ table->field[0]->store(io_counter->space); ++ table->field[1]->store(io_counter->offset); ++ table->field[2]->store(io_counter->index_id); ++ if (index != NULL) { ++ table->field[3]->store(index->table_name,strlen(index->table_name),system_charset_info); ++ table->field[4]->store(index->name,strlen(index->name),system_charset_info); ++ } else { ++ table->field[3]->store("",0,system_charset_info); ++ table->field[4]->store("",0,system_charset_info); ++ } ++ table->field[5]->store(io_counter->n_read); ++ table->field[6]->store(io_counter->n_write); ++ if (schema_table_store_record(thd, table)) ++ { ++ returnable= 1; ++ goto end_func; ++ } ++ io_counter = io_counter->hash; ++ } ++ } ++ } ++ ++ end_func: ++ DBUG_RETURN(returnable); ++} ++ + /* + Find schema_tables elment by name + +@@ -4880,6 +4952,19 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++#ifdef HAVE_INNOBASE_DB ++ST_FIELD_INFO innodb_io_pattern_field_info[]= ++{ ++ {"SPACE", 11, MYSQL_TYPE_LONG, 0, 0, "space_id"}, ++ {"OFFSET", 11, MYSQL_TYPE_LONG, 0, 0, "offset"}, ++ {"INDEX_ID", 11, MYSQL_TYPE_LONG, 0, 0, "index id"}, ++ {"TABLE_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "table name"}, ++ {"INDEX_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "index name"}, ++ {"N_READ", 11, MYSQL_TYPE_LONG, 0, 0, "read ios"}, ++ {"N_WRITE", 11, MYSQL_TYPE_LONG, 0, 0, "write ios"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++#endif + + ST_FIELD_INFO variables_fields_info[]= + { +@@ -5055,6 +5140,10 @@ + make_old_format, 0, -1, -1, 1}, + {"VIEWS", view_fields_info, 
create_schema_table, + get_all_tables, 0, get_schema_views_record, 1, 2, 0}, ++#ifdef HAVE_INNOBASE_DB ++ {"INNODB_IO_PATTERN", innodb_io_pattern_field_info, create_schema_table, ++ innodb_io_pattern_fill_table, 0, 0, -1, -1, 0}, ++#endif + {0, 0, 0, 0, 0, 0, 0, 0, 0} + }; + +diff -r 2bbfde0e0e70 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_yacc.yy Mon Dec 22 00:33:48 2008 -0800 +@@ -685,6 +685,7 @@ + %token INFILE + %token INNER_SYM + %token INNOBASE_SYM ++%token INNODB_IO_PATTERN + %token INOUT_SYM + %token INSENSITIVE_SYM + %token INSERT +@@ -8541,6 +8542,7 @@ + | MASTER_SYM { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } + | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } ++ | INNODB_IO_PATTERN { Lex->type|= REFRESH_INNODB_IO_PATTERN; } + | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; } + | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; } + | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; } +@@ -9594,6 +9596,7 @@ + | ISOLATION {} + | ISSUER_SYM {} + | INNOBASE_SYM {} ++ | INNODB_IO_PATTERN {} + | INSERT_METHOD {} + | IO_SYM {} + | IPC_SYM {} diff --git a/percona/5.0.75-b12/innodb_locks_held.patch b/percona/5.0.75-b12/innodb_locks_held.patch new file mode 100644 index 0000000..18c99f4 --- /dev/null +++ b/percona/5.0.75-b12/innodb_locks_held.patch @@ -0,0 +1,219 @@ +diff -r ae6708ab17e5 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:32:07 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:58 2008 -0800 +@@ -80,6 +80,8 @@ + extern ulint srv_log_file_size; + extern ulint srv_log_buffer_size; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + + extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 + character set */ +diff -r ae6708ab17e5 innobase/lock/lock0lock.c +--- a/innobase/lock/lock0lock.c Mon Dec 22 00:32:07 2008 -0800 ++++ 
b/innobase/lock/lock0lock.c Mon Dec 22 00:32:58 2008 -0800 +@@ -4181,6 +4181,7 @@ + #endif /* UNIV_SYNC_DEBUG */ + } + ++ if ( srv_show_verbose_locks ) { + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { +@@ -4198,6 +4199,7 @@ + putc('\n', file); + } + } ++ } /* srv_show_verbose_locks */ + + mtr_commit(&mtr); + if (UNIV_LIKELY_NULL(heap)) { +@@ -4369,7 +4371,7 @@ + } + } + +- if (!srv_print_innodb_lock_monitor) { ++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) { + nth_trx++; + goto loop; + } +@@ -4426,9 +4428,9 @@ + + nth_lock++; + +- if (nth_lock >= 10) { ++ if (nth_lock >= srv_show_locks_held) { + fputs( +- "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", ++ "TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", + file); + + nth_trx++; +diff -r ae6708ab17e5 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:32:07 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:58 2008 -0800 +@@ -116,6 +116,8 @@ + ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ + ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ + ulong srv_flush_log_at_trx_commit = 1; ++ulong srv_show_locks_held = 10; ++ulong srv_show_verbose_locks = 0; + + byte srv_latin1_ordering[256] /* The sort order table of the latin1 + character set. 
The following table is +@@ -1694,24 +1696,6 @@ + + mutex_exit(&dict_foreign_err_mutex); + +- lock_print_info_summary(file); +- if (trx_start) { +- long t = ftell(file); +- if (t < 0) { +- *trx_start = ULINT_UNDEFINED; +- } else { +- *trx_start = (ulint) t; +- } +- } +- lock_print_info_all_transactions(file); +- if (trx_end) { +- long t = ftell(file); +- if (t < 0) { +- *trx_end = ULINT_UNDEFINED; +- } else { +- *trx_end = (ulint) t; +- } +- } + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); +@@ -1805,6 +1789,25 @@ + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; + ++ lock_print_info_summary(file); ++ if (trx_start) { ++ long t = ftell(file); ++ if (t < 0) { ++ *trx_start = ULINT_UNDEFINED; ++ } else { ++ *trx_start = (ulint) t; ++ } ++ } ++ lock_print_info_all_transactions(file); ++ if (trx_end) { ++ long t = ftell(file); ++ if (t < 0) { ++ *trx_end = ULINT_UNDEFINED; ++ } else { ++ *trx_end = (ulint) t; ++ } ++ } ++ + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + "============================\n", file); +diff -r ae6708ab17e5 libmysqld/set_var.cc +--- a/libmysqld/set_var.cc Mon Dec 22 00:32:07 2008 -0800 ++++ b/libmysqld/set_var.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -821,6 +821,8 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -936,6 +938,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + 
{"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, +diff -r ae6708ab17e5 patch_info/innodb_locks_held.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_locks_held.info Mon Dec 22 00:32:58 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_locks_held.patch ++Name=Add locks held, remove locked records in SHOW INNODB STATUS ++Version=1.0 ++Author=Baron Schwartz <baron@xaprb.com> ++License=GPL ++Comment=Bug #29126 fix +diff -r ae6708ab17e5 sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:32:07 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:32:58 2008 -0800 +@@ -238,6 +238,8 @@ + extern ulong srv_io_capacity; + extern ulong srv_read_ahead; + extern ulong srv_adaptive_checkpoint; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + } + + bool innobase_init(void); +diff -r ae6708ab17e5 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:32:07 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -4969,6 +4969,8 @@ + OPT_INNODB_MAX_PURGE_LAG, + OPT_INNODB_FILE_IO_THREADS, + OPT_INNODB_LOCK_WAIT_TIMEOUT, ++ OPT_INNODB_SHOW_LOCKS_HELD, ++ OPT_INNODB_SHOW_VERBOSE_LOCKS, + OPT_INNODB_THREAD_CONCURRENCY, + OPT_INNODB_COMMIT_CONCURRENCY, + OPT_INNODB_FORCE_RECOVERY, +@@ -5308,6 +5310,14 @@ + (gptr*) &srv_flush_log_at_trx_commit, + (gptr*) &srv_flush_log_at_trx_commit, + 0, GET_ULONG, OPT_ARG, 1, 0, 2, 0, 0, 0}, ++ {"innodb_show_locks_held", OPT_INNODB_SHOW_LOCKS_HELD, ++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_locks_held, (gptr*) &srv_show_locks_held, ++ 0, GET_LONG, OPT_ARG, 10, 0, 1000, 0, 1, 0}, ++ {"innodb_show_verbose_locks", OPT_INNODB_SHOW_VERBOSE_LOCKS, ++ "Whether to show records locked in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_verbose_locks, (gptr*) 
&srv_show_verbose_locks, ++ 0, GET_LONG, OPT_ARG, 0, 0, 1, 0, 1, 0}, + {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD, + "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method, + (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, +diff -r ae6708ab17e5 sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:32:07 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -495,6 +495,12 @@ + &srv_read_ahead); + sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", + &srv_adaptive_checkpoint); ++sys_var_long_ptr sys_innodb_show_locks_held( ++ "innodb_show_locks_held", ++ &srv_show_locks_held); ++sys_var_long_ptr sys_innodb_show_verbose_locks( ++ "innodb_show_verbose_locks", ++ &srv_show_verbose_locks); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -862,6 +868,8 @@ + &sys_innodb_io_capacity, + &sys_innodb_read_ahead, + &sys_innodb_adaptive_checkpoint, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -977,6 +985,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, diff --git a/percona/5.0.75-b12/innodb_rw_lock.patch 
b/percona/5.0.75-b12/innodb_rw_lock.patch new file mode 100644 index 0000000..3070bb0 --- /dev/null +++ b/percona/5.0.75-b12/innodb_rw_lock.patch @@ -0,0 +1,1459 @@ +diff -r 962aec0d731c innobase/configure +--- a/innobase/configure Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure Thu Oct 09 08:30:28 2008 -0700 +@@ -20519,6 +20519,88 @@ + + fi + done ++ ++ ++# as http://lists.mysql.com/commits/40686 does ++{ echo "$as_me:$LINENO: checking whether the compiler provides atomic builtins" >&5 ++echo $ECHO_N "checking whether the compiler provides atomic builtins... $ECHO_C" >&6; } ++if test "${mysql_cv_atomic_builtins+set}" = set; then ++ echo $ECHO_N "(cached) $ECHO_C" >&6 ++else ++ if test "$cross_compiling" = yes; then ++ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&5 ++echo "$as_me: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&2;} ++ { (exit 1); exit 1; }; } ++else ++ cat >conftest.$ac_ext <<_ACEOF ++/* confdefs.h. */ ++_ACEOF ++cat confdefs.h >>conftest.$ac_ext ++cat >>conftest.$ac_ext <<_ACEOF ++/* end confdefs.h. */ ++ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++ ++_ACEOF ++rm -f conftest$ac_exeext ++if { (ac_try="$ac_link" ++case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_link") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 ++ (exit $ac_status); } && { ac_try='./conftest$ac_exeext' ++ { (case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_try") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; }; then ++ mysql_cv_atomic_builtins=yes ++else ++ echo "$as_me: program exited with status $ac_status" >&5 ++echo "$as_me: failed program was:" >&5 ++sed 's/^/| /' conftest.$ac_ext >&5 ++ ++( exit $ac_status ) ++mysql_cv_atomic_builtins=no ++fi ++rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext ++fi ++ ++ ++fi ++{ echo "$as_me:$LINENO: result: $mysql_cv_atomic_builtins" >&5 ++echo "${ECHO_T}$mysql_cv_atomic_builtins" >&6; } ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ ++cat >>confdefs.h <<\_ACEOF ++#define HAVE_ATOMIC_BUILTINS 1 ++_ACEOF ++ ++fi + + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. 
+diff -r 962aec0d731c innobase/configure.in +--- a/innobase/configure.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure.in Thu Oct 09 08:30:28 2008 -0700 +@@ -42,6 +42,31 @@ + AC_CHECK_FUNCS(sched_yield) + AC_CHECK_FUNCS(fdatasync) + AC_CHECK_FUNCS(localtime_r) ++ ++# as http://lists.mysql.com/commits/40686 does ++AC_CACHE_CHECK([whether the compiler provides atomic builtins], ++ [mysql_cv_atomic_builtins], [AC_TRY_RUN([ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++], [mysql_cv_atomic_builtins=yes], [mysql_cv_atomic_builtins=no])]) ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, ++ [Define to 1 if compiler provides atomic builtins.]) ++fi ++ + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. + #AC_C_INLINE Already checked in MySQL +diff -r 962aec0d731c innobase/ib_config.h +--- a/innobase/ib_config.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h Thu Oct 09 08:30:28 2008 -0700 +@@ -3,6 +3,9 @@ + + /* Define to 1 if you have the <aio.h> header file. */ + #define HAVE_AIO_H 1 ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#define HAVE_ATOMIC_BUILTINS 1 + + /* Define to 1 if you have the <dlfcn.h> header file. */ + #define HAVE_DLFCN_H 1 +diff -r 962aec0d731c innobase/ib_config.h.in +--- a/innobase/ib_config.h.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h.in Thu Oct 09 08:30:28 2008 -0700 +@@ -2,6 +2,9 @@ + + /* Define to 1 if you have the <aio.h> header file. */ + #undef HAVE_AIO_H ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#undef HAVE_ATOMIC_BUILTINS + + /* Define to 1 if you have the <dlfcn.h> header file. 
*/ + #undef HAVE_DLFCN_H +diff -r 962aec0d731c innobase/include/sync0rw.h +--- a/innobase/include/sync0rw.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.h Thu Oct 09 08:30:28 2008 -0700 +@@ -325,7 +325,17 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( + /*================*/ + rw_lock_t* lock); + UNIV_INLINE +@@ -408,6 +418,11 @@ + rw_lock_debug_t* info); /* in: debug struct */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++/* This value means NOT_LOCKED */ ++#define RW_LOCK_BIAS 0x00100000 ++#endif ++ + /* NOTE! The structure appears here only for the compiler to know its size. + Do not use its fields directly! The structure used in the spin lock + implementation of a read-write lock. Several threads may have a shared lock +@@ -417,9 +432,9 @@ + field. Then no new readers are allowed in. */ + + struct rw_lock_struct { +- os_event_t event; /* Used by sync0arr.c for thread queueing */ +- +-#ifdef __WIN__ ++ /* Used by sync0arr.c for thread queueing */ ++ os_event_t s_event; /* Used for s_lock */ ++ os_event_t x_event; /* Used for x_lock */ + os_event_t wait_ex_event; /* This windows specific event is + used by the thread which has set the + lock state to RW_LOCK_WAIT_EX. The +@@ -427,31 +442,35 @@ + thread will be the next one to proceed + once the current the event gets + signalled. 
See LEMMA 2 in sync0sync.c */ ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ volatile lint lock_word; /* Used by using atomic builtin */ + #endif + +- ulint reader_count; /* Number of readers who have locked this ++ volatile ulint reader_count; /* Number of readers who have locked this + lock in the shared mode */ +- ulint writer; /* This field is set to RW_LOCK_EX if there ++ volatile ulint writer; /* This field is set to RW_LOCK_EX if there + is a writer owning the lock (in exclusive + mode), RW_LOCK_WAIT_EX if a writer is + queueing for the lock, and + RW_LOCK_NOT_LOCKED, otherwise. */ +- os_thread_id_t writer_thread; ++ volatile os_thread_id_t writer_thread; + /* Thread id of a possible writer thread */ +- ulint writer_count; /* Number of times the same thread has ++ volatile ulint writer_count; /* Number of times the same thread has + recursively locked the lock in the exclusive + mode */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t mutex; /* The mutex protecting rw_lock_struct */ ++#endif + ulint pass; /* Default value 0. This is set to some + value != 0 given by the caller of an x-lock + operation, if the x-lock is to be passed to + another thread to unlock (which happens in + asynchronous i/o). */ +- ulint waiters; /* This ulint is set to 1 if there are +- waiters (readers or writers) in the global +- wait array, waiting for this rw_lock. +- Otherwise, == 0. 
*/ +- ibool writer_is_wait_ex; ++ volatile ulint s_waiters; /* 1: there are waiters (s_lock) */ ++ volatile ulint x_waiters; /* 1: there are waiters (x_lock) */ ++ volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */ ++ volatile ibool writer_is_wait_ex; + /* This is TRUE if the writer field is + RW_LOCK_WAIT_EX; this field is located far + from the memory update hotspot fields which +diff -r 962aec0d731c innobase/include/sync0rw.ic +--- a/innobase/include/sync0rw.ic Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.ic Thu Oct 09 08:30:28 2008 -0700 +@@ -47,20 +47,52 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( + /*================*/ + rw_lock_t* lock) + { +- return(lock->waiters); ++ return(lock->s_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->x_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->wait_ex_waiters); + } + UNIV_INLINE + void +-rw_lock_set_waiters( +-/*================*/ ++rw_lock_set_s_waiters( + rw_lock_t* lock, + ulint flag) + { +- lock->waiters = flag; ++ lock->s_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_x_waiters( ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->x_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_wx_waiters( ++/*================*/ ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->wait_ex_waiters = flag; + } + UNIV_INLINE + ulint +@@ -68,7 +100,19 @@ + /*===============*/ + rw_lock_t* lock) + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (lock->writer == RW_LOCK_NOT_LOCKED) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ if (lock->writer_is_wait_ex) { ++ return(RW_LOCK_WAIT_EX); ++ } else { ++ return(RW_LOCK_EX); ++ } ++#else + return(lock->writer); ++#endif + } + UNIV_INLINE + void +@@ -96,6 +140,7 @@ + { + lock->reader_count = count; + } ++#ifndef HAVE_ATOMIC_BUILTINS + UNIV_INLINE + 
mutex_t* + rw_lock_get_mutex( +@@ -104,6 +149,7 @@ + { + return(&(lock->mutex)); + } ++#endif + + /********************************************************************** + Returns the value of writer_count for the lock. Does not reserve the lock +@@ -133,14 +179,26 @@ + const char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { +-#ifdef UNIV_SYNC_DEBUG ++#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS) + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ + /* Check if the writer field is free */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, +@@ -167,11 +225,15 @@ + const char* file_name, /* in: file name where requested */ + ulint line) /* in: line where lock requested */ + { +- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); ++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + ut_ad(rw_lock_get_reader_count(lock) == 0); + + /* Set the shared lock by incrementing the reader count */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + lock->reader_count++; ++#endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; +@@ -199,7 +261,11 @@ + + rw_lock_set_writer(lock, RW_LOCK_EX); + lock->writer_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->writer_count),1); ++#else + lock->writer_count++; ++#endif + lock->pass = 0; + + 
lock->last_x_file_name = file_name; +@@ -241,15 +307,21 @@ + ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { + /* Did not succeed, try spin wait */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + rw_lock_s_lock_spin(lock, pass, file_name, line); + +@@ -272,11 +344,23 @@ + { + ibool success = FALSE; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (lock->writer == RW_LOCK_NOT_LOCKED) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, +@@ -289,7 +373,9 @@ + success = TRUE; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(success); + } +@@ -309,6 +395,55 @@ + { + ibool success = FALSE; + os_thread_id_t curr_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if ((lock->lock_word == RW_LOCK_BIAS) ++ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ lock->writer_thread = curr_thread; ++ lock->pass = 0; ++ lock->writer_is_wait_ex = FALSE; ++ /* 
next function may work as memory barrier */ ++ relock: ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(TRUE); ++ } else { ++ /* x-unlock */ ++ __sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS); ++ } ++ } else { ++ /* fail (x-lock) */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ } ++ } ++ ++ if (lock->pass == 0 ++ && os_thread_eq(lock->writer_thread, curr_thread) ++ && rw_lock_get_writer(lock) == RW_LOCK_EX) { ++ goto relock; ++ } ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(FALSE); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) { +@@ -339,6 +474,7 @@ + ut_ad(rw_lock_validate(lock)); + + return(success); ++#endif + } + + /********************************************************************** +@@ -354,16 +490,33 @@ + #endif + ) + { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t* mutex = &(lock->mutex); +- ibool sg = FALSE; ++#endif ++ ibool x_sg = FALSE; ++ ibool wx_sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(mutex); ++#endif + + /* Reset the shared lock by decrementing the reader count */ + + ut_a(lock->reader_count > 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ ++ if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) { ++ last = TRUE; ++ } ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); +@@ -372,20 +525,36 @@ + /* If there may be waiters and this was the last s-lock, + signal the object */ + +- if (UNIV_UNLIKELY(lock->waiters) ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_UNLIKELY(last && 
lock->wait_ex_waiters)) { ++#else ++ if (UNIV_UNLIKELY(lock->wait_ex_waiters) + && lock->reader_count == 0) { +- sg = TRUE; ++#endif ++ wx_sg = TRUE; + +- rw_lock_set_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); ++ } ++#ifdef HAVE_ATOMIC_BUILTINS ++ else if (UNIV_UNLIKELY(last && lock->x_waiters)) { ++#else ++ else if (UNIV_UNLIKELY(lock->x_waiters) ++ && lock->reader_count == 0) { ++#endif ++ x_sg = TRUE; ++ ++ rw_lock_set_x_waiters(lock, 0); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(mutex); ++#endif + +- if (UNIV_UNLIKELY(sg)) { +-#ifdef __WIN__ ++ if (UNIV_UNLIKELY(wx_sg)) { + os_event_set(lock->wait_ex_event); +-#endif +- os_event_set(lock->event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } else if (UNIV_UNLIKELY(x_sg)) { ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -409,13 +578,22 @@ + + ut_ad(lock->reader_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_sub_and_fetch(&(lock->reader_count),1); ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + #ifdef UNIV_SYNC_PERF_STAT + rw_s_exit_count++; +@@ -435,41 +613,81 @@ + #endif + ) + { +- ibool sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif ++ ibool s_sg = FALSE; ++ ibool x_sg = FALSE; + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(&(lock->mutex)); ++#endif + + /* Reset the exclusive lock if this thread no longer has an x-mode + lock */ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++ last = TRUE; ++ } ++ ++ if (last) { ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ ++ /* FIXME: It 
is a value of bad manners for pthread. ++ But we shouldn't keep an ID of not-owner. */ ++ lock->writer_thread = -1; ++ ++ /* atomic operation may be safer about memory order. */ ++ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); ++ __sync_synchronize(); ++ } ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); + #endif + + /* If there may be waiters, signal the lock */ +- if (UNIV_UNLIKELY(lock->waiters) +- && lock->writer_count == 0) { +- +- sg = TRUE; +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (last) { ++#else ++ if (lock->writer_count == 0) { ++#endif ++ if(lock->s_waiters){ ++ s_sg = TRUE; ++ rw_lock_set_s_waiters(lock, 0); ++ } ++ if(lock->x_waiters){ ++ x_sg = TRUE; ++ rw_lock_set_x_waiters(lock, 0); ++ } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + +- if (UNIV_UNLIKELY(sg)) { ++ if (UNIV_UNLIKELY(s_sg)) { ++ os_event_set(lock->s_event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } ++ if (UNIV_UNLIKELY(x_sg)) { + #ifdef __WIN__ ++ /* I doubt the necessity of it. 
*/ + os_event_set(lock->wait_ex_event); + #endif +- os_event_set(lock->event); ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -494,9 +712,13 @@ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { ++#endif + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } + +@@ -504,7 +726,12 @@ + rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + + #ifdef UNIV_SYNC_PERF_STAT +diff -r 962aec0d731c innobase/sync/sync0arr.c +--- a/innobase/sync/sync0arr.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0arr.c Thu Oct 09 08:30:28 2008 -0700 +@@ -309,13 +309,13 @@ + { + if (type == SYNC_MUTEX) { + return(os_event_reset(((mutex_t *) object)->event)); +-#ifdef __WIN__ + } else if (type == RW_LOCK_WAIT_EX) { + return(os_event_reset( + ((rw_lock_t *) object)->wait_ex_event)); +-#endif +- } else { +- return(os_event_reset(((rw_lock_t *) object)->event)); ++ } else if (type == RW_LOCK_SHARED) { ++ return(os_event_reset(((rw_lock_t *) object)->s_event)); ++ } else { /* RW_LOCK_EX */ ++ return(os_event_reset(((rw_lock_t *) object)->x_event)); + } + } + +@@ -415,15 +415,12 @@ + + if (cell->request_type == SYNC_MUTEX) { + event = ((mutex_t*) cell->wait_object)->event; +-#ifdef __WIN__ +- /* On windows if the thread about to wait is the one which +- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then +- it waits on a special event i.e.: wait_ex_event. 
*/ + } else if (cell->request_type == RW_LOCK_WAIT_EX) { + event = ((rw_lock_t*) cell->wait_object)->wait_ex_event; +-#endif +- } else { +- event = ((rw_lock_t*) cell->wait_object)->event; ++ } else if (cell->request_type == RW_LOCK_SHARED) { ++ event = ((rw_lock_t*) cell->wait_object)->s_event; ++ } else { ++ event = ((rw_lock_t*) cell->wait_object)->x_event; + } + + cell->waiting = TRUE; +@@ -464,6 +461,7 @@ + mutex_t* mutex; + rw_lock_t* rwlock; + ulint type; ++ ulint writer; + + type = cell->request_type; + +@@ -492,12 +490,10 @@ + (ulong) mutex->waiters); + + } else if (type == RW_LOCK_EX +-#ifdef __WIN__ + || type == RW_LOCK_WAIT_EX +-#endif + || type == RW_LOCK_SHARED) { + +- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); ++ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file); + + rwlock = cell->old_wait_rw_lock; + +@@ -505,21 +501,23 @@ + " RW-latch at %p created in file %s line %lu\n", + rwlock, rwlock->cfile_name, + (ulong) rwlock->cline); +- if (rwlock->writer != RW_LOCK_NOT_LOCKED) { ++ writer = rw_lock_get_writer(rwlock); ++ if (writer != RW_LOCK_NOT_LOCKED) { + fprintf(file, + "a writer (thread id %lu) has reserved it in mode %s", + (ulong) os_thread_pf(rwlock->writer_thread), +- rwlock->writer == RW_LOCK_EX ++ writer == RW_LOCK_EX + ? 
" exclusive\n" + : " wait exclusive\n"); + } + + fprintf(file, +- "number of readers %lu, waiters flag %lu\n" ++ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n" + "Last time read locked in file %s line %lu\n" + "Last time write locked in file %s line %lu\n", + (ulong) rwlock->reader_count, +- (ulong) rwlock->waiters, ++ (ulong) rwlock->s_waiters, ++ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters), + rwlock->last_s_file_name, + (ulong) rwlock->last_s_line, + rwlock->last_x_file_name, +@@ -839,11 +837,15 @@ + /*========================*/ + sync_array_t* arr) /* in: wait array */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(arr->sg_count),1); ++#else + sync_array_enter(arr); + + arr->sg_count++; + + sync_array_exit(arr); ++#endif + } + + /************************************************************************** +@@ -880,19 +882,23 @@ + + mutex = cell->wait_object; + os_event_set(mutex->event); +-#ifdef __WIN__ + } else if (cell->request_type + == RW_LOCK_WAIT_EX) { + rw_lock_t* lock; + + lock = cell->wait_object; + os_event_set(lock->wait_ex_event); +-#endif +- } else { ++ } else if (cell->request_type ++ == RW_LOCK_SHARED) { + rw_lock_t* lock; + + lock = cell->wait_object; +- os_event_set(lock->event); ++ os_event_set(lock->s_event); ++ } else { ++ rw_lock_t* lock; ++ ++ lock = cell->wait_object; ++ os_event_set(lock->x_event); + } + } + } +diff -r 962aec0d731c innobase/sync/sync0rw.c +--- a/innobase/sync/sync0rw.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0rw.c Thu Oct 09 08:30:28 2008 -0700 +@@ -99,6 +99,7 @@ + object is created, then the following call initializes + the sync system. 
*/ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_create(rw_lock_get_mutex(lock)); + mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); + +@@ -108,8 +109,14 @@ + lock->mutex.cmutex_name = cmutex_name; + lock->mutex.mutex_type = 1; + #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ ++#endif /* !HAVE_ATOMIC_BUILTINS */ + +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ lock->lock_word = RW_LOCK_BIAS; ++#endif ++ rw_lock_set_s_waiters(lock, 0); ++ rw_lock_set_x_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + lock->writer_count = 0; + rw_lock_set_reader_count(lock, 0); +@@ -130,11 +137,9 @@ + lock->last_x_file_name = "not yet reserved"; + lock->last_s_line = 0; + lock->last_x_line = 0; +- lock->event = os_event_create(NULL); +- +-#ifdef __WIN__ ++ lock->s_event = os_event_create(NULL); ++ lock->x_event = os_event_create(NULL); + lock->wait_ex_event = os_event_create(NULL); +-#endif + + mutex_enter(&rw_lock_list_mutex); + +@@ -162,19 +167,21 @@ + ut_a(rw_lock_validate(lock)); + #endif /* UNIV_DEBUG */ + ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); +- ut_a(rw_lock_get_waiters(lock) == 0); ++ ut_a(rw_lock_get_s_waiters(lock) == 0); ++ ut_a(rw_lock_get_x_waiters(lock) == 0); ++ ut_a(rw_lock_get_wx_waiters(lock) == 0); + ut_a(rw_lock_get_reader_count(lock) == 0); + + lock->magic_n = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_free(rw_lock_get_mutex(lock)); ++#endif + + mutex_enter(&rw_lock_list_mutex); +- os_event_free(lock->event); +- +-#ifdef __WIN__ ++ os_event_free(lock->s_event); ++ os_event_free(lock->x_event); + os_event_free(lock->wait_ex_event); +-#endif + + if (UT_LIST_GET_PREV(list, lock)) { + ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); +@@ -192,6 +199,8 @@ + Checks that the rw-lock has been initialized and that there are no + simultaneous shared and exclusive locks. */ + ++/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. 
*/ ++ + ibool + rw_lock_validate( + /*=============*/ +@@ -199,7 +208,9 @@ + { + ut_a(lock); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_a((rw_lock_get_reader_count(lock) == 0) +@@ -207,11 +218,17 @@ + ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)); +- ut_a((rw_lock_get_waiters(lock) == 0) +- || (rw_lock_get_waiters(lock) == 1)); ++ ut_a((rw_lock_get_s_waiters(lock) == 0) ++ || (rw_lock_get_s_waiters(lock) == 1)); ++ ut_a((rw_lock_get_x_waiters(lock) == 0) ++ || (rw_lock_get_x_waiters(lock) == 1)); ++ ut_a((rw_lock_get_wx_waiters(lock) == 0) ++ || (rw_lock_get_wx_waiters(lock) == 1)); + ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(TRUE); + } +@@ -237,13 +254,14 @@ + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++spin_loop: + rw_s_spin_wait_count++; + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + } +@@ -262,15 +280,27 @@ + lock->cfile_name, (ulong) lock->cline, (ulong) i); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + /* We try once again to obtain the lock */ + + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + /* If we get here, locking did not succeed, we may + suspend the 
thread to wait in the wait array */ + +@@ -281,9 +311,19 @@ + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ rw_lock_set_s_waiters(lock, 1); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Success */ ++ } ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -318,13 +358,19 @@ + { + ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + lock->writer_thread = os_thread_get_curr_id(); + + lock->pass = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#else ++ __sync_synchronize(); ++#endif + } + + /********************************************************************** +@@ -342,6 +388,89 @@ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ os_thread_id_t curr_thread = os_thread_get_curr_id(); ++ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ /* obtain RW_LOCK_WAIT_EX right */ ++ lock->writer_thread = curr_thread; ++ lock->pass = pass; ++ lock->writer_is_wait_ex = TRUE; ++ /* atomic operation may be safer about memory order. 
*/ ++ __sync_synchronize(); ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, ++ file_name, line); ++#endif ++ } ++ ++ if (!os_thread_eq(lock->writer_thread, curr_thread)) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ switch(rw_lock_get_writer(lock)) { ++ case RW_LOCK_WAIT_EX: ++ /* have right to try x-lock */ ++ if (lock->lock_word == RW_LOCK_BIAS) { ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ lock->pass = pass; ++ lock->writer_is_wait_ex = FALSE; ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX); ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, ++ file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } else { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS); ++ } ++ } ++ /* There are readers, we have to wait */ ++ return(RW_LOCK_WAIT_EX); ++ ++ break; ++ ++ case RW_LOCK_EX: ++ /* already have x-lock */ ++ if ((lock->pass == 0)&&(pass == 0)) { ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, ++ line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } ++ ++ return(RW_LOCK_NOT_LOCKED); ++ ++ break; ++ ++ default: /* ??? 
*/ ++ return(RW_LOCK_NOT_LOCKED); ++ } ++#else /* HAVE_ATOMIC_BUILTINS */ ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ +@@ -423,6 +552,7 @@ + /* Locking succeeded, we may return */ + return(RW_LOCK_EX); + } ++#endif /* HAVE_ATOMIC_BUILTINS */ + + /* Locking did not succeed */ + return(RW_LOCK_NOT_LOCKED); +@@ -448,19 +578,33 @@ + ulint line) /* in: line where requested */ + { + ulint index; /* index of the reserved wait cell */ +- ulint state; /* lock state acquired */ ++ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ ulint prev_state = RW_LOCK_NOT_LOCKED; ++#endif + ulint i; /* spin round count */ + + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter_fast(&(lock->mutex)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. 
*/ ++#else + mutex_exit(&(lock->mutex)); ++#endif + ++spin_loop: + if (state == RW_LOCK_EX) { + + return; /* Locking succeeded */ +@@ -468,10 +612,9 @@ + } else if (state == RW_LOCK_NOT_LOCKED) { + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, + srv_spin_wait_delay)); +@@ -485,9 +628,12 @@ + } else if (state == RW_LOCK_WAIT_EX) { + + /* Spin waiting for the reader count field to become zero */ +- i = 0; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ while (lock->lock_word != RW_LOCK_BIAS ++#else + while (rw_lock_get_reader_count(lock) != 0 ++#endif + && i < SYNC_SPIN_ROUNDS) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, +@@ -500,7 +646,6 @@ + os_thread_yield(); + } + } else { +- i = 0; /* Eliminate a compiler warning */ + ut_error; + } + +@@ -516,34 +661,69 @@ + /* We try once again to obtain the lock. Acquire the mutex protecting + the rw-lock fields */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. */ ++#endif ++ + if (state == RW_LOCK_EX) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Locking succeeded */ + } ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + + rw_x_system_call_count++; + + sync_array_reserve_cell(sync_primary_wait_array, + lock, +-#ifdef __WIN__ +- /* On windows RW_LOCK_WAIT_EX signifies +- that this thread should wait on the +- special wait_ex_event. */ + (state == RW_LOCK_WAIT_EX) + ? 
RW_LOCK_WAIT_EX : +-#endif + RW_LOCK_EX, + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ if (state == RW_LOCK_WAIT_EX) { ++ rw_lock_set_wx_waiters(lock, 1); ++ } else { ++ rw_lock_set_x_waiters(lock, 1); ++ } + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ prev_state = state; ++ state = rw_lock_x_lock_low(lock, pass, file_name, line); ++ if (state == RW_LOCK_EX) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Locking succeeded */ ++ } ++ if (state != prev_state) { ++ /* retry! */ ++ sync_array_free_cell(sync_primary_wait_array, index); ++ goto lock_loop; ++ } ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -718,7 +898,9 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + info = UT_LIST_GET_FIRST(lock->debug_list); + +@@ -728,7 +910,9 @@ + && (info->pass == 0) + && (info->lock_type == lock_type)) { + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + /* Found! 
*/ + + return(TRUE); +@@ -736,7 +920,9 @@ + + info = UT_LIST_GET_NEXT(list, info); + } ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(FALSE); + } +@@ -758,21 +944,25 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if (lock_type == RW_LOCK_SHARED) { + if (lock->reader_count > 0) { + ret = TRUE; + } + } else if (lock_type == RW_LOCK_EX) { +- if (lock->writer == RW_LOCK_EX) { ++ if (rw_lock_get_writer(lock) == RW_LOCK_EX) { + ret = TRUE; + } + } else { + ut_error; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(ret); + } +@@ -801,16 +991,26 @@ + + count++; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + + fprintf(stderr, "RW-LOCK: %p ", lock); + +- if (rw_lock_get_waiters(lock)) { +- fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -822,7 +1022,9 @@ + } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +@@ -847,10 +1049,18 @@ + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + +- if (rw_lock_get_waiters(lock)) { +- 
fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -909,14 +1119,18 @@ + lock = UT_LIST_GET_FIRST(rw_lock_list); + + while (lock != NULL) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0)) { + count++; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +diff -r 962aec0d731c patch_info/innodb_rw_lock.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_rw_lock.info Thu Oct 09 08:30:28 2008 -0700 +@@ -0,0 +1,6 @@ ++File=innodb_rw_lock.patch ++Name=Fix of InnoDB rw_locks ++Version=1.0 ++Author=Yasufumi Kinoshita ++License=BSD ++Comment= diff --git a/percona/5.0.75-b12/innodb_show_bp.patch b/percona/5.0.75-b12/innodb_show_bp.patch new file mode 100644 index 0000000..a56ae9a --- /dev/null +++ b/percona/5.0.75-b12/innodb_show_bp.patch @@ -0,0 +1,447 @@ +diff -r fe944d2c6e1f innobase/btr/btr0btr.c +--- a/innobase/btr/btr0btr.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/btr/btr0btr.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2989,3 +2989,11 @@ + + return(TRUE); + } ++ ++dulint ++btr_page_get_index_id_noninline( ++/*============*/ ++ page_t* page) /* in: index page */ ++{ ++ return btr_page_get_index_id(page); ++} +diff -r fe944d2c6e1f innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2629,3 +2629,13 @@ + buf_block_print(block); + } + ++buf_block_t* ++buf_pool_get_nth_block_no_inline( ++/*===================*/ ++ /* out: 
pointer to block */ ++ buf_pool_t* buf_pool,/* in: buf_pool */ ++ ulint i) /* in: index of the block */{ ++ ++return buf_pool_get_nth_block(buf_pool, i); ++ ++} +diff -r fe944d2c6e1f innobase/include/btr0btr.h +--- a/innobase/include/btr0btr.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/btr0btr.h Mon Nov 10 19:48:24 2008 -0800 +@@ -69,6 +69,12 @@ + UNIV_INLINE + dulint + btr_page_get_index_id( ++/*==================*/ ++ /* out: index id */ ++ page_t* page); /* in: index page */ ++ ++dulint ++btr_page_get_index_id_noninline( + /*==================*/ + /* out: index id */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Nov 10 19:48:24 2008 -0800 +@@ -703,6 +703,8 @@ + buf_get_free_list_len(void); + /*=======================*/ + ++void buf_pool_dump(void); ++buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + + + /* The buffer control block structure */ +diff -r fe944d2c6e1f innobase/include/page0page.h +--- a/innobase/include/page0page.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/page0page.h Mon Nov 10 19:48:24 2008 -0800 +@@ -260,6 +260,12 @@ + /*============*/ + /* out: number of user records */ + page_t* page); /* in: index page */ ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page); /* in: index page */ + /******************************************************************* + Returns the number of records before the given record in chain. + The number includes infimum and supremum records. 
*/ +@@ -519,6 +525,12 @@ + UNIV_INLINE + ulint + page_get_data_size( ++/*===============*/ ++ /* out: data in bytes */ ++ page_t* page); /* in: index page */ ++ ++ulint ++page_get_data_size_noninline( + /*===============*/ + /* out: data in bytes */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/page/page0page.c +--- a/innobase/page/page0page.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/page/page0page.c Mon Nov 10 19:48:24 2008 -0800 +@@ -1994,3 +1994,25 @@ + page_cur_move_to_next(&cur); + } + } ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_n_recs(page); ++} ++ ++ ++ulint ++page_get_data_size_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_data_size(page); ++} ++ ++ ++ +diff -r fe944d2c6e1f mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Nov 10 19:48:25 2008 -0800 +@@ -42,6 +42,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +@@ -741,7 +742,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-107 ++108 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -802,6 +803,7 @@ + TABLE_NAME COLUMN_NAME PRIVILEGES + COLUMNS TABLE_NAME select + COLUMN_PRIVILEGES TABLE_NAME select ++INNODB_BUFFER_POOL_CONTENT TABLE_NAME select + INDEX_STATISTICS TABLE_NAME select + KEY_COLUMN_USAGE TABLE_NAME select + STATISTICS TABLE_NAME select +@@ -815,7 +817,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 22 
++information_schema 23 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1243,6 +1246,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1332,6 +1336,7 @@ + COLUMNS information_schema.COLUMNS 1 + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 ++INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 +diff -r fe944d2c6e1f mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Nov 10 19:48:25 2008 -0800 +@@ -11,6 +11,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +diff -r fe944d2c6e1f mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Nov 10 19:48:25 2008 -0800 +@@ -85,6 +85,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROCESSLIST | +@@ -112,6 +113,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | 
KEY_COLUMN_USAGE | + | PROCESSLIST | +diff -r fe944d2c6e1f patch_info/innodb_show_bp.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_show_bp.info Mon Nov 10 19:48:25 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_show_bp.patch ++Name=show innodb buffer pool content ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= +diff -r fe944d2c6e1f sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -128,10 +128,12 @@ + #include "../innobase/include/lock0lock.h" + #include "../innobase/include/dict0crea.h" + #include "../innobase/include/btr0cur.h" ++#include "../innobase/include/buf0buf.h" + #include "../innobase/include/btr0btr.h" + #include "../innobase/include/fsp0fsp.h" + #include "../innobase/include/sync0sync.h" + #include "../innobase/include/fil0fil.h" ++#include "../innobase/include/page0page.h" + #include "../innobase/include/trx0xa.h" + } + +@@ -6483,6 +6485,116 @@ + DBUG_RETURN(FALSE); + } + ++bool ++innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables) ++{ ++ ulint size; ++ ulint i; ++ dulint id; ++ ulint n_found; ++ buf_frame_t* frame; ++ dict_index_t* index; ++ buf_block_t* block; ++ ++ char *p; ++ char db_name_raw[NAME_LEN*5+1]; ++ char table_name_raw[NAME_LEN*5+1]; ++ ++ DBUG_ENTER("innodb_I_S_buffer_pool_content"); ++ ++ ++ size = buf_pool->curr_size; ++ ++ n_found = 0; ++ ++ TABLE *table= tables->table; ++ ++ ++ //buf_pool_dump(); ++ ++ ++ for (i = 0; i < size; i++) { ++ block = buf_pool_get_nth_block_no_inline(buf_pool, i); ++ frame = block->frame; ++ if (fil_page_get_type(frame)==0) continue; ++ ++ char page_type[64]; ++ ++ switch(fil_page_get_type(frame)) ++ { ++ case FIL_PAGE_INDEX: ++ strcpy(page_type, "index"); ++ break; ++ case FIL_PAGE_UNDO_LOG: ++ strcpy(page_type, "undo_log"); ++ break; ++ case FIL_PAGE_INODE: ++ strcpy(page_type, "inode"); ++ break; ++ case FIL_PAGE_IBUF_FREE_LIST: ++ strcpy(page_type, 
"ibuf_free_list"); ++ break; ++ default: ++ sprintf(page_type, "unknown", fil_page_get_type(frame)); ++ } ++ ++ table->field[0]->store((longlong)i, TRUE); ++ table->field[1]->store((longlong)block->space, TRUE); ++ table->field[2]->store((longlong)block->offset, TRUE); ++ table->field[3]->store((longlong)page_get_n_recs_noninline(block->frame), TRUE); ++ table->field[4]->store( ( fil_page_get_type(frame) == FIL_PAGE_INDEX ) ? (longlong)page_get_data_size_noninline(block->frame):0, TRUE); ++ table->field[5]->store((longlong)block->flush_type, TRUE); ++ table->field[6]->store((longlong)block->buf_fix_count, TRUE); ++ table->field[7]->store((longlong)block->LRU_position, TRUE); ++ table->field[8]->store((longlong)fil_page_get_type(frame), TRUE); ++ ++ table->field[9]->store(page_type, strlen(page_type), system_charset_info); ++ ++ //fprintf(stderr, "block N %d, space %d, offset %d, records %d, datasize %d, page_type %s, flush_type %d, buf_fix_count %d, LRU_position %d", i, block->space, block->offset, page_get_n_recs_noninline(block->frame), page_get_data_size_noninline(block->frame), page_type,block->flush_type, block->buf_fix_count, block->LRU_position); ++ ++ // flush_type, buf_fix_count, LRU_position ++ ++ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { ++ ++ id = btr_page_get_index_id_noninline(frame); ++ index = dict_index_get_if_in_cache(id); ++ if (index) { ++ table->field[10]->store(index->name, strlen(index->name), system_charset_info); ++ // fprintf(stderr, " index %s, table %s", index->name, index->table_name); ++ ++ if((p = strchr(index->table_name, '/'))) ++ { ++ strncpy(db_name_raw, index->table_name, p-index->table_name); ++ db_name_raw[p-index->table_name] = 0; ++ table->field[11]->store(db_name_raw, strlen(db_name_raw), system_charset_info); ++ p++; ++ } else { ++ table->field[11]->store(NULL, 0, system_charset_info); ++ p = (char *)index->table_name; ++ } ++ strcpy(table_name_raw, p); ++ ++ table->field[12]->store(table_name_raw, 
strlen(table_name_raw), system_charset_info); ++ } else { ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ }else{ ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ //fprintf(stderr, "\n"); ++ if (schema_table_store_record(thd, table)) ++ { ++ DBUG_RETURN(1); ++ } ++ } ++ ++ DBUG_RETURN(0); ++} ++ + /**************************************************************************** + Implements the SHOW MUTEX STATUS command. . */ + +diff -r fe944d2c6e1f sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.h Mon Nov 10 19:48:25 2008 -0800 +@@ -263,6 +263,7 @@ + + int innobase_drop_database(char *path); + bool innodb_show_status(THD* thd); ++bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); + +diff -r fe944d2c6e1f sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_parse.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -2926,6 +2926,7 @@ + case SCH_COLUMN_PRIVILEGES: + case SCH_TABLE_CONSTRAINTS: + case SCH_KEY_COLUMN_USAGE: ++ case SCH_INNODB_I_S_BUFFER_POOL_CONTENT: + default: + break; + } +diff -r fe944d2c6e1f sql/sql_show.cc +--- a/sql/sql_show.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_show.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -27,6 +27,10 @@ + + #ifdef HAVE_BERKELEY_DB + #include "ha_berkeley.h" // For berkeley_show_logs ++#endif ++ ++#ifdef HAVE_INNOBASE_DB ++#include "ha_innodb.h" + #endif + + #ifndef NO_EMBEDDED_ACCESS_CHECKS +@@ -4042,6 +4046,13 @@ + DBUG_RETURN(res); + } + ++int fill_innodb_bp_content(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ DBUG_ENTER("fill_innodb_bp_content"); ++ int res= 0; ++ innodb_I_S_buffer_pool_content(thd, 
tables); ++ DBUG_RETURN(res); ++} + + /* + Find schema_tables elment by name +@@ -4951,6 +4962,24 @@ + }; + + ++ST_FIELD_INFO innodb_bp_content_fields_info[]= ++{ ++ {"BLOCK_NUM", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Block_num"}, ++ {"SPACE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Space"}, ++ {"OFFSET", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Offset"}, ++ {"RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Records"}, ++ {"DATASIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Datasize"}, ++ {"FLUSH_TYPE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Flush_type"}, ++ {"FIX_COUNT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Fix_count"}, ++ {"LRU_POSITION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "LRU_position"}, ++ {"PAGE_TYPE_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Page_type_id"}, ++ {"PAGE_TYPE", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Page_type"}, ++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name"}, ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schem"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4969,6 +4998,8 @@ + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + fill_schema_column_privileges, 0, 0, -1, -1, 0}, ++ {"INNODB_BUFFER_POOL_CONTENT", innodb_bp_content_fields_info, create_schema_table, ++ fill_innodb_bp_content, 0, 0, -1, -1, 0}, + {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, + fill_schema_index_stats, make_old_format, 0, -1, -1, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, +diff -r fe944d2c6e1f sql/table.h +--- a/sql/table.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/table.h Mon Nov 10 19:48:25 2008 -0800 +@@ -375,6 +375,7 @@ + 
SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, ++ SCH_INNODB_I_S_BUFFER_POOL_CONTENT, + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, diff --git a/percona/5.0.75-b12/innodb_show_hashed_memory.patch b/percona/5.0.75-b12/innodb_show_hashed_memory.patch new file mode 100644 index 0000000..191193e --- /dev/null +++ b/percona/5.0.75-b12/innodb_show_hashed_memory.patch @@ -0,0 +1,275 @@ +diff -ruN mysql-5.0.67_highperf/innobase/buf/buf0buf.c mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c +--- mysql-5.0.67_highperf/innobase/buf/buf0buf.c 2008-11-12 09:25:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c 2008-11-12 09:27:52.000000000 +0900 +@@ -2454,13 +2454,15 @@ + (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped)); + } + fprintf(file, +- "Buffer pool size %lu\n" +- "Free buffers %lu\n" +- "Database pages %lu\n" +- "Modified db pages %lu\n" ++ "Buffer pool size %lu\n" ++ "Buffer pool size, bytes %lu\n" ++ "Free buffers %lu\n" ++ "Database pages %lu\n" ++ "Modified db pages %lu\n" + "Pending reads %lu\n" + "Pending writes: LRU %lu, flush list %lu, single page %lu\n", + (ulong) size, ++ (ulong) size * UNIV_PAGE_SIZE, + (ulong) UT_LIST_GET_LEN(buf_pool->free), + (ulong) UT_LIST_GET_LEN(buf_pool->LRU), + (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), +diff -ruN mysql-5.0.67_highperf/innobase/fil/fil0fil.c mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c +--- mysql-5.0.67_highperf/innobase/fil/fil0fil.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c 2008-11-12 09:27:52.000000000 +0900 +@@ -4472,3 +4472,30 @@ + + return(mach_read_from_2(page + FIL_PAGE_TYPE)); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++fil_system_hash_cells(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (fil_system->spaces->n_cells ++ + fil_system->name_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++fil_system_hash_nodes(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (UT_LIST_GET_LEN(fil_system->space_list) ++ * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/innobase/include/fil0fil.h mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h +--- mysql-5.0.67_highperf/innobase/include/fil0fil.h 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h 2008-11-12 09:27:52.000000000 +0900 +@@ -701,6 +701,16 @@ + written to page, the return value not defined */ + byte* page); /* in: file page */ + ++/************************************************************************* ++Return local hash table informations. */ ++ ++ulint ++fil_system_hash_cells(void); ++/*========================*/ ++ ++ulint ++fil_system_hash_nodes(void); ++/*========================*/ + + typedef struct fil_space_struct fil_space_t; + +diff -ruN mysql-5.0.67_highperf/innobase/include/thr0loc.h mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h +--- mysql-5.0.67_highperf/innobase/include/thr0loc.h 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h 2008-11-12 09:27:52.000000000 +0900 +@@ -77,6 +77,17 @@ + /*=============================*/ + /* out: pointer to the in_ibuf field */ + ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void); ++/*=======================*/ ++ ++ulint ++thr_local_hash_nodes(void); ++/*=======================*/ ++ + #ifndef UNIV_NONINL + #include "thr0loc.ic" + #endif +diff -ruN mysql-5.0.67_highperf/innobase/srv/srv0srv.c mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c +--- mysql-5.0.67_highperf/innobase/srv/srv0srv.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c 2008-11-12 09:54:19.000000000 +0900 +@@ -1645,6 +1645,14 @@ + time_t current_time; + ulint n_reserved; + ++ ulint btr_search_sys_subtotal; ++ ulint lock_sys_subtotal; ++ ulint recv_sys_subtotal; ++ ulint io_counter_subtotal; ++ ++ ulint i; ++ trx_t* trx; ++ + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); +@@ -1747,6 +1755,91 @@ + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + ++ /* Calcurate reserved memories */ ++ if (btr_search_sys && btr_search_sys->hash_index->heap) { ++ btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap); ++ } else { ++ btr_search_sys_subtotal = 0; ++ for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) { ++ btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]); ++ } ++ } ++ ++ lock_sys_subtotal = 0; ++ if (trx_sys) { ++ mutex_enter(&kernel_mutex); ++ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); ++ while (trx) { ++ lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0); ++ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); ++ } ++ mutex_exit(&kernel_mutex); ++ } ++ ++ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) ++ ? mem_heap_get_size(recv_sys->heap) : 0); ++ ++ io_counter_subtotal = ((buf_pool->io_counter_heap) ++ ? 
mem_heap_get_size(buf_pool->io_counter_heap) : 0); ++ ++ fprintf(file, ++ "Internal hash tables (constant factor + variable factor)\n" ++ " Adaptive hash index %lu \t(%lu + %lu)\n" ++ " Page hash %lu\n" ++ " Dictionary cache %lu \t(%lu + %lu)\n" ++ " File system %lu \t(%lu + %lu)\n" ++ " Lock system %lu \t(%lu + %lu)\n" ++ " Recovery system %lu \t(%lu + %lu)\n" ++ " Threads %lu \t(%lu + %lu)\n" ++ " innodb_io_pattern %lu \t(%lu + %lu)\n", ++ ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0) ++ + btr_search_sys_subtotal, ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) btr_search_sys_subtotal, ++ ++ (ulong) (buf_pool->page_hash->n_cells * sizeof(hash_cell_t)), ++ ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t) ++ + dict_sys->size) : 0), ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t)) : 0), ++ (ulong) (dict_sys ? (dict_sys->size) : 0), ++ ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t) ++ + fil_system_hash_nodes()), ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) fil_system_hash_nodes(), ++ ++ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + lock_sys_subtotal), ++ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) lock_sys_subtotal, ++ ++ (ulong) (((recv_sys && recv_sys->addr_hash) ++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + recv_sys_subtotal), ++ (ulong) ((recv_sys && recv_sys->addr_hash) ++ ? 
(recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) recv_sys_subtotal, ++ ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t) ++ + thr_local_hash_nodes()), ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) thr_local_hash_nodes(), ++ ++ (ulong) (((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + io_counter_subtotal), ++ (ulong) ((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) io_counter_subtotal); ++ + if (srv_use_awe) { + fprintf(file, + "In addition to that %lu MB of AWE memory allocated\n", +diff -ruN mysql-5.0.67_highperf/innobase/thr/thr0loc.c mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c +--- mysql-5.0.67_highperf/innobase/thr/thr0loc.c 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c 2008-11-12 09:27:52.000000000 +0900 +@@ -32,6 +32,7 @@ + + /* The hash table. The module is not yet initialized when it is NULL. */ + hash_table_t* thr_local_hash = NULL; ++ulint thr_local_hash_n_nodes = 0; + + /* The private data for each thread should be put to + the structure below and the accessor functions written +@@ -223,6 +224,7 @@ + HASH_INSERT(thr_local_t, hash, thr_local_hash, + os_thread_pf(os_thread_get_curr_id()), + local); ++ thr_local_hash_n_nodes++; + + mutex_exit(&thr_local_mutex); + } +@@ -251,6 +253,7 @@ + + HASH_DELETE(thr_local_t, hash, thr_local_hash, + os_thread_pf(id), local); ++ thr_local_hash_n_nodes--; + + mutex_exit(&thr_local_mutex); + +@@ -274,3 +277,29 @@ + mutex_create(&thr_local_mutex); + mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++thr_local_hash_nodes(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash_n_nodes ++ * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/patch_info/innodb_show_hashed_memory.info mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,6 @@ ++File=innodb_show_hashed_memory.patch ++Name=Adds additional information of InnoDB internal hash table memories in SHOW INNODB STATUS ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= diff --git a/percona/5.0.75-b12/microsec_process.patch b/percona/5.0.75-b12/microsec_process.patch new file mode 100644 index 0000000..0e094e5 --- /dev/null +++ b/percona/5.0.75-b12/microsec_process.patch @@ -0,0 +1,281 @@ +diff -r 327ce7a34c91 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema.result Fri Nov 07 15:52:53 2008 -0800 +@@ -44,6 +44,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +@@ -740,7 +741,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-106 ++107 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -749,6 +750,7 @@ + table_schema table_name column_name + information_schema COLUMNS COLUMN_DEFAULT + information_schema COLUMNS COLUMN_TYPE ++information_schema PROCESSLIST INFO + information_schema ROUTINES ROUTINE_DEFINITION + information_schema ROUTINES 
SQL_MODE + information_schema TRIGGERS ACTION_CONDITION +@@ -813,7 +815,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 21 ++information_schema 22 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1242,6 +1245,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1329,6 +1333,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 ++PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 + ROUTINES information_schema.ROUTINES 1 + SCHEMATA information_schema.SCHEMATA 1 +diff -r 327ce7a34c91 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Fri Nov 07 15:52:53 2008 -0800 +@@ -13,6 +13,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +diff -r 327ce7a34c91 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Fri Nov 07 15:52:53 2008 -0800 +@@ -87,6 +87,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +@@ -113,6 +114,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +diff 
-r 327ce7a34c91 sql/mysql_priv.h +--- a/sql/mysql_priv.h Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/mysql_priv.h Fri Nov 07 15:52:53 2008 -0800 +@@ -244,6 +244,8 @@ + + /* Characters shown for the command in 'show processlist' */ + #define PROCESS_LIST_WIDTH 100 ++/* Characters shown for the command in 'information_schema.processlist' */ ++#define PROCESS_LIST_INFO_WIDTH 65535 + + #define PRECISION_FOR_DOUBLE 53 + #define PRECISION_FOR_FLOAT 24 +diff -r 327ce7a34c91 sql/sql_show.cc +--- a/sql/sql_show.cc Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/sql_show.cc Fri Nov 07 15:52:53 2008 -0800 +@@ -1466,6 +1466,120 @@ + } + send_eof(thd); + DBUG_VOID_RETURN; ++} ++ ++int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ CHARSET_INFO *cs= system_charset_info; ++ char *user; ++ ulonglong current_timer= my_timer(&current_timer, frequency); ++ DBUG_ENTER("fill_process_list"); ++ ++ user= thd->security_ctx->master_access & PROCESS_ACL ? ++ NullS : thd->security_ctx->priv_user; ++ ++ VOID(pthread_mutex_lock(&LOCK_thread_count)); ++ ++ if (!thd->killed) ++ { ++ I_List_iterator<THD> it(threads); ++ THD* tmp; ++ ++ while ((tmp= it++)) ++ { ++ Security_context *tmp_sctx= tmp->security_ctx; ++ struct st_my_thread_var *mysys_var; ++ const char *val; ++ ++ if ((!tmp->vio_ok() && !tmp->system_thread) || ++ (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user)))) ++ continue; ++ ++ restore_record(table, s->default_values); ++ /* ID */ ++ table->field[0]->store((longlong) tmp->thread_id, TRUE); ++ /* USER */ ++ val= tmp_sctx->user ? tmp_sctx->user : ++ (tmp->system_thread ? 
"system user" : "unauthenticated user"); ++ table->field[1]->store(val, strlen(val), cs); ++ /* HOST */ ++ if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && ++ thd->security_ctx->host_or_ip[0]) ++ { ++ char host[LIST_PROCESS_HOST_LEN + 1]; ++ my_snprintf(host, LIST_PROCESS_HOST_LEN, "%s:%u", ++ tmp_sctx->host_or_ip, tmp->peer_port); ++ table->field[2]->store(host, strlen(host), cs); ++ } ++ else ++ table->field[2]->store(tmp_sctx->host_or_ip, ++ strlen(tmp_sctx->host_or_ip), cs); ++ /* DB */ ++ if (tmp->db) ++ { ++ table->field[3]->store(tmp->db, strlen(tmp->db), cs); ++ table->field[3]->set_notnull(); ++ } ++ ++ if ((mysys_var= tmp->mysys_var)) ++ pthread_mutex_lock(&mysys_var->mutex); ++ /* COMMAND */ ++ if ((val= (char *) (tmp->killed == THD::KILL_CONNECTION? "Killed" : 0))) ++ table->field[4]->store(val, strlen(val), cs); ++ else ++ table->field[4]->store(command_name[tmp->command], ++ strlen(command_name[tmp->command]), cs); ++ /* MYSQL_TIME */ ++ const ulonglong utime= (tmp->start_timer && current_timer) ? current_timer - tmp->start_timer : 0; ++ table->field[5]->store(utime / 1000000, TRUE); ++ /* STATE */ ++#ifndef EMBEDDED_LIBRARY ++ val= (char*) (tmp->locked ? "Locked" : ++ tmp->net.reading_or_writing ? ++ (tmp->net.reading_or_writing == 2 ? ++ "Writing to net" : ++ tmp->command == COM_SLEEP ? "" : ++ "Reading from net") : ++ tmp->proc_info ? tmp->proc_info : ++ tmp->mysys_var && ++ tmp->mysys_var->current_cond ? 
++ "Waiting on cond" : NullS); ++#else ++ val= (char *) "Writing to net"; ++#endif ++ if (val) ++ { ++ table->field[6]->store(val, strlen(val), cs); ++ table->field[6]->set_notnull(); ++ } ++ ++ if (mysys_var) ++ pthread_mutex_unlock(&mysys_var->mutex); ++ ++ /* INFO */ ++ if (tmp->query) ++ { ++ table->field[7]->store(tmp->query, ++ min(PROCESS_LIST_INFO_WIDTH, ++ tmp->query_length), cs); ++ table->field[7]->set_notnull(); ++ } ++ ++ /* TIME_MS */ ++ table->field[8]->store((double)(utime / 1000.0)); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(1); ++ } ++ ++ } ++ } ++ ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(0); + } + + /***************************************************************************** +@@ -4821,6 +4941,22 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++ST_FIELD_INFO processlist_fields_info[]= ++{ ++ {"ID", 4, MYSQL_TYPE_LONG, 0, 0, "Id"}, ++ {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, ++ {"HOST", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Host"}, ++ {"DB", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Db"}, ++ {"COMMAND", 16, MYSQL_TYPE_STRING, 0, 0, "Command"}, ++ {"TIME", 7, MYSQL_TYPE_LONG, 0, 0, "Time"}, ++ {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State"}, ++ {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info"}, ++ {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL, ++ 0, 0, "Time_ms"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4845,6 +4981,8 @@ + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, + {"OPEN_TABLES", open_tables_fields_info, create_schema_table, + fill_open_tables, make_old_format, 0, -1, -1, 1}, ++ {"PROCESSLIST", processlist_fields_info, create_schema_table, ++ fill_schema_processlist, make_old_format, 0, -1, -1, 0}, + {"PROFILING", query_profile_statistics_info, create_schema_table, + 
fill_query_profile_statistics_info, make_profile_table_for_show, + NULL, -1, -1, false}, +diff -r 327ce7a34c91 sql/table.h +--- a/sql/table.h Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/table.h Fri Nov 07 15:52:53 2008 -0800 +@@ -378,6 +378,7 @@ + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, ++ SCH_PROCESSLIST, + SCH_PROFILES, + SCH_PROCEDURES, + SCH_SCHEMATA, +diff -ruN mysql-5.0.67_highperf/patch_info/microsec_process.info mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,6 @@ ++File=microsec_process.patch ++Name=Adds INFOMATION_SCHEMA.PROCESSLIST with TIME_MS column ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= ++ChangeLog= diff --git a/percona/5.0.75-b12/microslow_innodb.patch b/percona/5.0.75-b12/microslow_innodb.patch new file mode 100644 index 0000000..763981f --- /dev/null +++ b/percona/5.0.75-b12/microslow_innodb.patch @@ -0,0 +1,2472 @@ +diff -r 25523be1816e include/my_getopt.h +--- a/include/my_getopt.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/include/my_getopt.h Mon Dec 22 00:26:39 2008 -0800 +@@ -28,7 +28,8 @@ + #define GET_ULL 8 + #define GET_STR 9 + #define GET_STR_ALLOC 10 +-#define GET_DISABLED 11 ++#define GET_DOUBLE 11 ++#define GET_DISABLED 12 + + #define GET_ASK_ADDR 128 + #define GET_TYPE_MASK 127 +diff -r 25523be1816e include/my_time.h +--- a/include/my_time.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/include/my_time.h Mon Dec 22 00:26:39 2008 -0800 +@@ -140,7 +140,7 @@ + int my_date_to_str(const MYSQL_TIME *l_time, char *to); + int my_datetime_to_str(const MYSQL_TIME *l_time, char *to); + int my_TIME_to_str(const MYSQL_TIME *l_time, char *to); +- ++ulonglong my_timer(ulonglong *ltime, ulonglong frequency); + C_MODE_END + + #endif /* _my_time_h_ */ +diff -r 25523be1816e innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Dec 22 
00:25:06 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:26:39 2008 -0800 +@@ -37,6 +37,9 @@ + #include "log0log.h" + #include "trx0undo.h" + #include "srv0srv.h" ++ ++/* prototypes for new functions added to ha_innodb.cc */ ++trx_t* innobase_get_trx(); + + /* + IMPLEMENTATION OF THE BUFFER POOL +@@ -1086,6 +1089,36 @@ + return(block); + } + ++inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) ++{ ++ ulint block_hash; ++ ulint block_hash_byte; ++ byte block_hash_offset; ++ ++ ut_ad(block); ++ ++ if (!srv_slow_log || !trx || !trx->take_stats) ++ return; ++ ++ if (!trx->distinct_page_access_hash) { ++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ } ++ ++ block_hash = ut_hash_ulint((block->space << 20) + block->space + ++ block->offset, DPAH_SIZE << 3); ++ block_hash_byte = block_hash >> 3; ++ block_hash_offset = (byte) block_hash & 0x07; ++ if (block_hash_byte < 0 || block_hash_byte >= DPAH_SIZE) ++ fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); ++ if (block_hash_offset < 0 || block_hash_offset > 7) ++ fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); ++ if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) ++ trx->distinct_page_access++; ++ trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; ++ return; ++} ++ + /************************************************************************ + This is the general function used to get access to a database page. 
*/ + +@@ -1108,6 +1141,11 @@ + ulint fix_type; + ibool success; + ibool must_read; ++ trx_t* trx = NULL; ++ ulint sec; ++ ulint ms; ++ ib_longlong start_time; ++ ib_longlong finish_time; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) +@@ -1119,6 +1157,9 @@ + #ifndef UNIV_LOG_DEBUG + ut_ad(!ibuf_inside() || ibuf_page(space, offset)); + #endif ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ } + buf_pool->n_page_gets++; + loop: + block = NULL; +@@ -1148,7 +1189,7 @@ + return(NULL); + } + +- buf_read_page(space, offset); ++ buf_read_page(space, offset, trx); + + #ifdef UNIV_DEBUG + buf_dbg_counter++; +@@ -1261,6 +1302,11 @@ + /* Let us wait until the read operation + completes */ + ++ if (srv_slow_log && trx && trx->take_stats) ++ { ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } + for (;;) { + mutex_enter(&block->mutex); + +@@ -1275,6 +1321,12 @@ + + break; + } ++ } ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + } + +@@ -1296,12 +1348,17 @@ + /* In the case of a first access, try to apply linear + read-ahead */ + +- buf_read_ahead_linear(space, offset); ++ buf_read_ahead_linear(space, offset, trx); + } + + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif ++ ++ if (srv_slow_log) { ++ _increment_page_get_statistics(block, trx); ++ } ++ + return(block->frame); + } + +@@ -1326,6 +1383,7 @@ + ibool accessed; + ibool success; + ulint fix_type; ++ trx_t* trx = NULL; + + ut_ad(mtr && block); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); +@@ -1440,13 +1498,18 @@ + read-ahead */ + + buf_read_ahead_linear(buf_frame_get_space_id(guess), +- buf_frame_get_page_no(guess)); ++ buf_frame_get_page_no(guess), trx); + } + + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif + 
buf_pool->n_page_gets++; ++ ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ _increment_page_get_statistics(block, trx); ++ } + + return(TRUE); + } +@@ -1470,6 +1533,7 @@ + buf_block_t* block; + ibool success; + ulint fix_type; ++ trx_t* trx = NULL; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); +@@ -1558,6 +1622,11 @@ + || (ibuf_count_get(block->space, block->offset) == 0)); + #endif + buf_pool->n_page_gets++; ++ ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ _increment_page_get_statistics(block, trx); ++ } + + return(TRUE); + } +diff -r 25523be1816e innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/buf/buf0rea.c Mon Dec 22 00:26:39 2008 -0800 +@@ -70,7 +70,8 @@ + treat the tablespace as dropped; this is a timestamp we + use to stop dangling page reads from a tablespace + which we have DISCARDed + IMPORTed back */ +- ulint offset) /* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx) + { + buf_block_t* block; + ulint wake_later; +@@ -140,10 +141,10 @@ + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + +- *err = fil_io(OS_FILE_READ | wake_later, ++ *err = _fil_io(OS_FILE_READ | wake_later, + sync, space, + offset, 0, UNIV_PAGE_SIZE, +- (void*)block->frame, (void*)block); ++ (void*)block->frame, (void*)block, trx); + ut_a(*err == DB_SUCCESS); + + if (sync) { +@@ -174,8 +175,9 @@ + the page at the given page number does not get + read even if we return a value > 0! 
*/ + ulint space, /* in: space id */ +- ulint offset) /* in: page number of a page which the current thread ++ ulint offset, /* in: page number of a page which the current thread + wants to access */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + buf_block_t* block; +@@ -270,7 +272,7 @@ + if (!ibuf_bitmap_page(i)) { + count += buf_read_page_low(&err, FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, i); ++ space, tablespace_version, i, trx); + if (err == DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); + fprintf(stderr, +@@ -314,7 +316,8 @@ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ +- ulint offset) /* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + ulint count; +@@ -323,13 +326,13 @@ + + tablespace_version = fil_space_get_version(space); + +- count = buf_read_ahead_random(space, offset); ++ count = buf_read_ahead_random(space, offset, trx); + + /* We do the i/o in the synchronous aio mode to save thread + switches: hence TRUE */ + + count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, +- tablespace_version, offset); ++ tablespace_version, offset, trx); + srv_buf_pool_reads+= count2; + if (err == DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); +@@ -374,8 +377,9 @@ + /*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ +- ulint offset) /* in: page number of a page; NOTE: the current thread ++ ulint offset, /* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + buf_block_t* block; +@@ -556,7 +560,7 @@ + if (!ibuf_bitmap_page(i)) { + count += buf_read_page_low(&err, FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, i); ++ space, tablespace_version, i, trx); + if (err == 
DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); + fprintf(stderr, +@@ -625,10 +629,10 @@ + for (i = 0; i < n_stored; i++) { + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, +- space_ids[i], space_versions[i], page_nos[i]); ++ space_ids[i], space_versions[i], page_nos[i], NULL); + } else { + buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE, +- space_ids[i], space_versions[i], page_nos[i]); ++ space_ids[i], space_versions[i], page_nos[i], NULL); + } + + if (err == DB_TABLESPACE_DELETED) { +@@ -704,11 +708,11 @@ + + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, +- tablespace_version, page_nos[i]); ++ tablespace_version, page_nos[i], NULL); + } else { + buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, page_nos[i]); ++ space, tablespace_version, page_nos[i], NULL); + } + } + +diff -r 25523be1816e innobase/fil/fil0fil.c +--- a/innobase/fil/fil0fil.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/fil/fil0fil.c Mon Dec 22 00:26:39 2008 -0800 +@@ -3527,7 +3527,7 @@ + node->name, node->handle, buf, + offset_low, offset_high, + UNIV_PAGE_SIZE * n_pages, +- NULL, NULL); ++ NULL, NULL, NULL); + #endif + if (success) { + node->size += n_pages; +@@ -3851,7 +3851,7 @@ + Reads or writes data. This operation is asynchronous (aio). 
*/ + + ulint +-fil_io( ++_fil_io( + /*===*/ + /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED + if we are trying to do i/o on a tablespace +@@ -3877,8 +3877,9 @@ + void* buf, /* in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ +- void* message) /* in: message for aio handler if non-sync ++ void* message, /* in: message for aio handler if non-sync + aio used, else ignored */ ++ trx_t* trx) + { + fil_system_t* system = fil_system; + ulint mode; +@@ -4018,7 +4019,7 @@ + #else + /* Queue the aio request */ + ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, +- offset_low, offset_high, len, node, message); ++ offset_low, offset_high, len, node, message, trx); + #endif + ut_a(ret); + +diff -r 25523be1816e innobase/include/buf0rea.h +--- a/innobase/include/buf0rea.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/include/buf0rea.h Mon Dec 22 00:26:39 2008 -0800 +@@ -10,6 +10,7 @@ + #define buf0rea_h + + #include "univ.i" ++#include "trx0types.h" + #include "buf0types.h" + + /************************************************************************ +@@ -25,7 +26,8 @@ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ +- ulint offset);/* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx); + /************************************************************************ + Applies linear read-ahead if in the buf_pool the page is a border page of + a linear read-ahead area and all the pages in the area have been accessed. 
+@@ -55,8 +57,9 @@ + /*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ +- ulint offset);/* in: page number of a page; NOTE: the current thread ++ ulint offset, /* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ ++ trx_t* trx); + /************************************************************************ + Issues read requests for pages which the ibuf module wants to read in, in + order to contract the insert buffer tree. Technically, this function is like +diff -r 25523be1816e innobase/include/fil0fil.h +--- a/innobase/include/fil0fil.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/include/fil0fil.h Mon Dec 22 00:26:39 2008 -0800 +@@ -534,8 +534,11 @@ + /************************************************************************ + Reads or writes data. This operation is asynchronous (aio). */ + ++#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \ ++ _fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL) ++ + ulint +-fil_io( ++_fil_io( + /*===*/ + /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED + if we are trying to do i/o on a tablespace +@@ -561,8 +564,9 @@ + void* buf, /* in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ +- void* message); /* in: message for aio handler if non-sync ++ void* message, /* in: message for aio handler if non-sync + aio used, else ignored */ ++ trx_t* trx); + /************************************************************************ + Reads data from a space to a buffer. 
Remember that the possible incomplete + blocks at the end of file are ignored: they are not taken into account when +diff -r 25523be1816e innobase/include/os0file.h +--- a/innobase/include/os0file.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/include/os0file.h Mon Dec 22 00:26:39 2008 -0800 +@@ -10,6 +10,8 @@ + #define os0file_h + + #include "univ.i" ++ ++#include "trx0types.h" + + #ifndef __WIN__ + #include <dirent.h> +@@ -421,8 +423,11 @@ + /*********************************************************************** + Requests a synchronous read operation. */ + ++#define os_file_read(file, buf, offset, offset_high, n) \ ++ _os_file_read(file, buf, offset, offset_high, n, NULL) ++ + ibool +-os_file_read( ++_os_file_read( + /*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ +@@ -432,7 +437,8 @@ + offset where to read */ + ulint offset_high,/* in: most significant 32 bits of + offset */ +- ulint n); /* in: number of bytes to read */ ++ ulint n, /* in: number of bytes to read */ ++ trx_t* trx); + /*********************************************************************** + Rewind file to its start, read at most size - 1 bytes from it to str, and + NUL-terminate str. All errors are silently ignored. This function is +@@ -584,7 +590,8 @@ + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ +- void* message2); ++ void* message2, ++ trx_t* trx); + /**************************************************************************** + Wakes up all async i/o threads so that they know to exit themselves in + shutdown. 
*/ +diff -r 25523be1816e innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:26:39 2008 -0800 +@@ -26,6 +26,8 @@ + at a time */ + #define SRV_AUTO_EXTEND_INCREMENT \ + (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) ++ ++extern ibool srv_slow_log; + + /* This is set to TRUE if the MySQL user has set it in MySQL */ + extern ibool srv_lower_case_table_names; +diff -r 25523be1816e innobase/include/trx0trx.h +--- a/innobase/include/trx0trx.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/include/trx0trx.h Mon Dec 22 00:26:39 2008 -0800 +@@ -668,6 +668,17 @@ + /*------------------------------*/ + char detailed_error[256]; /* detailed error message for last + error, or empty. */ ++ /*------------------------------*/ ++ ulint io_reads; ++ ib_longlong io_read; ++ ulint io_reads_wait_timer; ++ ib_longlong lock_que_wait_ustarted; ++ ulint lock_que_wait_timer; ++ ulint innodb_que_wait_timer; ++ ulint distinct_page_access; ++#define DPAH_SIZE 8192 ++ byte* distinct_page_access_hash; ++ ibool take_stats; + }; + + #define TRX_MAX_N_THREADS 32 /* maximum number of concurrent +diff -r 25523be1816e innobase/lock/lock0lock.c +--- a/innobase/lock/lock0lock.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/lock/lock0lock.c Mon Dec 22 00:26:39 2008 -0800 +@@ -1806,6 +1806,8 @@ + { + lock_t* lock; + trx_t* trx; ++ ulint sec; ++ ulint ms; + + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +@@ -1861,6 +1863,10 @@ + trx->que_state = TRX_QUE_LOCK_WAIT; + trx->was_chosen_as_deadlock_victim = FALSE; + trx->wait_started = time(NULL); ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; ++ } + + ut_a(que_thr_stop(thr)); + +@@ -3514,7 +3520,9 @@ + { + lock_t* lock; + trx_t* trx; +- ++ ulint sec; ++ ulint ms; ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); + #endif /* UNIV_SYNC_DEBUG */ +@@ 
-3564,6 +3572,10 @@ + return(DB_SUCCESS); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; ++ } + trx->que_state = TRX_QUE_LOCK_WAIT; + trx->was_chosen_as_deadlock_victim = FALSE; + trx->wait_started = time(NULL); +diff -r 25523be1816e innobase/os/os0file.c +--- a/innobase/os/os0file.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/os/os0file.c Mon Dec 22 00:26:39 2008 -0800 +@@ -14,6 +14,7 @@ + #include "srv0start.h" + #include "fil0fil.h" + #include "buf0buf.h" ++#include "trx0sys.h" + + #if defined(UNIV_HOTBACKUP) && defined(__WIN__) + /* Add includes for the _stat() call to compile on Windows */ +@@ -1903,9 +1904,13 @@ + #ifndef __WIN__ + /*********************************************************************** + Does a synchronous read operation in Posix. */ ++ ++#define os_file_pread(file, buf, n, offset, offset_high) \ ++ _os_file_pread(file, buf, n, offset, offset_high, NULL); ++ + static + ssize_t +-os_file_pread( ++_os_file_pread( + /*==========*/ + /* out: number of bytes read, -1 if error */ + os_file_t file, /* in: handle to a file */ +@@ -1913,12 +1918,17 @@ + ulint n, /* in: number of bytes to read */ + ulint offset, /* in: least significant 32 bits of file + offset from where to read */ +- ulint offset_high) /* in: most significant 32 bits of +- offset */ ++ ulint offset_high, /* in: most significant 32 bits of ++ offset */ ++ trx_t* trx) + { + off_t offs; + ssize_t n_bytes; +- ++ ulint sec; ++ ulint ms; ++ ib_longlong start_time; ++ ib_longlong finish_time; ++ + ut_a((offset & 0xFFFFFFFFUL) == offset); + + /* If off_t is > 4 bytes in size, then we assume we can pass a +@@ -1937,7 +1947,13 @@ + } + + os_n_file_reads++; +- ++ if (srv_slow_log && trx && trx->take_stats) ++ { ++ trx->io_reads++; ++ trx->io_read += n; ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } + #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) + 
os_mutex_enter(os_file_count_mutex); + os_file_n_pending_preads++; +@@ -1951,6 +1967,13 @@ + os_n_pending_reads--; + os_mutex_exit(os_file_count_mutex); + ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); ++ } ++ + return(n_bytes); + #else + { +@@ -1980,6 +2003,13 @@ + os_mutex_enter(os_file_count_mutex); + os_n_pending_reads--; + os_mutex_exit(os_file_count_mutex); ++ ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); ++ } + + return(ret); + } +@@ -2103,7 +2133,7 @@ + Requests a synchronous positioned read operation. */ + + ibool +-os_file_read( ++_os_file_read( + /*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ +@@ -2113,7 +2143,8 @@ + offset where to read */ + ulint offset_high, /* in: most significant 32 bits of + offset */ +- ulint n) /* in: number of bytes to read */ ++ ulint n, /* in: number of bytes to read */ ++ trx_t* trx) + { + #ifdef __WIN__ + BOOL ret; +@@ -2177,7 +2208,7 @@ + os_bytes_read_since_printout += n; + + try_again: +- ret = os_file_pread(file, buf, n, offset, offset_high); ++ ret = _os_file_pread(file, buf, n, offset, offset_high, trx); + + if ((ulint)ret == n) { + +@@ -3137,7 +3168,8 @@ + offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ +- ulint len) /* in: length of the block to read or write */ ++ ulint len, /* in: length of the block to read or write */ ++ trx_t* trx) + { + os_aio_slot_t* slot; + #ifdef WIN_ASYNC_IO +@@ -3390,7 +3422,8 @@ + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ +- void* message2) ++ void* message2, ++ trx_t* trx) + { + os_aio_array_t* array; + os_aio_slot_t* slot; +@@ -3429,8 +3462,8 @@ + wait 
in the Windows case. */ + + if (type == OS_FILE_READ) { +- return(os_file_read(file, buf, offset, +- offset_high, n)); ++ return(_os_file_read(file, buf, offset, ++ offset_high, n, trx)); + } + + ut_a(type == OS_FILE_WRITE); +@@ -3463,8 +3496,13 @@ + ut_error; + } + ++ if (trx && type == OS_FILE_READ) ++ { ++ trx->io_reads++; ++ trx->io_read += n; ++ } + slot = os_aio_array_reserve_slot(type, array, message1, message2, file, +- name, buf, offset, offset_high, n); ++ name, buf, offset, offset_high, n, trx); + if (type == OS_FILE_READ) { + if (os_aio_use_native_aio) { + #ifdef WIN_ASYNC_IO +diff -r 25523be1816e innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:26:39 2008 -0800 +@@ -47,6 +47,8 @@ + #include "dict0boot.h" + #include "srv0start.h" + #include "row0mysql.h" ++ ++ibool srv_slow_log = 0; + + /* This is set to TRUE if the MySQL user has set it in MySQL; currently + affects only FOREIGN KEY definition parsing */ +@@ -996,6 +998,10 @@ + ibool has_slept = FALSE; + srv_conc_slot_t* slot = NULL; + ulint i; ++ ib_longlong start_time = 0L; ++ ib_longlong finish_time = 0L; ++ ulint sec; ++ ulint ms; + + /* If trx has 'free tickets' to enter the engine left, then use one + such ticket */ +@@ -1054,6 +1060,7 @@ + if (SRV_THREAD_SLEEP_DELAY > 0) + { + os_thread_sleep(SRV_THREAD_SLEEP_DELAY); ++ trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; + } + + trx->op_info = ""; +@@ -1109,11 +1116,22 @@ + /* Go to wait for the event; when a thread leaves InnoDB it will + release this thread */ + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } ++ + trx->op_info = "waiting in InnoDB queue"; + + os_event_wait(slot->event); + + trx->op_info = ""; ++ ++ if (srv_slow_log && trx->take_stats && start_time) { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->innodb_que_wait_timer += 
(ulint)(finish_time - start_time); ++ } + + os_fast_mutex_lock(&srv_conc_mutex); + +diff -r 25523be1816e innobase/trx/trx0trx.c +--- a/innobase/trx/trx0trx.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/innobase/trx/trx0trx.c Mon Dec 22 00:26:39 2008 -0800 +@@ -190,6 +190,15 @@ + trx->global_read_view_heap = mem_heap_create(256); + trx->global_read_view = NULL; + trx->read_view = NULL; ++ ++ trx->io_reads = 0; ++ trx->io_read = 0; ++ trx->io_reads_wait_timer = 0; ++ trx->lock_que_wait_timer = 0; ++ trx->innodb_que_wait_timer = 0; ++ trx->distinct_page_access = 0; ++ trx->distinct_page_access_hash = NULL; ++ trx->take_stats = FALSE; + + /* Set X/Open XA transaction identification to NULL */ + memset(&trx->xid, 0, sizeof(trx->xid)); +@@ -230,6 +239,11 @@ + + trx->mysql_process_no = os_proc_get_number(); + ++ if (srv_slow_log && trx->take_stats) { ++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ } ++ + return(trx); + } + +@@ -366,6 +380,12 @@ + /*===============*/ + trx_t* trx) /* in, own: trx object */ + { ++ if (trx->distinct_page_access_hash) ++ { ++ mem_free(trx->distinct_page_access_hash); ++ trx->distinct_page_access_hash= NULL; ++ } ++ + thr_local_free(trx->mysql_thread_id); + + mutex_enter(&kernel_mutex); +@@ -389,6 +409,12 @@ + /*====================*/ + trx_t* trx) /* in, own: trx object */ + { ++ if (trx->distinct_page_access_hash) ++ { ++ mem_free(trx->distinct_page_access_hash); ++ trx->distinct_page_access_hash= NULL; ++ } ++ + mutex_enter(&kernel_mutex); + + trx_free(trx); +@@ -1064,7 +1090,10 @@ + trx_t* trx) /* in: transaction */ + { + que_thr_t* thr; +- ++ ulint sec; ++ ulint ms; ++ ib_longlong now; ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); + #endif /* UNIV_SYNC_DEBUG */ +@@ -1080,6 +1109,11 @@ + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ now = (ib_longlong)sec * 1000000 + ms; ++ 
trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); ++ } + trx->que_state = TRX_QUE_RUNNING; + } + +@@ -1093,6 +1127,9 @@ + trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ + { + que_thr_t* thr; ++ ulint sec; ++ ulint ms; ++ ib_longlong now; + + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +@@ -1109,6 +1146,11 @@ + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ now = (ib_longlong)sec * 1000000 + ms; ++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); ++ } + trx->que_state = TRX_QUE_RUNNING; + } + +diff -r 25523be1816e mysys/my_getopt.c +--- a/mysys/my_getopt.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/mysys/my_getopt.c Mon Dec 22 00:26:39 2008 -0800 +@@ -1061,6 +1061,9 @@ + case GET_ULONG: + printf("%lu\n", *((ulong*) value)); + break; ++ case GET_DOUBLE: ++ printf("%6f\n", *((double*) value)); ++ break; + case GET_LL: + printf("%s\n", llstr(*((longlong*) value), buff)); + break; +diff -r 25523be1816e patch_info/microslow_innodb.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/microslow_innodb.info Mon Dec 22 00:26:39 2008 -0800 +@@ -0,0 +1,15 @@ ++File=microslow_innodb.patch ++Name=Extended statistics in slow.log ++Version=1.2 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= ++Changelog ++2008-11-26 ++YK: Fix inefficient determination of trx, Make not to call useless gettimeofday when don't use slow log. Make log_slow_queries dynamic (bool). ++ ++2008-11-07 ++VT: Moved log_slow_rate_limit in SHOW VARIABLE into right place ++ ++2008-11 ++Arjen Lentz: Fixups (backward compatibility) by Arjen Lentz <arjen@openquery.com.au> +diff -r 25523be1816e scripts/mysqldumpslow.sh +--- a/scripts/mysqldumpslow.sh Mon Dec 22 00:25:06 2008 -0800 ++++ b/scripts/mysqldumpslow.sh Mon Dec 22 00:26:39 2008 -0800 +@@ -83,8 +83,8 @@ + s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//; + my ($user,$host) = s/^#? 
User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('',''); + +- s/^# Query_time: (\d+) Lock_time: (\d+) Rows_sent: (\d+).*\n//; +- my ($t, $l, $r) = ($1, $2, $3); ++ s/^# Query_time: (\d+(\.\d+)?) Lock_time: (\d+(\.\d+)?) Rows_sent: (\d+(\.\d+)?).*\n//; ++ my ($t, $l, $r) = ($1, $3, $5); + $t -= $l unless $opt{l}; + + # remove fluff that mysqld writes to log when it (re)starts: +diff -r 25523be1816e sql-common/my_time.c +--- a/sql-common/my_time.c Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql-common/my_time.c Mon Dec 22 00:26:39 2008 -0800 +@@ -1252,3 +1252,37 @@ + return 0; + } + ++/* ++ int my_timer(ulonglong *ltime, ulonglong frequency) ++ ++ For performance measurement this function returns the number ++ of microseconds since the epoch (SVr4, BSD 4.3, POSIX 1003.1-2001) ++ or system start (Windows platforms). ++ ++ For windows platforms frequency value (obtained via ++ QueryPerformanceFrequency) has to be specified. The global frequency ++ value is set in mysqld.cc. ++ ++ If Windows platform doesn't support QueryPerformanceFrequency we will ++ obtain the time via GetClockCount, which supports microseconds only. 
++*/ ++ ++ulonglong my_timer(ulonglong *ltime, ulonglong frequency) ++{ ++ ulonglong newtime= 0; ++#ifdef __WIN__ ++ if (frequency) ++ { ++ QueryPerformanceCounter((LARGE_INTEGER *)&newtime); ++ newtime/= (frequency * 1000000); ++ } else ++ newtime= (GetTickCount() * 1000; /* GetTickCount only returns milliseconds */ ++#else ++ struct timeval t; ++ if (gettimeofday(&t, NULL) != -1) ++ newtime= (ulonglong)t.tv_sec * 1000000 + t.tv_usec; ++#endif ++ if (ltime) ++ *ltime= newtime; ++ return newtime; ++} +diff -r 25523be1816e sql/filesort.cc +--- a/sql/filesort.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/filesort.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -180,6 +180,7 @@ + { + statistic_increment(thd->status_var.filesort_scan_count, &LOCK_status); + } ++ thd->query_plan_flags|= QPLAN_FILESORT; + #ifdef CAN_TRUST_RANGE + if (select && select->quick && select->quick->records > 0L) + { +@@ -245,6 +246,7 @@ + } + else + { ++ thd->query_plan_flags|= QPLAN_FILESORT_DISK; + if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer) + { + x_free(table_sort.buffpek); +@@ -1116,6 +1118,7 @@ + + statistic_increment(current_thd->status_var.filesort_merge_passes, + &LOCK_status); ++ current_thd->query_plan_fsort_passes++; + if (param->not_killable) + { + killed= ¬_killable; +diff -r 25523be1816e sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -1,3 +1,4 @@ ++ + /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy + + This program is free software; you can redistribute it and/or modify +@@ -805,9 +806,34 @@ + trx->check_unique_secondary = TRUE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + return(trx); + } + ++/************************************************************************* ++Gets current trx. 
*/ ++extern "C" ++trx_t* ++innobase_get_trx() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ return((trx_t*) thd->ha_data[innobase_hton.slot]); ++ } else { ++ return(NULL); ++ } ++} ++ ++void ++innobase_update_var_slow_log() ++{ ++ srv_slow_log = (ibool) opt_slow_log; ++} + + /************************************************************************* + Construct ha_innobase handler. */ +@@ -1309,6 +1335,8 @@ + } + + /* -------------- Log files ---------------------------*/ ++ ++ srv_slow_log = (ibool) opt_slow_log; + + /* The default dir for log files is the datadir of MySQL */ + +@@ -4673,6 +4701,12 @@ + trx->check_unique_secondary = FALSE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + if (lower_case_table_names) { + srv_lower_case_table_names = TRUE; + } else { +@@ -4938,6 +4972,12 @@ + trx->check_unique_secondary = FALSE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + name_len = strlen(name); + + assert(name_len < 1000); +@@ -5025,6 +5065,12 @@ + trx->check_foreigns = FALSE; + } + ++ if (current_thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + error = row_drop_database_for_mysql(namebuf, trx); + my_free(namebuf, MYF(0)); + +@@ -5089,6 +5135,12 @@ + + if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; ++ } ++ ++ if (current_thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; + } + + name_len1 = strlen(from); +@@ -6098,6 +6150,7 @@ + { + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + trx_t* trx; ++ int i; + + DBUG_ENTER("ha_innobase::external_lock"); + DBUG_PRINT("enter",("lock_type: %d", lock_type)); +@@ -6221,7 +6274,24 @@ + + if (trx->n_mysql_tables_in_use 
== 0) { + +- trx->mysql_n_tables_locked = 0; ++ current_thd->innodb_was_used = TRUE; ++ current_thd->innodb_io_reads += trx->io_reads; ++ current_thd->innodb_io_read += trx->io_read; ++ current_thd->innodb_io_reads_wait_timer += trx->io_reads_wait_timer; ++ current_thd->innodb_lock_que_wait_timer += trx->lock_que_wait_timer; ++ current_thd->innodb_innodb_que_wait_timer += trx->innodb_que_wait_timer; ++ current_thd->innodb_page_access += trx->distinct_page_access; ++ ++ trx->io_reads = 0; ++ trx->io_read = 0; ++ trx->io_reads_wait_timer = 0; ++ trx->lock_que_wait_timer = 0; ++ trx->innodb_que_wait_timer = 0; ++ trx->distinct_page_access = 0; ++ if (trx->distinct_page_access_hash) ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ ++ trx->mysql_n_tables_locked = 0; + prebuilt->used_in_HANDLER = FALSE; + + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { +diff -r 25523be1816e sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:26:39 2008 -0800 +@@ -266,6 +266,8 @@ + + int innobase_start_trx_and_assign_read_view(THD* thd); + ++void innobase_update_var_slow_log(); ++ + /*********************************************************************** + This function is used to prepare X/Open XA distributed transaction */ + +diff -r 25523be1816e sql/log.cc +--- a/sql/log.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/log.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -2229,11 +2229,12 @@ + */ + + bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, +- time_t query_start_arg) ++ time_t query_start_arg, ulonglong query_start_timer) + { + bool error=0; + time_t current_time; +- if (!is_open()) ++ ulonglong current_timer; ++ if (!opt_slow_log || !is_open()) + return 0; + DBUG_ENTER("MYSQL_LOG::write"); + +@@ -2243,7 +2244,8 @@ + int tmp_errno=0; + char buff[80],*end; + end=buff; +- if (!(thd->options & OPTION_UPDATE_LOG)) ++ if (!(thd->options & OPTION_UPDATE_LOG) && ++ !(thd->slave_thread && 
opt_log_slow_slave_statements)) + { + VOID(pthread_mutex_unlock(&LOCK_log)); + DBUG_RETURN(0); +@@ -2273,22 +2275,72 @@ + if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n", + sctx->priv_user ? + sctx->priv_user : "", +- sctx->user ? sctx->user : "", ++ sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), + sctx->host ? sctx->host : "", + sctx->ip ? sctx->ip : "") == + (uint) -1) + tmp_errno=errno; + } +- if (query_start_arg) ++ if (query_start_timer) + { ++ char buf[5][20]; ++ ulonglong current_timer= my_timer(¤t_timer, frequency); ++ snprintf(buf[0], 20, "%.6f", (current_timer ? (current_timer - query_start_timer):0) / 1000000.0); ++ snprintf(buf[1], 20, "%.6f", (thd->timer_after_lock - query_start_timer) / 1000000.0); ++ if (!query_length) ++ { ++ thd->sent_row_count= thd->examined_row_count= 0; ++ thd->row_count= 0; ++ thd->innodb_was_used= FALSE; ++ thd->query_plan_flags= QPLAN_NONE; ++ thd->query_plan_fsort_passes= 0; ++ } ++ + /* For slow query log */ + if (my_b_printf(&log_file, +- "# Query_time: %lu Lock_time: %lu Rows_sent: %lu Rows_examined: %lu\n", +- (ulong) (current_time - query_start_arg), +- (ulong) (thd->time_after_lock - query_start_arg), ++ "# Thread_id: %lu Schema: %s\n" \ ++ "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu Rows_affected: %lu Rows_read: %lu\n", ++ (ulong) thd->thread_id, (thd->db ? thd->db : ""), ++ buf[0], buf[1], + (ulong) thd->sent_row_count, +- (ulong) thd->examined_row_count) == (uint) -1) ++ (ulong) thd->examined_row_count, ++ ((long) thd->row_count_func > 0 ) ? (ulong) thd->row_count_func : 0, ++ (ulong) thd->row_count) == (uint) -1) + tmp_errno=errno; ++ if ((thd->variables.log_slow_verbosity & SLOG_V_QUERY_PLAN) && ++ my_b_printf(&log_file, ++ "# QC_Hit: %s Full_scan: %s Full_join: %s Tmp_table: %s Tmp_table_on_disk: %s\n" \ ++ "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu\n", ++ ((thd->query_plan_flags & QPLAN_QC) ? 
"Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"), ++ thd->query_plan_fsort_passes) == (uint) -1) ++ tmp_errno=errno; ++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used) ++ { ++ snprintf(buf[2], 20, "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0); ++ snprintf(buf[3], 20, "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0); ++ snprintf(buf[4], 20, "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0); ++ if (my_b_printf(&log_file, ++ "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \ ++ "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \ ++ "# InnoDB_pages_distinct: %lu\n", ++ (ulong) thd->innodb_io_reads, ++ (ulong) thd->innodb_io_read, ++ buf[2], buf[3], buf[4], ++ (ulong) thd->innodb_page_access) == (uint) -1) ++ tmp_errno=errno; ++ } ++ else ++ { ++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && ++ my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1) ++ tmp_errno=errno; ++ } + } + if (thd->db && strcmp(thd->db,db)) + { // Database changed +diff -r 25523be1816e sql/log_event.cc +--- a/sql/log_event.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/log_event.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -2039,6 +2039,7 @@ + /* Execute the query (note that we bypass dispatch_command()) */ + const char* found_semicolon= NULL; + mysql_parse(thd, thd->query, thd->query_length, &found_semicolon); ++ log_slow_statement(thd); + + } + else +diff -r 25523be1816e sql/mysql_priv.h +--- a/sql/mysql_priv.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/mysql_priv.h Mon Dec 22 00:26:39 2008 -0800 +@@ -489,6 +489,78 @@ + #define WEEK_FIRST_WEEKDAY 4 
+ + #define STRING_BUFFER_USUAL_SIZE 80 ++ ++/* Slow log */ ++ ++struct msl_opts ++{ ++ ulong val; ++ const char *name; ++}; ++ ++#define SLOG_V_MICROTIME 1 << 0 ++#define SLOG_V_QUERY_PLAN 1 << 1 ++#define SLOG_V_INNODB 1 << 2 ++/* ... */ ++#define SLOG_V_INVALID 1 << 31 ++#define SLOG_V_NONE SLOG_V_MICROTIME ++ ++static const struct msl_opts slog_verb[]= ++{ ++ /* Basic flags */ ++ ++ { SLOG_V_MICROTIME, "microtime" }, ++ { SLOG_V_QUERY_PLAN, "query_plan" }, ++ { SLOG_V_INNODB, "innodb" }, ++ ++ /* End of baisc flags */ ++ ++ { 0, "" }, ++ ++ /* Complex flags */ ++ ++ { SLOG_V_MICROTIME, "minimal" }, ++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN, "standard" }, ++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" }, ++ ++ /* End of complex flags */ ++ ++ { SLOG_V_INVALID, (char *)0 } ++}; ++ ++#define QPLAN_NONE 0 ++#define QPLAN_QC 1 << 0 ++#define QPLAN_QC_NO 1 << 1 ++#define QPLAN_FULL_SCAN 1 << 2 ++#define QPLAN_FULL_JOIN 1 << 3 ++#define QPLAN_TMP_TABLE 1 << 4 ++#define QPLAN_TMP_DISK 1 << 5 ++#define QPLAN_FILESORT 1 << 6 ++#define QPLAN_FILESORT_DISK 1 << 7 ++/* ... 
*/ ++#define QPLAN_MAX 1 << 31 ++ ++#define SLOG_F_QC_NO QPLAN_QC_NO ++#define SLOG_F_FULL_SCAN QPLAN_FULL_SCAN ++#define SLOG_F_FULL_JOIN QPLAN_FULL_JOIN ++#define SLOG_F_TMP_TABLE QPLAN_TMP_TABLE ++#define SLOG_F_TMP_DISK QPLAN_TMP_DISK ++#define SLOG_F_FILESORT QPLAN_FILESORT ++#define SLOG_F_FILESORT_DISK QPLAN_FILESORT_DISK ++#define SLOG_F_INVALID 1 << 31 ++#define SLOG_F_NONE 0 ++ ++static const struct msl_opts slog_filter[]= ++{ ++ { SLOG_F_QC_NO, "qc_miss" }, ++ { SLOG_F_FULL_SCAN, "full_scan" }, ++ { SLOG_F_FULL_JOIN, "full_join" }, ++ { SLOG_F_TMP_TABLE, "tmp_table" }, ++ { SLOG_F_TMP_DISK, "tmp_table_on_disk" }, ++ { SLOG_F_FILESORT, "filesort" }, ++ { SLOG_F_FILESORT_DISK, "filesort_on_disk" }, ++ { SLOG_F_INVALID, (char *)0 } ++}; + + enum enum_parsing_place + { +@@ -1345,6 +1417,7 @@ + extern bool using_update_log, opt_large_files, server_id_supplied; + extern bool opt_update_log, opt_bin_log, opt_error_log; + extern my_bool opt_log, opt_slow_log, opt_log_queries_not_using_indexes; ++extern char *opt_slow_logname; + extern bool opt_disable_networking, opt_skip_show_db; + extern my_bool opt_character_set_client_handshake; + extern bool volatile abort_loop, shutdown_in_progress, grant_option; +@@ -1356,7 +1429,8 @@ + extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; + extern my_bool opt_secure_auth; + extern char* opt_secure_file_priv; +-extern my_bool opt_log_slow_admin_statements; ++extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements; ++extern my_bool opt_use_global_long_query_time; + extern my_bool sp_automatic_privileges, opt_noacl; + extern my_bool opt_old_style_user_limits, trust_function_creators; + extern uint opt_crash_binlog_innodb; +diff -r 25523be1816e sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -175,7 +175,6 @@ + static void getvolumename(); + static void getvolumeID(BYTE *volumeName); + #endif /* __NETWARE__ 
*/ +- + + #ifdef _AIX41 + int initgroups(const char *,unsigned int); +@@ -409,10 +408,13 @@ + my_bool opt_secure_auth= 0; + char* opt_secure_file_priv= 0; + my_bool opt_log_slow_admin_statements= 0; ++my_bool opt_log_slow_slave_statements= 0; ++my_bool opt_use_global_long_query_time= 0; + my_bool lower_case_file_system= 0; + my_bool opt_large_pages= 0; + uint opt_large_page_size= 0; + my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; ++char* opt_slow_logname= 0; + /* + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is +@@ -507,6 +508,7 @@ + Ge_creator ge_creator; + Le_creator le_creator; + ++ulonglong frequency= 0; + + FILE *bootstrap_file; + int bootstrap_error; +@@ -584,7 +586,7 @@ + static int cleanup_done; + static ulong opt_specialflag, opt_myisam_block_size; + static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; +-static char *opt_slow_logname, *opt_tc_heuristic_recover; ++static char *opt_tc_heuristic_recover; + static char *mysql_home_ptr, *pidfile_name_ptr; + static char **defaults_argv; + static char *opt_bin_logname; +@@ -3655,6 +3657,8 @@ + unireg_abort(1); + } + } ++ if (!QueryPerformanceFrequency((LARGE_INTEGER *)&frequency)) ++ frequency= 0; + #endif /* __WIN__ */ + + if (init_common_variables(MYSQL_CONFIG_NAME, +@@ -4901,7 +4905,7 @@ + OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, + OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE, + OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD, +- OPT_LONG_QUERY_TIME, ++ OPT_LONG_QUERY_TIME, OPT_MIN_EXAMINED_ROW_LIMIT, + OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET, + OPT_MAX_BINLOG_CACHE_SIZE, OPT_MAX_BINLOG_SIZE, + OPT_MAX_CONNECTIONS, OPT_MAX_CONNECT_ERRORS, +@@ -4992,11 +4996,18 @@ + OPT_TIMED_MUTEXES, + OPT_OLD_STYLE_USER_LIMITS, + OPT_LOG_SLOW_ADMIN_STATEMENTS, ++ OPT_LOG_SLOW_SLAVE_STATEMENTS, ++ OPT_LOG_SLOW_RATE_LIMIT, ++ OPT_LOG_SLOW_VERBOSITY, ++ 
OPT_LOG_SLOW_FILTER, + OPT_TABLE_LOCK_WAIT_TIMEOUT, + OPT_PLUGIN_DIR, + OPT_PORT_OPEN_TIMEOUT, + OPT_MERGE, + OPT_PROFILING, ++ OPT_SLOW_LOG, ++ OPT_SLOW_QUERY_LOG_FILE, ++ OPT_USE_GLOBAL_LONG_QUERY_TIME, + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, + OPT_KEEP_FILES_ON_CREATE, +@@ -5386,8 +5396,17 @@ + (gptr*) &opt_log_slow_admin_statements, + (gptr*) &opt_log_slow_admin_statements, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, ++ {"log-slow-slave-statements", OPT_LOG_SLOW_SLAVE_STATEMENTS, ++ "Log slow replicated statements to the slow log if it is open.", ++ (gptr*) &opt_log_slow_slave_statements, ++ (gptr*) &opt_log_slow_slave_statements, ++ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"log-slow-queries", OPT_SLOW_QUERY_LOG, + "Log slow queries to this log file. Defaults logging to hostname-slow.log file. Must be enabled to activate other slow log options.", ++ (gptr*) &opt_slow_logname, (gptr*) &opt_slow_logname, 0, GET_STR, OPT_ARG, ++ 0, 0, 0, 0, 0, 0}, ++ {"slow_query_log_file", OPT_SLOW_QUERY_LOG_FILE, ++ "Log slow queries to given log file. Defaults logging to hostname-slow.log. 
Must be enabled to activate other slow log options.", + (gptr*) &opt_slow_logname, (gptr*) &opt_slow_logname, 0, GET_STR, OPT_ARG, + 0, 0, 0, 0, 0, 0}, + {"log-tc", OPT_LOG_TC, +@@ -5753,6 +5772,9 @@ + "Tells the slave thread to continue replication when a query returns an error from the provided list.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + #endif ++ {"slow-query-log", OPT_SLOW_LOG, ++ "Enable|disable slow query log", (gptr*) &opt_slow_log, ++ (gptr*) &opt_slow_log, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"socket", OPT_SOCKET, "Socket file to use for connection.", + (gptr*) &mysqld_unix_port, (gptr*) &mysqld_unix_port, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +@@ -6055,11 +6077,31 @@ + (gptr*) 0, + 0, (GET_ULONG | GET_ASK_ADDR) , REQUIRED_ARG, 100, + 1, 100, 0, 1, 0}, +- {"long_query_time", OPT_LONG_QUERY_TIME, +- "Log all queries that have taken more than long_query_time seconds to execute to file.", +- (gptr*) &global_system_variables.long_query_time, +- (gptr*) &max_system_variables.long_query_time, 0, GET_ULONG, +- REQUIRED_ARG, 10, 1, LONG_TIMEOUT, 0, 1, 0}, ++ {"log_slow_filter", OPT_LOG_SLOW_FILTER, ++ "Log only the queries that followed certain execution plan. Multiple flags allowed in a comma-separated string. [qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, filesort, filesort_on_disk]", ++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_F_NONE, 0, 0}, ++ {"log_slow_rate_limit", OPT_LOG_SLOW_RATE_LIMIT, ++ "Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.", ++ (gptr*) &global_system_variables.log_slow_rate_limit, ++ (gptr*) &max_system_variables.log_slow_rate_limit, 0, GET_ULONG, ++ REQUIRED_ARG, 1, 1, LONG_MAX, 0, 1L, 0}, ++ {"log_slow_verbosity", OPT_LOG_SLOW_VERBOSITY, ++ "Choose how verbose the messages to your slow log will be. Multiple flags allowed in a comma-separated string. 
[microtime, query_plan, innodb]", ++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_V_MICROTIME, 0, 0}, ++ {"long_query_time", OPT_LONG_QUERY_TIME, ++ "Log all queries that have taken more than long_query_time seconds to execute to file.", ++ (gptr*) &global_system_variables.long_query_time, ++ (gptr*) &max_system_variables.long_query_time, 0, GET_DOUBLE, ++ REQUIRED_ARG, 10000000, 0, LONG_TIMEOUT * 1000000, 0, 1, 0}, ++ {"min_examined_row_limit", OPT_MIN_EXAMINED_ROW_LIMIT, ++ "Don't log queries which examine less than min_examined_row_limit rows to file.", ++ (gptr*) &global_system_variables.min_examined_row_limit, ++ (gptr*) &max_system_variables.min_examined_row_limit, 0, GET_ULONG, ++ REQUIRED_ARG, 0, 0, LONG_MAX, 0, 1L, 0}, ++ {"use_global_long_query_time", OPT_USE_GLOBAL_LONG_QUERY_TIME, ++ "Control always use global long_query_time or local long_query_time.", ++ (gptr*) &opt_use_global_long_query_time, (gptr*) &opt_use_global_long_query_time, ++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, + {"lower_case_table_names", OPT_LOWER_CASE_TABLE_NAMES, + "If set to 1 table names are stored in lowercase on disk and table names will be case-insensitive. Should be set to 2 if you are using a case insensitive file system", + (gptr*) &lower_case_table_names, +@@ -6835,7 +6873,11 @@ + global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; + max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; + global_system_variables.old_passwords= 0; +- ++ global_system_variables.long_query_time = 10000000; ++ max_system_variables.long_query_time = LONG_TIMEOUT * 1000000; ++ global_system_variables.log_slow_verbosity= SLOG_V_MICROTIME; ++ global_system_variables.log_slow_filter= SLOG_F_NONE; ++ + /* + Default behavior for 4.1 and 5.0 is to treat NULL values as unequal + when collecting index statistics for MyISAM tables. 
+@@ -7296,6 +7338,35 @@ + case OPT_BOOTSTRAP: + opt_noacl=opt_bootstrap=1; + break; ++ case OPT_LOG_SLOW_FILTER: ++ if ((global_system_variables.log_slow_filter= ++ msl_flag_resolve_by_name(slog_filter, argument, ++ SLOG_F_NONE, SLOG_F_INVALID)) == SLOG_F_INVALID) ++ { ++ fprintf(stderr,"Invalid argument in log_slow_filter: %s\n", argument); ++ exit(1); ++ } ++ break; ++ case OPT_LOG_SLOW_VERBOSITY: ++ if ((global_system_variables.log_slow_verbosity= ++ msl_flag_resolve_by_name(slog_verb, argument, ++ SLOG_V_NONE, SLOG_V_INVALID)) == SLOG_V_INVALID) ++ { ++ fprintf(stderr,"Invalid argument in log_slow_verbosity: %s\n", argument); ++ exit(1); ++ } ++ break; ++ case OPT_LONG_QUERY_TIME: ++ { ++ double doubleslow = strtod(argument,NULL); ++ if (doubleslow < 0 || doubleslow > (LONG_TIMEOUT)) ++ { ++ fprintf(stderr,"Out of range long_query_time value: %s\n", argument); ++ exit(1); ++ } ++ global_system_variables.long_query_time = (ulonglong) (doubleslow * 1000000); ++ break; ++ } + case OPT_STORAGE_ENGINE: + { + if ((enum db_type)((global_system_variables.table_type= +@@ -7628,10 +7699,14 @@ + if (opt_bdb) + sql_print_warning("this binary does not contain BDB storage engine"); + #endif +- if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes) && ++ if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes || ++ opt_log_slow_slave_statements) && + !opt_slow_log) +- sql_print_warning("options --log-slow-admin-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set"); +- ++ { ++ sql_print_warning("options --log-slow-admin-statements, --log-slow-slave-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set"); ++ opt_log_slow_slave_statements= FALSE; ++ } ++ + if (argc > 0) + { + fprintf(stderr, "%s: Too many arguments (first extra is '%s').\nUse --help to get a list of available options\n", my_progname, *argv); +diff -r 25523be1816e sql/set_var.cc +--- 
a/sql/set_var.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -217,9 +217,13 @@ + sys_log_queries_not_using_indexes("log_queries_not_using_indexes", + &opt_log_queries_not_using_indexes); + sys_var_thd_ulong sys_log_warnings("log_warnings", &SV::log_warnings); +-sys_var_thd_ulong sys_long_query_time("long_query_time", ++sys_var_thd_microtime sys_long_query_time("long_query_time", + &SV::long_query_time); ++sys_var_bool_ptr sys_use_global_long_query_time("use_global_long_query_time", ++ &opt_use_global_long_query_time); + sys_var_bool_const_ptr sys_log_slow("log_slow_queries", &opt_slow_log); ++sys_var_log_slow sys_slow_query_log("slow_query_log", &opt_slow_log); ++sys_var_const_str_ptr sys_slow_query_log_file("slow_query_log_file", &opt_slow_logname); + sys_var_thd_bool sys_low_priority_updates("low_priority_updates", + &SV::low_priority_updates, + fix_low_priority_updates); +@@ -283,6 +285,8 @@ + &SV::max_tmp_tables); + sys_var_long_ptr sys_max_write_lock_count("max_write_lock_count", + &max_write_lock_count); ++sys_var_thd_ulong sys_min_examined_row_limit("min_examined_row_limit", ++ &SV::min_examined_row_limit); + sys_var_thd_ulong sys_multi_range_count("multi_range_count", + &SV::multi_range_count); + sys_var_long_ptr sys_myisam_data_pointer_size("myisam_data_pointer_size", +@@ -327,6 +331,20 @@ + sys_var_bool_ptr sys_relay_log_purge("relay_log_purge", + &relay_log_purge); + #endif ++sys_var_thd_ulong sys_log_slow_rate_limit("log_slow_rate_limit", ++ &SV::log_slow_rate_limit); ++sys_var_thd_msl_flag sys_log_slow_filter("log_slow_filter", ++ &SV::log_slow_filter, ++ SLOG_F_NONE, ++ SLOG_F_NONE, ++ SLOG_F_INVALID, ++ slog_filter); ++sys_var_thd_msl_flag sys_log_slow_verbosity("log_slow_verbosity", ++ &SV::log_slow_verbosity, ++ SLOG_V_NONE, ++ SLOG_V_MICROTIME, ++ SLOG_V_INVALID, ++ slog_verb); + sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank", + &rpl_recovery_rank); + sys_var_long_ptr 
sys_query_cache_size("query_cache_size", +@@ -694,6 +712,10 @@ + &sys_log_off, + &sys_log_queries_not_using_indexes, + &sys_log_slow, ++ &sys_log_slow_filter, ++ &sys_log_slow_rate_limit, ++ &sys_log_slow_verbosity, ++ &sys_use_global_long_query_time, + &sys_log_update, + &sys_log_warnings, + &sys_long_query_time, +@@ -717,6 +738,7 @@ + &sys_max_tmp_tables, + &sys_max_user_connections, + &sys_max_write_lock_count, ++ &sys_min_examined_row_limit, + &sys_multi_range_count, + &sys_myisam_data_pointer_size, + &sys_myisam_max_sort_file_size, +@@ -770,6 +792,8 @@ + &sys_slave_skip_counter, + #endif + &sys_slow_launch_time, ++ &sys_slow_query_log, ++ &sys_slow_query_log_file, + &sys_sort_buffer, + &sys_sql_big_tables, + &sys_sql_low_priority_updates, +@@ -986,8 +1010,11 @@ + {"log_slave_updates", (char*) &opt_log_slave_updates, SHOW_MY_BOOL}, + #endif + {sys_log_slow.name, (char*) &sys_log_slow, SHOW_SYS}, ++ {sys_log_slow_filter.name, (char*) &sys_log_slow_filter, SHOW_SYS}, ++ {sys_log_slow_rate_limit.name, (char*) &sys_log_slow_rate_limit, SHOW_SYS}, ++ {sys_log_slow_verbosity.name, (char*) &sys_log_slow_verbosity, SHOW_SYS}, + {sys_log_warnings.name, (char*) &sys_log_warnings, SHOW_SYS}, +- {sys_long_query_time.name, (char*) &sys_long_query_time, SHOW_SYS}, ++ {sys_long_query_time.name, (char*) &sys_long_query_time, SHOW_MICROTIME}, + {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS}, + {"lower_case_file_system", (char*) &lower_case_file_system, SHOW_MY_BOOL}, + {"lower_case_table_names", (char*) &lower_case_table_names, SHOW_INT}, +@@ -1014,6 +1041,7 @@ + {sys_max_tmp_tables.name, (char*) &sys_max_tmp_tables, SHOW_SYS}, + {sys_max_user_connections.name,(char*) &sys_max_user_connections, SHOW_SYS}, + {sys_max_write_lock_count.name, (char*) &sys_max_write_lock_count,SHOW_SYS}, ++ {sys_min_examined_row_limit.name, (char*) &sys_min_examined_row_limit, SHOW_SYS}, + {sys_multi_range_count.name, (char*) &sys_multi_range_count, SHOW_SYS}, + 
{sys_myisam_data_pointer_size.name, (char*) &sys_myisam_data_pointer_size, SHOW_SYS}, + {sys_myisam_max_sort_file_size.name, (char*) &sys_myisam_max_sort_file_size, +@@ -1101,6 +1129,8 @@ + {sys_slave_trans_retries.name,(char*) &sys_slave_trans_retries, SHOW_SYS}, + #endif + {sys_slow_launch_time.name, (char*) &sys_slow_launch_time, SHOW_SYS}, ++ {sys_slow_query_log.name, (char*) &sys_slow_query_log, SHOW_SYS}, ++ {sys_slow_query_log_file.name,(char*) &sys_slow_query_log_file, SHOW_SYS}, + #ifdef HAVE_SYS_UN_H + {"socket", (char*) &mysqld_unix_port, SHOW_CHAR_PTR}, + #endif +@@ -1141,6 +1171,7 @@ + {sys_tx_isolation.name, (char*) &sys_tx_isolation, SHOW_SYS}, + {sys_updatable_views_with_limit.name, + (char*) &sys_updatable_views_with_limit,SHOW_SYS}, ++ {sys_use_global_long_query_time.name, (char*) &sys_use_global_long_query_time, SHOW_SYS}, + {sys_version.name, (char*) &sys_version, SHOW_SYS}, + #ifdef HAVE_BERKELEY_DB + {sys_version_bdb.name, (char*) &sys_version_bdb, SHOW_SYS}, +@@ -1769,6 +1799,17 @@ + } + + ++bool sys_var_thd_microtime::check(THD *thd, set_var *var) ++{ ++ if (var->value->result_type() == DECIMAL_RESULT) ++ var->save_result.ulonglong_value= (ulonglong)(var->value->val_real() * 1000000); ++ else ++ var->save_result.ulonglong_value= (ulonglong)(var->value->val_int() * 1000000); ++ ++ return 0; ++} ++ ++ + bool sys_var_thd_bool::update(THD *thd, set_var *var) + { + if (var->type == OPT_GLOBAL) +@@ -1924,6 +1965,19 @@ + value= *(longlong*) value_ptr(thd, var_type, base); + pthread_mutex_unlock(&LOCK_global_system_variables); + return new Item_int(value); ++ } ++ case SHOW_MICROTIME: ++ { ++ longlong value; ++ char buff[80]; ++ int len; ++ ++ pthread_mutex_lock(&LOCK_global_system_variables); ++ value= *(longlong*) value_ptr(thd, var_type, base); ++ pthread_mutex_unlock(&LOCK_global_system_variables); ++ ++ len = snprintf(buff, 80, "%f", ((double) value) / 1000000.0); ++ return new Item_float(buff,len); + } + case SHOW_HA_ROWS: + { +@@ -2757,6 
+2811,30 @@ + } + + ++bool sys_var_log_slow::update(THD *thd, set_var *var) ++{ ++ bool ret; ++ ++ pthread_mutex_lock(&LOCK_global_system_variables); ++ if (var->save_result.ulong_value) ++ { ++ if(!mysql_slow_log.is_open()) ++ { ++ mysql_slow_log.open_slow_log(opt_slow_logname); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_system_variables); ++ ++ ret = sys_var_bool_ptr::update(thd, var); ++ ++#ifdef HAVE_INNOBASE_DB ++ innobase_update_var_slow_log(); ++#endif ++ ++ return(ret); ++} ++ ++ + #ifdef HAVE_REPLICATION + bool sys_var_slave_skip_counter::check(THD *thd, set_var *var) + { +@@ -3519,6 +3597,191 @@ + #endif + } + ++/* Slow log stuff */ ++ ++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len) ++{ ++ ulong i; ++ ++ for (i=0; opts[i].name; i++) ++ { ++ if (!my_strnncoll(&my_charset_latin1, ++ (const uchar *)name, len, ++ (const uchar *)opts[i].name, strlen(opts[i].name))) ++ return opts[i].val; ++ } ++ return opts[i].val; ++} ++ ++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, ++ const ulong none_val, const ulong invalid_val) ++{ ++ const char *p, *e; ++ ulong val= none_val; ++ ++ if (!*names_list) ++ return val; ++ ++ for (p= e= names_list; ; e++) ++ { ++ ulong i; ++ ++ if (*e != ',' && *e) ++ continue; ++ for (i=0; opts[i].name; i++) ++ { ++ if (!my_strnncoll(&my_charset_latin1, ++ (const uchar *)p, e - p, ++ (const uchar *)opts[i].name, strlen(opts[i].name))) ++ { ++ val= val | opts[i].val; ++ break; ++ } ++ } ++ if (opts[i].val == invalid_val) ++ return invalid_val; ++ if (!*e) ++ break; ++ p= e + 1; ++ } ++ return val; ++} ++ ++const char *msl_option_get_name(const struct msl_opts *opts, ulong val) ++{ ++ for (ulong i=0; opts[i].name && opts[i].name[0]; i++) ++ { ++ if (opts[i].val == val) ++ return opts[i].name; ++ } ++ return "*INVALID*"; ++} ++ ++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val) ++{ ++ uint offset= 0; ++ ++ *buf= '\0'; ++ for (ulong 
i=0; opts[i].name && opts[i].name[0]; i++) ++ { ++ if (opts[i].val & val) ++ offset+= snprintf(buf+offset, STRING_BUFFER_USUAL_SIZE - offset - 1, ++ "%s%s", (offset ? "," : ""), opts[i].name); ++ } ++ return buf; ++} ++ ++/**************************************************************************** ++ Functions to handle log_slow_verbosity ++****************************************************************************/ ++ ++/* Based upon sys_var::check_enum() */ ++ ++bool sys_var_thd_msl_option::check(THD *thd, set_var *var) ++{ ++ char buff[STRING_BUFFER_USUAL_SIZE]; ++ String str(buff, sizeof(buff), &my_charset_latin1), *res; ++ ++ if (var->value->result_type() == STRING_RESULT) ++ { ++ ulong verb= this->invalid_val; ++ if (!(res=var->value->val_str(&str)) || ++ (var->save_result.ulong_value= ++ (ulong) (verb= msl_option_resolve_by_name(this->opts, res->ptr(), res->length()))) == this->invalid_val) ++ goto err; ++ return 0; ++ } ++ ++err: ++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name); ++ return 1; ++} ++ ++byte *sys_var_thd_msl_option::value_ptr(THD *thd, enum_var_type type, ++ LEX_STRING *base) ++{ ++ ulong val; ++ val= ((type == OPT_GLOBAL) ? 
global_system_variables.*offset : ++ thd->variables.*offset); ++ const char *verbosity= msl_option_get_name(this->opts, val); ++ return (byte *) verbosity; ++} ++ ++ ++void sys_var_thd_msl_option::set_default(THD *thd, enum_var_type type) ++{ ++ if (type == OPT_GLOBAL) ++ global_system_variables.*offset= (ulong) this->default_val; ++ else ++ thd->variables.*offset= (ulong) (global_system_variables.*offset); ++} ++ ++ ++bool sys_var_thd_msl_option::update(THD *thd, set_var *var) ++{ ++ if (var->type == OPT_GLOBAL) ++ global_system_variables.*offset= var->save_result.ulong_value; ++ else ++ thd->variables.*offset= var->save_result.ulong_value; ++ return 0; ++} ++ ++/**************************************************************************** ++ Functions to handle log_slow_filter ++****************************************************************************/ ++ ++/* Based upon sys_var::check_enum() */ ++ ++bool sys_var_thd_msl_flag::check(THD *thd, set_var *var) ++{ ++ char buff[2 * STRING_BUFFER_USUAL_SIZE]; ++ String str(buff, sizeof(buff), &my_charset_latin1), *res; ++ ++ if (var->value->result_type() == STRING_RESULT) ++ { ++ ulong filter= this->none_val; ++ if (!(res=var->value->val_str(&str)) || ++ (var->save_result.ulong_value= ++ (ulong) (filter= msl_flag_resolve_by_name(this->flags, res->ptr(), this->none_val, ++ this->invalid_val))) == this->invalid_val) ++ goto err; ++ return 0; ++ } ++ ++err: ++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name); ++ return 1; ++} ++ ++byte *sys_var_thd_msl_flag::value_ptr(THD *thd, enum_var_type type, ++ LEX_STRING *base) ++{ ++ ulong val; ++ val= ((type == OPT_GLOBAL) ? 
global_system_variables.*offset : ++ thd->variables.*offset); ++ msl_flag_get_name(this->flags, this->flags_string, val); ++ return (byte *) this->flags_string; ++} ++ ++ ++void sys_var_thd_msl_flag::set_default(THD *thd, enum_var_type type) ++{ ++ if (type == OPT_GLOBAL) ++ global_system_variables.*offset= (ulong) this->default_val; ++ else ++ thd->variables.*offset= (ulong) (global_system_variables.*offset); ++} ++ ++ ++bool sys_var_thd_msl_flag::update(THD *thd, set_var *var) ++{ ++ if (var->type == OPT_GLOBAL) ++ global_system_variables.*offset= var->save_result.ulong_value; ++ else ++ thd->variables.*offset= var->save_result.ulong_value; ++ return 0; ++} ++ + /**************************************************************************** + Functions to handle table_type + ****************************************************************************/ +diff -r 25523be1816e sql/set_var.h +--- a/sql/set_var.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/set_var.h Mon Dec 22 00:26:39 2008 -0800 +@@ -132,6 +132,7 @@ + }; + + ++ + class sys_var_ulonglong_ptr :public sys_var + { + public: +@@ -168,6 +169,13 @@ + bool check_update_type(Item_result type) { return 0; } + }; + ++class sys_var_log_slow :public sys_var_bool_ptr ++{ ++public: ++ sys_var_log_slow(const char *name_arg, my_bool *value_arg) ++ :sys_var_bool_ptr(name_arg, value_arg) {} ++ bool update(THD *thd, set_var *var); ++}; + + class sys_var_bool_const_ptr : public sys_var + { +@@ -340,7 +348,6 @@ + } + }; + +- + class sys_var_thd_ulong :public sys_var_thd + { + sys_check_func check_func; +@@ -360,7 +367,6 @@ + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + }; + +- + class sys_var_thd_ha_rows :public sys_var_thd + { + public: +@@ -377,7 +383,6 @@ + SHOW_TYPE show_type() { return SHOW_HA_ROWS; } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + }; +- + + class sys_var_thd_ulonglong :public sys_var_thd + { +@@ -407,6 +412,19 @@ + } + }; + ++class sys_var_thd_microtime :public 
sys_var_thd_ulonglong ++{ ++public: ++ sys_var_thd_microtime(const char *name_arg, ulonglong SV::*offset_arg) ++ :sys_var_thd_ulonglong(name_arg, offset_arg) ++ {} ++ SHOW_TYPE show_type() { return SHOW_MICROTIME; } ++ bool check(THD *thd, set_var *var); ++ bool check_update_type(Item_result type) ++ { ++ return type != INT_RESULT && type != DECIMAL_RESULT; ++ } ++}; + + class sys_var_thd_bool :public sys_var_thd + { +@@ -477,6 +495,66 @@ + ulong *length); + }; + ++ ++class sys_var_thd_msl_option :public sys_var_thd ++{ ++protected: ++ ulong SV::*offset; ++ const ulong none_val; ++ const ulong default_val; ++ const ulong invalid_val; ++ const struct msl_opts *opts; ++public: ++ sys_var_thd_msl_option(const char *name_arg, ulong SV::*offset_arg, ++ const ulong none_val_arg, ++ const ulong default_val_arg, ++ const ulong invalid_val_arg, ++ const struct msl_opts *opts_arg) ++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg), ++ default_val(default_val_arg), invalid_val(invalid_val_arg), ++ opts(opts_arg) ++ {} ++ bool check(THD *thd, set_var *var); ++ SHOW_TYPE show_type() { return SHOW_CHAR; } ++ bool check_update_type(Item_result type) ++ { ++ return type != STRING_RESULT; /* Only accept strings */ ++ } ++ void set_default(THD *thd, enum_var_type type); ++ bool update(THD *thd, set_var *var); ++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); ++}; ++ ++ ++class sys_var_thd_msl_flag :public sys_var_thd ++{ ++protected: ++ char flags_string[2 * STRING_BUFFER_USUAL_SIZE]; ++ ulong SV::*offset; ++ const ulong none_val; ++ const ulong default_val; ++ const ulong invalid_val; ++ const struct msl_opts *flags; ++public: ++ sys_var_thd_msl_flag(const char *name_arg, ulong SV::*offset_arg, ++ const ulong none_val_arg, ++ const ulong default_val_arg, ++ const ulong invalid_val_arg, ++ const struct msl_opts *flags_arg) ++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg), ++ default_val(default_val_arg), 
invalid_val(invalid_val_arg), ++ flags(flags_arg) ++ {} ++ bool check(THD *thd, set_var *var); ++ SHOW_TYPE show_type() { return SHOW_CHAR; } ++ bool check_update_type(Item_result type) ++ { ++ return type != STRING_RESULT; /* Only accept strings */ ++ } ++ void set_default(THD *thd, enum_var_type type); ++ bool update(THD *thd, set_var *var); ++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); ++}; + + class sys_var_thd_storage_engine :public sys_var_thd + { +@@ -1087,3 +1165,11 @@ + bool process_key_caches(int (* func) (const char *name, KEY_CACHE *)); + void delete_elements(I_List<NAMED_LIST> *list, + void (*free_element)(const char*, gptr)); ++ ++/* Slow log functions */ ++ ++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len); ++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, ++ const ulong none_val, const ulong invalid_val); ++const char *msl_option_get_name(const struct msl_opts *opts, ulong val); ++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val); +diff -r 25523be1816e sql/slave.cc +--- a/sql/slave.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/slave.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -2925,6 +2925,12 @@ + + MAX_LOG_EVENT_HEADER; /* note, incr over the global not session var */ + thd->slave_thread = 1; + set_slave_thread_options(thd); ++ if (opt_log_slow_slave_statements) ++ { ++ thd->enable_slow_log= TRUE; ++ /* Slave thread is excluded from rate limiting the slow log writes. 
*/ ++ thd->write_to_slow_log= TRUE; ++ } + thd->client_capabilities = CLIENT_LOCAL_FILES; + thd->real_id=pthread_self(); + pthread_mutex_lock(&LOCK_thread_count); +diff -r 25523be1816e sql/sql_cache.cc +--- a/sql/sql_cache.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_cache.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -1341,6 +1341,7 @@ + + thd->limit_found_rows = query->found_rows(); + thd->status_var.last_query_cost= 0.0; ++ thd->query_plan_flags|= QPLAN_QC; + + BLOCK_UNLOCK_RD(query_block); + DBUG_RETURN(1); // Result sent to client +@@ -1348,6 +1349,7 @@ + err_unlock: + STRUCT_UNLOCK(&structure_guard_mutex); + err: ++ thd->query_plan_flags|= QPLAN_QC_NO; + DBUG_RETURN(0); // Query was not cached + } + +diff -r 25523be1816e sql/sql_class.cc +--- a/sql/sql_class.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_class.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -188,7 +188,7 @@ + lock_id(&main_lock_id), + user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0), + transaction_rollback_request(0), is_fatal_sub_stmt_error(0), +- rand_used(0), time_zone_used(0), ++ rand_used(0), time_zone_used(0), user_timer(0), + last_insert_id_used(0), last_insert_id_used_bin_log(0), insert_id_used(0), + clear_next_insert_id(0), in_lock_tables(0), bootstrap(0), + derived_tables_processing(FALSE), spcont(NULL), +@@ -2224,6 +2224,12 @@ + backup->cuted_fields= cuted_fields; + backup->client_capabilities= client_capabilities; + backup->savepoints= transaction.savepoints; ++ backup->innodb_io_reads= innodb_io_reads; ++ backup->innodb_io_read= innodb_io_read; ++ backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer; ++ backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer; ++ backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer; ++ backup->innodb_page_access= innodb_page_access; + + if (!lex->requires_prelocking() || is_update_query(lex->sql_command)) + options&= ~OPTION_BIN_LOG; +@@ -2240,7 +2246,13 @@ + sent_row_count= 0; + cuted_fields= 0; + 
transaction.savepoints= 0; +- ++ innodb_io_reads= 0; ++ innodb_io_read= 0; ++ innodb_io_reads_wait_timer= 0; ++ innodb_lock_que_wait_timer= 0; ++ innodb_innodb_que_wait_timer= 0; ++ innodb_page_access= 0; ++ + /* Surpress OK packets in case if we will execute statements */ + net.no_send_ok= TRUE; + } +@@ -2293,6 +2305,12 @@ + */ + examined_row_count+= backup->examined_row_count; + cuted_fields+= backup->cuted_fields; ++ innodb_io_reads+= backup->innodb_io_reads; ++ innodb_io_read+= backup->innodb_io_read; ++ innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer; ++ innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer; ++ innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer; ++ innodb_page_access+= backup->innodb_page_access; + } + + +diff -r 25523be1816e sql/sql_class.h +--- a/sql/sql_class.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_class.h Mon Dec 22 00:26:39 2008 -0800 +@@ -43,6 +43,7 @@ + extern char internal_table_name[2]; + extern char empty_c_string[1]; + extern const char **errmesg; ++extern ulonglong frequency; + + #define TC_LOG_PAGE_SIZE 8192 + #define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE) +@@ -314,7 +321,7 @@ + bool write(THD *thd, enum enum_server_command command, + const char *format, ...) 
ATTRIBUTE_FORMAT(printf, 4, 5); + bool write(THD *thd, const char *query, uint query_length, +- time_t query_start=0); ++ time_t query_start=0, ulonglong query_start_timer=0); + bool write(Log_event* event_info); // binary log write + bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); + +@@ -520,13 +527,14 @@ + ulong auto_increment_increment, auto_increment_offset; + ulong bulk_insert_buff_size; + ulong join_buff_size; +- ulong long_query_time; ++ ulonglong long_query_time; + ulong max_allowed_packet; + ulong max_error_count; + ulong max_length_for_sort_data; + ulong max_sort_length; + ulong max_tmp_tables; + ulong max_insert_delayed_threads; ++ ulong min_examined_row_limit; + ulong multi_range_count; + ulong myisam_repair_threads; + ulong myisam_sort_buff_size; +@@ -542,10 +550,13 @@ + ulong preload_buff_size; + ulong profiling_history_size; + ulong query_cache_type; ++ ulong log_slow_rate_limit; + ulong read_buff_size; + ulong read_rnd_buff_size; + ulong div_precincrement; + ulong sortbuff_size; ++ ulong log_slow_filter; ++ ulong log_slow_verbosity; + ulong table_type; + ulong tx_isolation; + ulong completion_type; +@@ -1121,6 +1132,12 @@ + uint in_sub_stmt; + bool enable_slow_log, insert_id_used, clear_next_insert_id; + bool last_insert_id_used; ++ ulong innodb_io_reads; ++ ulonglong innodb_io_read; ++ ulong innodb_io_reads_wait_timer; ++ ulong innodb_lock_que_wait_timer; ++ ulong innodb_innodb_que_wait_timer; ++ ulong innodb_page_access; + my_bool no_send_ok; + SAVEPOINT *savepoints; + }; +@@ -1177,6 +1194,11 @@ + class THD :public Statement, + public Open_tables_state + { ++private: ++ inline ulonglong query_start_timer() { return start_timer; } ++ inline void set_timer() { if (user_timer) start_timer=timer_after_lock=user_timer; else timer_after_lock=my_timer(&start_timer, frequency); } ++ inline void end_timer() { my_timer(&start_timer, frequency); } ++ inline void lock_timer() { my_timer(&timer_after_lock, frequency); } + public: + /* + 
Constant for THD::where initialization in the beginning of every query. +@@ -1285,10 +1307,24 @@ + */ + const char *where; + time_t start_time,time_after_lock,user_time; ++ ulonglong start_timer,timer_after_lock, user_timer; + time_t connect_time,thr_create_time; // track down slow pthread_create + thr_lock_type update_lock_default; + Delayed_insert *di; + ++ bool write_to_slow_log; ++ ++ bool innodb_was_used; ++ ulong innodb_io_reads; ++ ulonglong innodb_io_read; ++ ulong innodb_io_reads_wait_timer; ++ ulong innodb_lock_que_wait_timer; ++ ulong innodb_innodb_que_wait_timer; ++ ulong innodb_page_access; ++ ++ ulong query_plan_flags; ++ ulong query_plan_fsort_passes; ++ + /* <> 0 if we are inside of trigger or stored function. */ + uint in_sub_stmt; + +@@ -1678,11 +1714,11 @@ + sql_print_information("time() failed with %d", errno); + } + +- inline time_t query_start() { query_start_used=1; return start_time; } +- inline void set_time() { if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }} +- inline void end_time() { safe_time(&start_time); } +- inline void set_time(time_t t) { time_after_lock=start_time=user_time=t; } +- inline void lock_time() { safe_time(&time_after_lock); } ++ inline time_t query_start() { query_start_timer(); query_start_used=1; return start_time; } ++ inline void set_time() { set_timer(); if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }} ++ inline void end_time() { end_timer(); safe_time(&start_time); } ++ inline void set_time(time_t t) { set_timer(); time_after_lock=start_time=user_time=t; } ++ inline void lock_time() { lock_timer(); safe_time(&time_after_lock); } + inline void insert_id(ulonglong id_arg) + { + last_insert_id= id_arg; +diff -r 25523be1816e sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -20,6 +20,7 @@ + #include 
<m_ctype.h> + #include <myisam.h> + #include <my_dir.h> ++#include <my_time.h> + + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" +@@ -1227,6 +1228,15 @@ + my_net_set_read_timeout(net, thd->variables.net_read_timeout); + my_net_set_write_timeout(net, thd->variables.net_write_timeout); + ++ /* ++ If rate limiting of slow log writes is enabled, decide whether to log this ++ new thread's queries or not. Uses extremely simple algorithm. :) ++ */ ++ thd->write_to_slow_log= FALSE; ++ if (thd->variables.log_slow_rate_limit <= 1 || ++ (thd->thread_id % thd->variables.log_slow_rate_limit) == 0) ++ thd->write_to_slow_log= TRUE; ++ + while (!net->error && net->vio != 0 && + !(thd->killed == THD::KILL_CONNECTION)) + { +@@ -2353,27 +2363,55 @@ + return; // Don't set time for sub stmt + + start_of_query= thd->start_time; ++ ulonglong start_of_query_timer= thd->start_timer; + thd->end_time(); // Set start time ++ ++ ++ /* Follow the slow log filter configuration. */ ++ if (thd->variables.log_slow_filter != SLOG_F_NONE && ++ (!(thd->variables.log_slow_filter & thd->query_plan_flags) || ++ ((thd->variables.log_slow_filter & SLOG_F_QC_NO) && ++ (thd->query_plan_flags & QPLAN_QC)))) ++ return; ++ ++ /* ++ Low long_query_time value most likely means user is debugging stuff and even ++ though some thread's queries are not supposed to be logged b/c of the rate ++ limit, if one of them takes long enough (>= 1 second) it will be sensible ++ to make an exception and write to slow log anyway. ++ */ ++ if (opt_use_global_long_query_time) ++ thd->variables.long_query_time = global_system_variables.long_query_time; ++ if (thd->write_to_slow_log != TRUE && thd->variables.long_query_time < 1000000 && ++ (ulong) (thd->start_timer - thd->timer_after_lock) >= 1000000) ++ thd->write_to_slow_log= TRUE; ++ ++ /* Do not log this thread's queries due to rate limiting. 
*/ ++ if (thd->write_to_slow_log != TRUE) ++ return; + + /* + Do not log administrative statements unless the appropriate option is + set; do not log into slow log if reading from backup. + */ +- if (thd->enable_slow_log && !thd->user_time) ++ if (thd->enable_slow_log && ++ (!thd->user_time || (thd->slave_thread && opt_log_slow_slave_statements)) ++ ) + { + thd_proc_info(thd, "logging slow query"); + +- if ((ulong) (thd->start_time - thd->time_after_lock) > +- thd->variables.long_query_time || +- (thd->server_status & +- (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) && +- opt_log_queries_not_using_indexes && +- /* == SQLCOM_END unless this is a SHOW command */ +- thd->lex->orig_sql_command == SQLCOM_END) ++ if (((ulong) (thd->start_timer - thd->timer_after_lock) >= ++ thd->variables.long_query_time || ++ (thd->server_status & ++ (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) && ++ opt_log_queries_not_using_indexes && ++ /* == SQLCOM_END unless this is a SHOW command */ ++ thd->lex->orig_sql_command == SQLCOM_END) && ++ thd->examined_row_count >= thd->variables.min_examined_row_limit) + { + thd_proc_info(thd, "logging slow query"); + thd->status_var.long_query_count++; +- mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query); ++ mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query, start_of_query_timer); + } + } + } +@@ -2664,6 +2702,8 @@ + context.resolve_in_table_list_only((TABLE_LIST*)select_lex-> + table_list.first); + ++ /* Reset the counter at all cases for the extended slow query log */ ++ thd->row_count= 1; + /* + Reset warning count for each query that uses tables + A better approach would be to reset this for any commands +@@ -6084,6 +6120,15 @@ + thd->total_warn_count=0; // Warnings for this query + thd->rand_used= 0; + thd->sent_row_count= thd->examined_row_count= 0; ++ thd->innodb_was_used= FALSE; ++ thd->innodb_io_reads= 0; ++ thd->innodb_io_read= 0; ++ 
thd->innodb_io_reads_wait_timer= 0; ++ thd->innodb_lock_que_wait_timer= 0; ++ thd->innodb_innodb_que_wait_timer= 0; ++ thd->innodb_page_access= 0; ++ thd->query_plan_flags= QPLAN_NONE; ++ thd->query_plan_fsort_passes= 0; + } + DBUG_VOID_RETURN; + } +diff -r 25523be1816e sql/sql_select.cc +--- a/sql/sql_select.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_select.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -6221,8 +6221,11 @@ + { + join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED; + if (statistics) ++ { + statistic_increment(join->thd->status_var.select_scan_count, + &LOCK_status); ++ join->thd->query_plan_flags|= QPLAN_FULL_SCAN; ++ } + } + } + else +@@ -6237,8 +6240,11 @@ + { + join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED; + if (statistics) ++ { + statistic_increment(join->thd->status_var.select_full_join_count, + &LOCK_status); ++ join->thd->query_plan_flags|= QPLAN_FULL_JOIN; ++ } + } + } + if (!table->no_keyread) +@@ -9302,6 +9308,7 @@ + (ulong) rows_limit,test(group))); + + statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); ++ thd->query_plan_flags|= QPLAN_TMP_TABLE; + + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + temp_pool_slot = bitmap_set_next(&temp_pool); +@@ -10162,6 +10169,7 @@ + } + statistic_increment(table->in_use->status_var.created_tmp_disk_tables, + &LOCK_status); ++ table->in_use->query_plan_flags|= QPLAN_TMP_DISK; + table->s->db_record_offset= 1; + DBUG_RETURN(0); + err: +diff -r 25523be1816e sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:26:39 2008 -0800 +@@ -1532,6 +1532,12 @@ + value= ((char *) status_var + (ulonglong) value); + case SHOW_LONGLONG: + end= longlong10_to_str(*(longlong*) value, buff, 10); ++ break; ++ case SHOW_MICROTIME: ++ show_type= ((sys_var*) value)->show_type(); ++ value= (char*) ((sys_var*) value)->value_ptr(thd, value_type, ++ &null_lex_str); ++ end= buff + sprintf(buff, "%f", (((double) (*(ulonglong*)value))) / 
1000000.0); + break; + case SHOW_HA_ROWS: + end= longlong10_to_str((longlong) *(ha_rows*) value, buff, 10); +diff -r 25523be1816e sql/structs.h +--- a/sql/structs.h Mon Dec 22 00:25:06 2008 -0800 ++++ b/sql/structs.h Mon Dec 22 00:26:39 2008 -0800 +@@ -168,8 +168,8 @@ + enum SHOW_TYPE + { + SHOW_UNDEF, +- SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, +- SHOW_DOUBLE_STATUS, ++ SHOW_LONG, SHOW_LONGLONG, SHOW_MICROTIME, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, ++ SHOW_DOUBLE_STATUS, + SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, + SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS, + SHOW_VARS, diff --git a/percona/5.0.75-b12/mirror_binlog.patch b/percona/5.0.75-b12/mirror_binlog.patch new file mode 100644 index 0000000..4dae799 --- /dev/null +++ b/percona/5.0.75-b12/mirror_binlog.patch @@ -0,0 +1,2694 @@ +diff -r 66cc9e0a6768 mysql-test/lib/mtr_cases.pl +--- a/mysql-test/lib/mtr_cases.pl Thu Dec 04 21:37:12 2008 -0800 ++++ b/mysql-test/lib/mtr_cases.pl Thu Dec 04 21:46:15 2008 -0800 +@@ -334,6 +334,10 @@ + + $tinfo->{'slave_num'}= 1; # Default for rpl* tests, use one slave + ++ if ( $tname eq 'rpl_mirror_binlog' ) ++ { ++ $tinfo->{'slave_num'}= 3; ++ } + } + + if ( defined mtr_match_prefix($tname,"federated") ) +@@ -344,15 +348,20 @@ + + my $master_opt_file= "$testdir/$tname-master.opt"; + my $slave_opt_file= "$testdir/$tname-slave.opt"; +- my $slave_mi_file= "$testdir/$tname.slave-mi"; ++ my $slave_mi_files= ["$testdir/$tname.slave-mi", ++ "$testdir/$tname.1.slave-mi", ++ "$testdir/$tname.2.slave-mi"]; + my $master_sh= "$testdir/$tname-master.sh"; + my $slave_sh= "$testdir/$tname-slave.sh"; + my $disabled_file= "$testdir/$tname.disabled"; + my $im_opt_file= "$testdir/$tname-im.opt"; + +- $tinfo->{'master_opt'}= []; +- $tinfo->{'slave_opt'}= []; +- $tinfo->{'slave_mi'}= []; ++ $tinfo->{'master_opt'}= []; ++ $tinfo->{'slave_opt'}= []; ++ $tinfo->{'slave_mi'}= {}; ++ $tinfo->{'slave_mi'}{0}= []; ++ $tinfo->{'slave_mi'}{1}= []; ++ 
$tinfo->{'slave_mi'}{2}= []; + + if ( -f $master_opt_file ) + { +@@ -427,9 +436,14 @@ + push(@{$tinfo->{'slave_opt'}}, @$slave_opt); + } + +- if ( -f $slave_mi_file ) ++ my $mi_idx= 0; ++ foreach my $slave_mi_file ( @$slave_mi_files ) + { +- $tinfo->{'slave_mi'}= mtr_get_opts_from_file($slave_mi_file); ++ if ( -f $slave_mi_file ) ++ { ++ $tinfo->{'slave_mi'}{$mi_idx}= mtr_get_opts_from_file($slave_mi_file); ++ } ++ $mi_idx+= 1; + } + + if ( -f $master_sh ) +diff -r 66cc9e0a6768 mysql-test/mysql-test-run.pl +--- a/mysql-test/mysql-test-run.pl Thu Dec 04 21:37:12 2008 -0800 ++++ b/mysql-test/mysql-test-run.pl Thu Dec 04 21:46:15 2008 -0800 +@@ -275,6 +275,7 @@ + our $opt_stress_test_file= ""; + + our $opt_warnings; ++our $opt_slave_innodb= 0; + + our $opt_skip_ndbcluster= 0; + our $opt_skip_ndbcluster_slave= 0; +@@ -299,6 +300,8 @@ + our $used_binlog_format; + our $used_default_engine; + our $debug_compiled_binaries; ++ ++our $current_testname= ""; + + our %mysqld_variables; + +@@ -645,6 +648,7 @@ + 'testcase-timeout=i' => \$opt_testcase_timeout, + 'suite-timeout=i' => \$opt_suite_timeout, + 'warnings|log-warnings' => \$opt_warnings, ++ 'slave-innodb' => \$opt_slave_innodb, + + # Options which are no longer used + (map { $_ => \&warn_about_removed_option } @removed_options), +@@ -1001,6 +1005,14 @@ + { + $ENV{'BIG_TEST'}= 1; + } ++ ++ # -------------------------------------------------------------------------- ++ # Big test flags ++ # -------------------------------------------------------------------------- ++ if ( $opt_big_test ) ++ { ++ $ENV{'BIG_TEST'}= 1; ++ } + + # -------------------------------------------------------------------------- + # Gcov flag +@@ -1885,7 +1897,9 @@ + $ENV{'SLAVE_MYSOCK'}= $slave->[0]->{'path_sock'}; + $ENV{'SLAVE_MYPORT'}= $slave->[0]->{'port'}; + $ENV{'SLAVE_MYPORT1'}= $slave->[1]->{'port'}; ++ $ENV{'SLAVE_MYSOCK1'}= $slave->[1]->{'path_sock'}; + $ENV{'SLAVE_MYPORT2'}= $slave->[2]->{'port'}; ++ $ENV{'SLAVE_MYSOCK2'}= 
$slave->[2]->{'path_sock'}; + $ENV{'MYSQL_TCP_PORT'}= $mysqld_variables{'port'}; + $ENV{'DEFAULT_MASTER_PORT'}= $mysqld_variables{'master-port'}; + +@@ -2375,6 +2389,8 @@ + if ( ! $glob_win32 ) + { + symlink("$glob_mysql_test_dir/std_data", "$opt_vardir/std_data_ln"); ++ my @a = ("chmod", "-R", "o+r", "$glob_mysql_test_dir/std_data"); ++ system(@a) == 0 or die "system @ failed: $?" + } + else + { +@@ -3466,6 +3482,8 @@ + $ENV{'TZ'}= $tinfo->{'timezone'}; + mtr_verbose("Setting timezone: $tinfo->{'timezone'}"); + ++ $current_testname= $tinfo->{'name'}; ++ + my $master_restart= run_testcase_need_master_restart($tinfo); + my $slave_restart= run_testcase_need_slave_restart($tinfo); + +@@ -3881,7 +3899,8 @@ + unless $mysqld->{'type'} eq 'slave'; + + mtr_add_arg($args, "%s--init-rpl-role=slave", $prefix); +- if (! ( $opt_skip_slave_binlog || $skip_binlog )) ++ ++ if (! ($opt_skip_slave_binlog or ($current_testname eq 'rpl_mirror_binlog')) ) + { + mtr_add_arg($args, "%s--log-bin=%s/log/slave%s-bin", $prefix, + $opt_vardir, $sidx); # FIXME use own dir for binlogs +@@ -4568,7 +4587,7 @@ + if ( ! $slave->[$idx]->{'pid'} ) + { + mysqld_start($slave->[$idx],$tinfo->{'slave_opt'}, +- $tinfo->{'slave_mi'}); ++ $tinfo->{'slave_mi'}{$idx}); + + } + } +@@ -4580,7 +4599,6 @@ + # Wait for clusters to start + foreach my $cluster (@{$clusters}) + { +- + next if !$cluster->{'pid'}; + + if (ndbcluster_wait_started($cluster, "")) +@@ -5179,6 +5197,7 @@ + skip-im Don't start IM, and skip the IM test cases + big-test Set the environment variable BIG_TEST, which can be + checked from test cases. 
++ + + Options that specify ports + +diff -r 66cc9e0a6768 mysql-test/r/rpl_mirror_binlog.result +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/r/rpl_mirror_binlog.result Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,441 @@ ++stop slave; ++drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; ++reset master; ++reset slave; ++drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; ++start slave; ++drop table if exists t1; ++create table t1(n int) engine = InnoDB; ++insert into t1 values (300); ++insert into t1 values (299); ++insert into t1 values (298); ++insert into t1 values (297); ++insert into t1 values (296); ++insert into t1 values (295); ++insert into t1 values (294); ++insert into t1 values (293); ++insert into t1 values (292); ++insert into t1 values (291); ++insert into t1 values (290); ++insert into t1 values (289); ++insert into t1 values (288); ++insert into t1 values (287); ++insert into t1 values (286); ++insert into t1 values (285); ++insert into t1 values (284); ++insert into t1 values (283); ++insert into t1 values (282); ++insert into t1 values (281); ++insert into t1 values (280); ++insert into t1 values (279); ++insert into t1 values (278); ++insert into t1 values (277); ++insert into t1 values (276); ++insert into t1 values (275); ++insert into t1 values (274); ++insert into t1 values (273); ++insert into t1 values (272); ++insert into t1 values (271); ++insert into t1 values (270); ++insert into t1 values (269); ++insert into t1 values (268); ++insert into t1 values (267); ++insert into t1 values (266); ++insert into t1 values (265); ++insert into t1 values (264); ++insert into t1 values (263); ++insert into t1 values (262); ++insert into t1 values (261); ++insert into t1 values (260); ++insert into t1 values (259); ++insert into t1 values (258); ++insert into t1 values (257); ++insert into t1 values (256); ++insert into t1 values (255); ++insert into t1 values (254); ++insert into t1 values (253); ++insert into t1 values (252); ++insert 
into t1 values (251); ++insert into t1 values (250); ++insert into t1 values (249); ++insert into t1 values (248); ++insert into t1 values (247); ++insert into t1 values (246); ++insert into t1 values (245); ++insert into t1 values (244); ++insert into t1 values (243); ++insert into t1 values (242); ++insert into t1 values (241); ++insert into t1 values (240); ++insert into t1 values (239); ++insert into t1 values (238); ++insert into t1 values (237); ++insert into t1 values (236); ++insert into t1 values (235); ++insert into t1 values (234); ++insert into t1 values (233); ++insert into t1 values (232); ++insert into t1 values (231); ++insert into t1 values (230); ++insert into t1 values (229); ++insert into t1 values (228); ++insert into t1 values (227); ++insert into t1 values (226); ++insert into t1 values (225); ++insert into t1 values (224); ++insert into t1 values (223); ++insert into t1 values (222); ++insert into t1 values (221); ++insert into t1 values (220); ++insert into t1 values (219); ++insert into t1 values (218); ++insert into t1 values (217); ++insert into t1 values (216); ++insert into t1 values (215); ++insert into t1 values (214); ++insert into t1 values (213); ++insert into t1 values (212); ++insert into t1 values (211); ++insert into t1 values (210); ++insert into t1 values (209); ++insert into t1 values (208); ++insert into t1 values (207); ++insert into t1 values (206); ++insert into t1 values (205); ++insert into t1 values (204); ++insert into t1 values (203); ++insert into t1 values (202); ++insert into t1 values (201); ++insert into t1 values (200); ++insert into t1 values (199); ++insert into t1 values (198); ++insert into t1 values (197); ++insert into t1 values (196); ++insert into t1 values (195); ++insert into t1 values (194); ++insert into t1 values (193); ++insert into t1 values (192); ++insert into t1 values (191); ++insert into t1 values (190); ++insert into t1 values (189); ++insert into t1 values (188); ++insert into t1 values 
(187); ++insert into t1 values (186); ++insert into t1 values (185); ++insert into t1 values (184); ++insert into t1 values (183); ++insert into t1 values (182); ++insert into t1 values (181); ++insert into t1 values (180); ++insert into t1 values (179); ++insert into t1 values (178); ++insert into t1 values (177); ++insert into t1 values (176); ++insert into t1 values (175); ++insert into t1 values (174); ++insert into t1 values (173); ++insert into t1 values (172); ++insert into t1 values (171); ++insert into t1 values (170); ++insert into t1 values (169); ++insert into t1 values (168); ++insert into t1 values (167); ++insert into t1 values (166); ++insert into t1 values (165); ++insert into t1 values (164); ++insert into t1 values (163); ++insert into t1 values (162); ++insert into t1 values (161); ++insert into t1 values (160); ++insert into t1 values (159); ++insert into t1 values (158); ++insert into t1 values (157); ++insert into t1 values (156); ++insert into t1 values (155); ++insert into t1 values (154); ++insert into t1 values (153); ++insert into t1 values (152); ++insert into t1 values (151); ++insert into t1 values (150); ++insert into t1 values (149); ++insert into t1 values (148); ++insert into t1 values (147); ++insert into t1 values (146); ++insert into t1 values (145); ++insert into t1 values (144); ++insert into t1 values (143); ++insert into t1 values (142); ++insert into t1 values (141); ++insert into t1 values (140); ++insert into t1 values (139); ++insert into t1 values (138); ++insert into t1 values (137); ++insert into t1 values (136); ++insert into t1 values (135); ++insert into t1 values (134); ++insert into t1 values (133); ++insert into t1 values (132); ++insert into t1 values (131); ++insert into t1 values (130); ++insert into t1 values (129); ++insert into t1 values (128); ++insert into t1 values (127); ++insert into t1 values (126); ++insert into t1 values (125); ++insert into t1 values (124); ++insert into t1 values (123); ++insert 
into t1 values (122); ++insert into t1 values (121); ++insert into t1 values (120); ++insert into t1 values (119); ++insert into t1 values (118); ++insert into t1 values (117); ++insert into t1 values (116); ++insert into t1 values (115); ++insert into t1 values (114); ++insert into t1 values (113); ++insert into t1 values (112); ++insert into t1 values (111); ++insert into t1 values (110); ++insert into t1 values (109); ++insert into t1 values (108); ++insert into t1 values (107); ++insert into t1 values (106); ++insert into t1 values (105); ++insert into t1 values (104); ++insert into t1 values (103); ++insert into t1 values (102); ++insert into t1 values (101); ++insert into t1 values (100); ++insert into t1 values (99); ++insert into t1 values (98); ++insert into t1 values (97); ++insert into t1 values (96); ++insert into t1 values (95); ++insert into t1 values (94); ++insert into t1 values (93); ++insert into t1 values (92); ++insert into t1 values (91); ++insert into t1 values (90); ++insert into t1 values (89); ++insert into t1 values (88); ++insert into t1 values (87); ++insert into t1 values (86); ++insert into t1 values (85); ++insert into t1 values (84); ++insert into t1 values (83); ++insert into t1 values (82); ++insert into t1 values (81); ++insert into t1 values (80); ++insert into t1 values (79); ++insert into t1 values (78); ++insert into t1 values (77); ++insert into t1 values (76); ++insert into t1 values (75); ++insert into t1 values (74); ++insert into t1 values (73); ++insert into t1 values (72); ++insert into t1 values (71); ++insert into t1 values (70); ++insert into t1 values (69); ++insert into t1 values (68); ++insert into t1 values (67); ++insert into t1 values (66); ++insert into t1 values (65); ++insert into t1 values (64); ++insert into t1 values (63); ++insert into t1 values (62); ++insert into t1 values (61); ++insert into t1 values (60); ++insert into t1 values (59); ++insert into t1 values (58); ++insert into t1 values (57); 
++insert into t1 values (56); ++insert into t1 values (55); ++insert into t1 values (54); ++insert into t1 values (53); ++insert into t1 values (52); ++insert into t1 values (51); ++insert into t1 values (50); ++insert into t1 values (49); ++insert into t1 values (48); ++insert into t1 values (47); ++insert into t1 values (46); ++insert into t1 values (45); ++insert into t1 values (44); ++insert into t1 values (43); ++insert into t1 values (42); ++insert into t1 values (41); ++insert into t1 values (40); ++insert into t1 values (39); ++insert into t1 values (38); ++insert into t1 values (37); ++insert into t1 values (36); ++insert into t1 values (35); ++insert into t1 values (34); ++insert into t1 values (33); ++insert into t1 values (32); ++insert into t1 values (31); ++insert into t1 values (30); ++insert into t1 values (29); ++insert into t1 values (28); ++insert into t1 values (27); ++insert into t1 values (26); ++insert into t1 values (25); ++insert into t1 values (24); ++insert into t1 values (23); ++insert into t1 values (22); ++insert into t1 values (21); ++insert into t1 values (20); ++insert into t1 values (19); ++insert into t1 values (18); ++insert into t1 values (17); ++insert into t1 values (16); ++insert into t1 values (15); ++insert into t1 values (14); ++insert into t1 values (13); ++insert into t1 values (12); ++insert into t1 values (11); ++insert into t1 values (10); ++insert into t1 values (9); ++insert into t1 values (8); ++insert into t1 values (7); ++insert into t1 values (6); ++insert into t1 values (5); ++insert into t1 values (4); ++insert into t1 values (3); ++insert into t1 values (2); ++insert into t1 values (1); ++"The following are SLAVE." 
++select count(distinct n) from t1; ++count(distinct n) ++300 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++300 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9306 1 master-bin.000014 2849 # # master-bin.000014 Yes Yes # 0 0 2849 # None 0 No # ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000014 2849 ++"The following are SLAVE1." ++start slave; ++select count(distinct n) from t1; ++count(distinct n) ++300 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++300 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9308 1 master-bin.000014 2849 # # master-bin.000014 Yes Yes # 0 0 2849 # None 0 No # ++"The following are SLAVE." 
++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication-log'; ++ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log ++stop slave; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication_log'; ++ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log ++MAKE MASTER REVOKE SESSION WITH KILL; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication_log' ++ WITH BINLOG; ++MAKE MASTER GRANT SESSION; ++delete from t1 where n > 250; ++select count(distinct n) from t1; ++count(distinct n) ++250 ++"The following are SLAVE1." ++select count(distinct n) from t1; ++count(distinct n) ++250 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++250 ++"The following are SLAVE2." ++start slave; ++select count(distinct n) from t1; ++count(distinct n) ++250 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++250 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9308 1 master-bin.000015 189 # # master-bin.000015 Yes Yes # 0 0 189 # None 0 No # ++drop table t1; ++drop table t1; ++"The following are SLAVE." 
++show master logs; ++Log_name File_size ++master-bin.000001 4214 ++master-bin.000002 4212 ++master-bin.000003 4212 ++master-bin.000004 4212 ++master-bin.000005 4212 ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000015 265 ++"The following are SLAVE2." ++show master logs; ++Log_name File_size ++master-bin.000001 4214 ++master-bin.000002 4212 ++master-bin.000003 4212 ++master-bin.000004 4212 ++master-bin.000005 4212 ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000015 265 ++purge master logs to 'master-bin.000006'; ++show master logs; ++Log_name File_size ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++reset master; ++ERROR HY000: Binlog closed, cannot RESET MASTER +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog-master.opt +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog-master.opt Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++-O max_binlog_size=4096 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog-slave.opt +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog-slave.opt Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--rpl_mirror_binlog_enabled=1 --log-bin-index=replication_log +diff -r 66cc9e0a6768 
mysql-test/t/rpl_mirror_binlog.1.slave-mi +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.1.slave-mi Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--master-user=root --master-connect-retry=1 --master-host=127.0.0.1 --master-password="" --master-port=9308 --server-id=3 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog.2.slave-mi +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.2.slave-mi Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--master-user=root --master-connect-retry=1 --master-host=127.0.0.1 --master-password="" --master-port=9308 --server-id=4 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog.test +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.test Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,119 @@ ++-- source include/master-slave.inc ++-- source include/have_innodb.inc ++connect (slave_sec,localhost,root,,test,$SLAVE_MYPORT1,$SLAVE_MYSOCK1); ++connect (slave_ter,localhost,root,,test,$SLAVE_MYPORT2,$SLAVE_MYSOCK2); ++ ++connection master; ++--disable_warnings ++drop table if exists t1; ++--enable_warnings ++create table t1(n int) engine = InnoDB; ++ ++let $i=300; ++while ($i) ++{ ++ eval insert into t1 values ($i); ++ dec $i; ++} ++ ++save_master_pos; ++ ++connection slave; ++sync_with_master; ++ ++echo "The following are SLAVE."; ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++show master status; ++ ++connection slave_sec; ++echo "The following are SLAVE1."; ++start slave; ++sync_with_master; ++ ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++ ++# make the slave the new master ++connection slave; ++echo "The following are SLAVE."; ++ ++# The first 1201 error is caused by running slave. 
++--error 1201 ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication-log'; ++stop slave; ++ ++# The second 1201 error is caused by failover mode. ++--error 1201 ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication_log'; ++ ++MAKE MASTER REVOKE SESSION WITH KILL; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication_log' ++ WITH BINLOG; ++ ++MAKE MASTER GRANT SESSION; ++ ++delete from t1 where n > 250; ++save_master_pos; ++ ++select count(distinct n) from t1; ++ ++connection slave_sec; ++echo "The following are SLAVE1."; ++ ++sync_with_master; ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++ ++connection slave_ter; ++echo "The following are SLAVE2."; ++start slave; ++sync_with_master; ++ ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++ ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++ ++connection master; ++drop table t1; ++ ++connection slave; ++drop table t1; ++save_master_pos; ++ ++connection slave_sec; ++sync_with_master; ++ ++connection slave; ++echo "The following are SLAVE."; ++ ++show master logs; ++show master status; ++ ++ ++connection slave_ter; ++echo "The following are SLAVE2."; ++sync_with_master; ++ ++show master logs; ++show master status; ++purge master logs to 'master-bin.000006'; ++show master logs; ++--error 1186 ++reset master; +diff -r 66cc9e0a6768 patch_info/mirror_binlog.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/mirror_binlog.info Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,6 @@ ++File=mirror_binlog.patch ++Name=Mirroring binary logs on slave ++Version=V1 ++Author=Google ++License=GPL ++Comment=contains FastMaster promotion patch +diff -r 66cc9e0a6768 sql/Makefile.am +--- a/sql/Makefile.am Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/Makefile.am Thu Dec 04 21:46:15 2008 -0800 +@@ -68,7 +68,7 @@ + sql_array.h sql_cursor.h 
\ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ +- ha_federated.h ++ ha_federated.h repl_mule.h + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ + item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ +@@ -105,7 +105,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ +- ha_federated.cc ++ ha_federated.cc repl_mule.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) +diff -r 66cc9e0a6768 sql/Makefile.in +--- a/sql/Makefile.in Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/Makefile.in Thu Dec 04 21:46:15 2008 -0800 +@@ -152,7 +152,7 @@ + sp_rcontext.$(OBJEXT) sp.$(OBJEXT) sp_cache.$(OBJEXT) \ + parse_file.$(OBJEXT) sql_trigger.$(OBJEXT) \ + ha_example.$(OBJEXT) ha_archive.$(OBJEXT) ha_tina.$(OBJEXT) \ +- ha_blackhole.$(OBJEXT) ha_federated.$(OBJEXT) ++ ha_blackhole.$(OBJEXT) ha_federated.$(OBJEXT) repl_mule.$(OBJEXT) + mysqld_OBJECTS = $(am_mysqld_OBJECTS) + mysqld_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) \ + $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_2) \ +@@ -516,7 +516,7 @@ + sql_array.h sql_cursor.h \ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ +- ha_federated.h ++ ha_federated.h repl_mule.h + + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ +@@ -554,7 +554,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ +- ha_federated.cc ++ ha_federated.cc repl_mule.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) +@@ -748,6 +748,7 @@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/protocol.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/records.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/repl_failsafe.Po@am__quote@ 
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/repl_mule.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/set_var.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slave.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sp.Po@am__quote@ +diff -r 66cc9e0a6768 sql/lex.h +--- a/sql/lex.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/lex.h Thu Dec 04 21:46:15 2008 -0800 +@@ -292,6 +292,7 @@ + { "LONGTEXT", SYM(LONGTEXT)}, + { "LOOP", SYM(LOOP_SYM)}, + { "LOW_PRIORITY", SYM(LOW_PRIORITY)}, ++ { "MAKE", SYM(MAKE_SYM)}, + { "MASTER", SYM(MASTER_SYM)}, + { "MASTER_CONNECT_RETRY", SYM(MASTER_CONNECT_RETRY_SYM)}, + { "MASTER_HOST", SYM(MASTER_HOST_SYM)}, +diff -r 66cc9e0a6768 sql/log.cc +--- a/sql/log.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/log.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -79,7 +79,9 @@ + + bool binlog_init() + { +- return !opt_bin_log; ++ if (!opt_bin_log) ++ binlog_hton.prepare = NULL; ++ return 0; /* return !opt_bin_log; */ + } + + static int binlog_close_connection(THD *thd) +@@ -406,6 +408,7 @@ + :bytes_written(0), last_time(0), query_start(0), name(0), + prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1), + write_error(FALSE), inited(FALSE), need_start_event(TRUE), ++ mule_binlog_(0), + description_event_for_exec(0), description_event_for_queue(0) + { + /* +@@ -506,7 +509,10 @@ + const char *log_name) + { + File index_file_nr= -1; +- DBUG_ASSERT(!my_b_inited(&index_file)); ++ ++ /* If the index is already opened, do not open it again. 
*/ ++ if (my_b_inited(&index_file)) ++ return FALSE; + + /* + First open of this class instance +@@ -750,7 +756,7 @@ + if (file >= 0) + my_close(file,MYF(0)); + end_io_cache(&log_file); +- end_io_cache(&index_file); ++ close_index_file(); + safeFree(name); + log_type= LOG_CLOSED; + DBUG_RETURN(1); +@@ -768,7 +774,10 @@ + int MYSQL_LOG::raw_get_current_log(LOG_INFO* linfo) + { + strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1); +- linfo->pos = my_b_tell(&log_file); ++ if (!mule_binlog_) ++ linfo->pos = my_b_tell(&log_file); ++ else ++ linfo->pos = my_b_filelength(&log_file); + return 0; + } + +@@ -935,6 +944,11 @@ + if (need_lock) + pthread_mutex_lock(&LOCK_index); + safe_mutex_assert_owner(&LOCK_index); ++ ++ if (open_index_file(index_file_name, NULL) != 0) { ++ error = -1; ++ goto err; ++ } + + /* As the file is flushed, we can't get an error here */ + (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0, +@@ -1446,18 +1460,19 @@ + SYNOPSIS + new_file() + need_lock Set to 1 if caller has not locked LOCK_log ++ logfile_name the specified log filename. + + NOTE + The new file name is stored last in the index file + */ + +-void MYSQL_LOG::new_file(bool need_lock) ++void MYSQL_LOG::new_file(bool need_lock, const char* log_filename) + { + char new_name[FN_REFLEN], *new_name_ptr, *old_name; + enum_log_type save_log_type; + + DBUG_ENTER("MYSQL_LOG::new_file"); +- if (!is_open()) ++ if (!is_log_open()) + { + DBUG_PRINT("info",("log is closed")); + DBUG_VOID_RETURN; +@@ -1496,7 +1511,9 @@ + We have to do this here and not in open as we want to store the + new file name in the current binary log file. 
+ */ +- if (generate_new_name(new_name, name)) ++ if (log_filename) { ++ fn_format(new_name,log_filename,mysql_data_home,"",4); ++ } else if (generate_new_name(new_name, name)) + goto end; + new_name_ptr=new_name; + +@@ -1571,7 +1588,7 @@ + bytes_written+= ev->data_written; + DBUG_PRINT("info",("max_size: %lu",max_size)); + if ((uint) my_b_append_tell(&log_file) > max_size) +- new_file(0); ++ new_file(0); + + err: + pthread_mutex_unlock(&LOCK_log); +@@ -1600,8 +1617,14 @@ + bytes_written += len; + } while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint))); + DBUG_PRINT("info",("max_size: %lu",max_size)); +- if ((uint) my_b_append_tell(&log_file) > max_size) +- new_file(0); ++ ++ /* If max_size is BINLOG_NOSWITCH_SIZE, binlog would not switch because ++ * of file size limit. ++ */ ++ if (max_size != BINLOG_NOSWITCH_SIZE && ++ (uint) my_b_append_tell(&log_file) > max_size) { ++ new_file(0); ++ } + + err: + if (!error) +@@ -2492,6 +2515,17 @@ + DBUG_VOID_RETURN; + } + ++int MYSQL_LOG::flush_log_file() { ++ return flush_io_cache(&log_file); ++} ++ ++int MYSQL_LOG::close_index_file() { ++ if (my_b_inited(&index_file)) { ++ end_io_cache(&index_file); ++ my_close(index_file.file, MYF(0)); ++ } ++ return 0; ++} + + /* + Check if a string is a valid number +diff -r 66cc9e0a6768 sql/log_event.h +--- a/sql/log_event.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/log_event.h Thu Dec 04 21:46:15 2008 -0800 +@@ -94,6 +94,14 @@ + #define LINE_TERM_EMPTY 0x4 + #define LINE_START_EMPTY 0x8 + #define ESCAPED_EMPTY 0x10 ++ ++/* This server-id value is used to indicate a special master-info event ++ * in relay-log. ++ * We will enforce in database that replication can not set this value ++ * as the server-id. 
++ */ ++#define MASTER_INFO_SERVER_ID 0xffffffff ++ + + /***************************************************************************** + +diff -r 66cc9e0a6768 sql/mysql_priv.h +--- a/sql/mysql_priv.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/mysql_priv.h Thu Dec 04 21:46:15 2008 -0800 +@@ -462,6 +462,7 @@ + /* BINLOG_DUMP options */ + + #define BINLOG_DUMP_NON_BLOCK 1 ++#define BINLOG_MIRROR_CLIENT 0x0004 + + /* sql_show.cc:show_log_files() */ + #define SHOW_LOG_STATUS_FREE "FREE" +@@ -1374,6 +1375,7 @@ + extern const char **errmesg; /* Error messages */ + extern const char *myisam_recover_options_str; + extern const char *in_left_expr_name, *in_additional_cond, *in_having_cond; ++extern char *opt_binlog_index_name; + extern const char * const triggers_file_ext; + extern const char * const trigname_file_ext; + extern Eq_creator eq_creator; +@@ -1875,6 +1877,10 @@ + extern "C" void unireg_abort(int exit_code); + void kill_delayed_threads(void); + bool check_stack_overrun(THD *thd, long margin, char *dummy); ++extern my_bool rpl_mirror_binlog_enabled; ++extern ulong sync_mirror_binlog_period; ++extern my_bool rpl_mirror_binlog_no_replicate; ++extern ulong rpl_mirror_binlog_clients, rpl_mirror_binlog_status; + #else + #define unireg_abort(exit_code) DBUG_RETURN(exit_code) + inline void kill_delayed_threads(void) {} +diff -r 66cc9e0a6768 sql/mysqld.cc +--- a/sql/mysqld.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/mysqld.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -555,6 +555,7 @@ + pthread_mutex_t LOCK_global_user_client_stats; + pthread_mutex_t LOCK_global_table_stats; + pthread_mutex_t LOCK_global_index_stats; ++pthread_mutex_t LOCK_failover_master; + /* + The below lock protects access to two global server variables: + max_prepared_stmt_count and prepared_stmt_count. 
These variables +@@ -584,13 +585,15 @@ + char *master_ssl_key, *master_ssl_cert; + char *master_ssl_ca, *master_ssl_capath, *master_ssl_cipher; + ++char *opt_binlog_index_name; ++ + /* Static variables */ + + static bool kill_in_progress, segfaulted; + static my_bool opt_do_pstack, opt_bootstrap, opt_myisam_log; + static int cleanup_done; + static ulong opt_specialflag, opt_myisam_block_size; +-static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; ++static char *opt_logname, *opt_update_logname; + static char *opt_tc_heuristic_recover; + static char *mysql_home_ptr, *pidfile_name_ptr; + static char **defaults_argv; +@@ -598,6 +601,32 @@ + + static my_socket unix_sock,ip_sock; + struct rand_struct sql_rand; // used by sql_class.cc:THD::THD() ++ ++/* When set, we are inside a failover slave and deny all non-super access */ ++bool failover_deny_access= 0; ++ ++/* When set, binlog will be mirrored on the replica. */ ++my_bool rpl_mirror_binlog_enabled; ++ ++/* Sync the mirrored binlog to disk after every #th event. */ ++ulong sync_mirror_binlog_period; ++ ++/* The fixed size for replication event buffer. Replication event can exceed ++ * the size. ++ */ ++//ulong rpl_event_buffer_size; ++ ++/* This is a mirror binlog status variable on the primary to indicate how many ++ * mirror binlog servers are connecting. ++ */ ++ulong rpl_mirror_binlog_clients = 0; ++ ++/* This indicates whether mirror binlog is working on a replica database. It ++ * requires: ++ * . rpl_mirror_binlog_enabled = 1 ++ * . 
the slave I/O thread is running and mirror binlog is also dumped ++ */ ++ulong rpl_mirror_binlog_status = 0; + + /* OS specific variables */ + +@@ -1315,6 +1344,7 @@ + (void) pthread_cond_destroy(&COND_flush_thread_cache); + (void) pthread_cond_destroy(&COND_manager); + (void) pthread_mutex_destroy(&LOCK_stats); ++ (void) pthread_mutex_destroy(&LOCK_failover_master); + (void) pthread_mutex_destroy(&LOCK_global_user_client_stats); + (void) pthread_mutex_destroy(&LOCK_global_table_stats); + (void) pthread_mutex_destroy(&LOCK_global_index_stats); +@@ -3164,6 +3194,7 @@ + (void) pthread_cond_init(&COND_rpl_status, NULL); + #endif + (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_failover_master, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST); +@@ -3398,39 +3429,8 @@ + + if (opt_bin_log) + { +- char buf[FN_REFLEN]; +- const char *ln; +- ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf); +- if (!opt_bin_logname && !opt_binlog_index_name) +- { +- /* +- User didn't give us info to name the binlog index file. +- Picking `hostname`-bin.index like did in 4.x, causes replication to +- fail if the hostname is changed later. So, we would like to instead +- require a name. But as we don't want to break many existing setups, we +- only give warning, not error. +- */ +- sql_print_warning("No argument was provided to --log-bin, and " +- "--log-bin-index was not used; so replication " +- "may break when this MySQL server acts as a " +- "master and has his hostname changed!! 
Please " +- "use '--log-bin=%s' to avoid this problem.", ln); +- } +- if (ln == buf) +- { +- my_free(opt_bin_logname, MYF(MY_ALLOW_ZERO_PTR)); +- opt_bin_logname=my_strdup(buf, MYF(0)); +- } +- if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln)) +- { +- unireg_abort(1); +- } +- +- /* +- Used to specify which type of lock we need to use for queries of type +- INSERT ... SELECT. This will change when we have row level logging. +- */ +- using_update_log=1; ++ if (make_master_open_index(&opt_bin_logname, opt_binlog_index_name) != 0) ++ unireg_abort(1); + } + + if (xid_cache_init()) +@@ -3480,9 +3480,10 @@ + unireg_abort(1); + } + +- if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0, +- WRITE_CACHE, 0, max_binlog_size, 0)) +- unireg_abort(1); ++ if (opt_bin_log && ++ make_master(NULL, opt_bin_logname, opt_binlog_index_name, NULL) != 0) { ++ unireg_abort(1); ++ } + + #ifdef HAVE_REPLICATION + if (opt_bin_log && expire_logs_days) +@@ -5036,6 +5037,8 @@ + OPT_INNODB_READ_IO_THREADS, + OPT_INNODB_WRITE_IO_THREADS, + OPT_INNODB_ADAPTIVE_HASH_INDEX, ++ OPT_RPL_MIRROR_BINLOG, ++ OPT_SYNC_MIRROR_BINLOG, + OPT_FEDERATED + }; + +@@ -5725,6 +5728,11 @@ + {"rpl-recovery-rank", OPT_RPL_RECOVERY_RANK, "Undocumented.", + (gptr*) &rpl_recovery_rank, (gptr*) &rpl_recovery_rank, 0, GET_ULONG, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, ++ {"rpl_mirror_binlog_enabled", OPT_RPL_MIRROR_BINLOG, ++ "1 = support mirroring binlogs. 
0 = disable mirroring binlogs", ++ (gptr*) &rpl_mirror_binlog_enabled, ++ (gptr*) &rpl_mirror_binlog_enabled, 0, GET_BOOL, NO_ARG, ++ 0, 0, 1, 0, 1, 0}, + {"safe-mode", OPT_SAFE, "Skip some optimize stages (for testing).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + #ifndef TO_BE_DELETED +@@ -5849,6 +5857,11 @@ + {"symbolic-links", 's', "Enable symbolic link support.", + (gptr*) &my_use_symdir, (gptr*) &my_use_symdir, 0, GET_BOOL, NO_ARG, + IF_PURIFY(0,1), 0, 0, 0, 0, 0}, ++ {"sync-mirror-binlog", OPT_SYNC_MIRROR_BINLOG, ++ "Sync the mirrored binlog to disk after every #th event. " ++ "#=0 (the default) does no sync. Syncing slows MySQL down", ++ (gptr*) &sync_mirror_binlog_period, ++ (gptr*) &sync_mirror_binlog_period, 0, GET_ULONG, REQUIRED_ARG, 0, 0, ~0L, 0, 1, 0}, + {"sysdate-is-now", OPT_SYSDATE_IS_NOW, + "Non-default option to alias SYSDATE() to NOW() to make it safe-replicable. Since 5.0, SYSDATE() returns a `dynamic' value different for different invocations, even within the same statement.", + (gptr*) &global_system_variables.sysdate_is_now, +@@ -6625,6 +6638,7 @@ + {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG}, + {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_CONST}, + {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG}, ++ {"Failover_deny_access", (char*) &failover_deny_access, SHOW_LONG}, + {"Flush_commands", (char*) &refresh_version, SHOW_LONG_CONST}, + {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS}, + {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS}, +diff -r 66cc9e0a6768 sql/repl_mule.cc +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/repl_mule.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,466 @@ ++/* ++ Copyright (C) 2007 Google Inc. 
++ ++This program is free software; you can redistribute it and/or ++modify it under the terms of the GNU General Public License ++as published by the Free Software Foundation; either version 2 ++of the License, or (at your option) any later version. ++ ++This program is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with this program; if not, write to the Free Software ++Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++*/ ++ ++#include "mysql_priv.h" ++#include <my_dir.h> ++#include "slave.h" ++#include "repl_mule.h" ++ ++/* max log size: 2GB */ ++#define MAX_LOG_SIZE BINLOG_NOSWITCH_SIZE ++ ++ReplMule::ReplMule(THD* thd, MASTER_INFO *mi, RelayStatus status, ++ my_off_t file_size, const char *binlog_indexname, ++ MYSQL_LOG *binlog, ulong sync_period) ++ : desc_event_(new Format_description_log_event(BINLOG_VERSION)), ++ io_thd_(thd), mi_(mi), status_(status), dump_position_(0L), ++ file_size_(file_size), mule_log_(binlog), ++ mule_log_sync_period_(sync_period), mule_log_event_counter_(0) { ++ char llbuf1[22], llbuf2[22]; ++ ++ DBUG_ENTER("ReplMule::ReplMule"); ++ ++ /* Indicate that we are in replication mule mode. */ ++ mule_log_->set_mule_mode(); ++ ++ strmake(curr_log_filename_, mi->master_log_name, ++ sizeof(curr_log_filename_)-1); ++ strmake(mule_indexname_, binlog_indexname, sizeof(mule_indexname_)-1); ++ ++ /* Open the mule log file */ ++ if (!mule_log_->is_log_open()) { ++ /* Do not open binlog file when master_log_name is not specified. We ++ * are at the I/O thread initialization time and we do not know what ++ * filename we are going to dump. ++ * We wait for the next rotation event to indicate the filename. 
++ */ ++ if (strlen(curr_log_filename_) > 0 && ++ mule_log_->open(curr_log_filename_, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0) != 0) { ++ sql_print_error("ReplMule: open binlog failed: %s", ++ curr_log_filename_); ++ status_ = MULE_ERROR; ++ DBUG_VOID_RETURN; ++ } ++ } ++ ++ switch (status_) { ++ case MULE_BEHIND: ++ dump_position_ = mi->master_log_pos; ++ mi->master_log_pos = file_size_; ++ sql_print_information("ReplicationMule: MULE_BEHIND - new(%s), old(%s)", ++ llstr(mi->master_log_pos, llbuf1), ++ llstr(dump_position_, llbuf2)); ++ break; ++ case RELAY_MATCH_MULE: ++ case RELAY_MATCH_MULE_RUN: ++ dump_position_ = mi->master_log_pos; ++ sql_print_information("ReplicationMule: RELAY_MATCH_MULE."); ++ break; ++ case MULE_VERIFY: ++ case MULE_VERIFY_RELAY_BEHIND: ++ dump_position_ = mi->master_log_pos; ++ mi->master_log_pos = BIN_LOG_HEADER_SIZE; ++ sql_print_information( ++ "ReplicationMule: MULE_VERIFY - old(%s), file_size(%s)", ++ llstr(dump_position_, llbuf1), llstr(file_size_, llbuf2)); ++ ++ /* seek to the beginning of the file for verification */ ++ seekToPosition(BIN_LOG_HEADER_SIZE); ++ break; ++ } ++ ++ DBUG_VOID_RETURN; ++} ++ ++ReplMule::~ReplMule() { ++ DBUG_ENTER("ReplMule::~ReplMule"); ++ ++ if (mule_log_->is_log_open()) ++ mule_log_->close(LOG_CLOSE_INDEX); ++ mule_log_->clear_mule_mode(); ++ ++ /* If we are still in MULE_BEHIND or MULE_VERIFY state and we exit from ++ * I/O thread, it means we encountered some errors. ++ * mi->master_log_pos might be used by later slave start. It is being ++ * changed here to do event dumping or event verification. So, we should ++ * restore it to its original value. 
++ */ ++ switch (status_) { ++ case MULE_BEHIND: ++ case MULE_VERIFY: ++ if (mi_->master_log_pos < dump_position_) ++ mi_->master_log_pos = dump_position_; ++ break; ++ } ++ ++ delete desc_event_; ++ ++ DBUG_VOID_RETURN; ++} ++ ++ReplMule::WriteStatus ReplMule::writeEvent(const char* buf, ulong event_len) { ++ WriteStatus dump_status = WRITE_RELAY; ++ char llbuf1[22], llbuf2[22], llbuf3[22]; ++ char *verify_event; ++ bool verified = false; ++ bool skip_event = false; ++ ++ DBUG_ENTER("ReplMule::dumpEvent"); ++ switch (status_) { ++ case MULE_VERIFY: ++ case MULE_VERIFY_RELAY_BEHIND: ++ if (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT && ++ IsFakeRotation(buf, event_len)) { ++ /* Do not verify the faked rotate event */ ++ if (status_ == MULE_VERIFY) ++ dump_status = SKIP_RELAY; ++ break; ++ } ++ verify_event = new char[event_len]; ++ if (verify_event == NULL) { ++ sql_print_error( ++ "ReplMule::dumpEvent - insufficient memory in verification, " ++ "position(%s), event_len(%d).", ++ llstr(mi_->master_log_pos, llbuf1), event_len); ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ if (my_b_read(mule_log_->get_log_file(), (byte*) verify_event, ++ event_len) != 0) { ++ sql_print_error( ++ "ReplMule::dumpEvent - read log error in verification, " ++ "position(%s), event_len(%d).", ++ llstr(mi_->master_log_pos, llbuf1), event_len); ++ dump_status = WRITE_ERROR; ++ delete verify_event; ++ break; ++ } ++ verified = (memcmp(buf, verify_event, event_len) == 0); ++ delete verify_event; ++ if (!verified) { ++ sql_print_error( ++ "ReplMule::dumpEvent - event does not match at position(%s)", ++ llstr(mi_->master_log_pos, llbuf1)); ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ /* fall through */ ++ case MULE_BEHIND: ++ dump_status = SKIP_RELAY; ++ if (status_ == MULE_BEHIND && ++ queueEvent(buf, event_len, &skip_event) != 0) { ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ ++ /* Skip faked rotation event */ ++ if (!skip_event) ++ mi_->master_log_pos += event_len; ++ ++ if 
(mi_->master_log_pos == dump_position_) { ++ if (dump_position_ < file_size_) { ++ status_ = MULE_VERIFY_RELAY_BEHIND; ++ } else { ++ status_ = RELAY_MATCH_MULE; ++ } ++ sql_print_information( ++ "ReplMule::dumpEvent - new status(%d) " ++ "master_log_pos(%s), dump_pos(%s), file_size(%s)", status_, ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2), ++ llstr(file_size_, llbuf3)); ++ } else if (mi_->master_log_pos == file_size_) { ++ if (dump_position_ > file_size_) { ++ status_ = MULE_BEHIND; ++ } else { ++ status_ = RELAY_MATCH_MULE; ++ } ++ sql_print_information( ++ "ReplMule::dumpEvent - new status(%d) " ++ "master_log_pos(%s), dump_pos(%s), file_size(%s)", status_, ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2), ++ llstr(file_size_, llbuf3)); ++ } else if (status_ != MULE_VERIFY_RELAY_BEHIND && ++ mi_->master_log_pos > dump_position_) { ++ sql_print_error( ++ "ReplMule::dumpEvent - mule position(%s) does not match " ++ "relay-log position(%s).", ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2)); ++ dump_status = WRITE_ERROR; ++ } ++ break; ++ case RELAY_MATCH_MULE_RUN: ++ if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) { ++ sql_print_information(" RELAY_MATCH_MULE event %d", buf[EVENT_TYPE_OFFSET] ); ++ /* Do not write format description record if size is the same */ ++ break; ++ } ++ case RELAY_MATCH_MULE: ++ if (queueEvent(buf, event_len, &skip_event) != 0) ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ ++ DBUG_RETURN(dump_status); ++} ++ ++int ReplMule::appendEvent(const char* buf, ulong event_len) { ++ char llbuf1[22]; ++ int error; ++ ++ DBUG_ENTER("ReplMule::appendEvent"); ++ ++ error = mule_log_->appendv(buf,event_len,0); ++ if (error != 0) { ++ sql_print_error("ReplMule::appendEvent - append error at %s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ } else if (mule_log_->flush_log_file() != 0) { ++ sql_print_error("ReplMule::appendEvent - flush error at 
%s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ error = -1; ++ } else if (mule_log_sync_period_ > 0) { ++ mule_log_event_counter_++; ++ if (mule_log_event_counter_ >= mule_log_sync_period_) { ++ mule_log_event_counter_ = 0; ++ error = my_sync(mule_log_->get_log_file()->file, MYF(MY_WME)); ++ if (error != 0) ++ sql_print_error("ReplMule::appendEvent - sync error at %s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ } ++ } ++ ++ DBUG_RETURN(error); ++} ++ ++int ReplMule::queueEvent(const char* buf, ulong event_len, bool *skip_event) { ++ int error = 0; ++ ++ DBUG_ENTER("ReplMule::queueEvent"); ++ ++ *skip_event = false; ++ ++ mule_log_->lock_log(); ++ if (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT) { ++ Rotate_log_event rev(buf, event_len, desc_event_); ++ ++ /* If this is a faked rotate event and the specified filename is ++ * the same as the current binlog filename, ignore the event. ++ */ ++ if (IsFakeRotation(rev)) { ++ *skip_event = true; ++ DBUG_PRINT("info",("skipped faked rotation event")); ++ } else { ++ /* Only append real events. */ ++ if (rev.when != 0) ++ error = appendEvent(buf, event_len); ++ ++ /* Only rotate file when append succeeds. */ ++ if (error == 0) { ++ /* Create a new file: lock both index and log. */ ++ if (strlen(curr_log_filename_) == 0) { ++ /* If curr_log_filename_ is not specified, then this is the first ++ * valid rotation event to indicate the filename. 
++ */ ++ error = mule_log_->open(rev.new_log_ident, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0); ++ } else { ++ mule_log_->new_file(0, rev.new_log_ident); ++ } ++ ++ strmake(curr_log_filename_, rev.new_log_ident, ++ strlen(rev.new_log_ident)); ++ ++ DBUG_PRINT("info",("rotate file: %s", rev.new_log_ident)); ++ } ++ } ++ } else { ++ error = appendEvent(buf, event_len); ++ } ++ mule_log_->unlock_log(); ++ ++ DBUG_RETURN(error); ++} ++ ++void ReplMule::seekToPosition(my_off_t pos) { ++ DBUG_ENTER("ReplMule::seekToPosition"); ++ DBUG_PRINT("enter",("seek_pos: %ld", (ulong) pos)); ++ ++ my_b_seek(mule_log_->get_log_file(), pos); ++ DBUG_VOID_RETURN; ++} ++ ++bool ReplMule::IsFakeRotation(const char* buf, ulong event_len) { ++ DBUG_ENTER("ReplMule::IsFakeRotation"); ++ ++ Rotate_log_event rev(buf, event_len, desc_event_); ++ DBUG_RETURN(IsFakeRotation(rev)); ++} ++ ++bool ReplMule::IsFakeRotation(const Rotate_log_event& rev) { ++ DBUG_ENTER("ReplMule::IsFakeRotation"); ++ DBUG_RETURN(rev.when == 0 && ++ rev.ident_len == strlen(curr_log_filename_) && ++ strcmp(rev.new_log_ident, curr_log_filename_) == 0); ++} ++ ++/* createReplicationMule: ++ * Create a mule that relays master's replication binlog and ++ * generate an exact same copy on the local filesystem. 
++ * ++ * Code flow: ++ * last_mulelog = scan the existing mule log index to find it ++ * if (mulelog index is not created or there is no mule log inside it) ++ * old_mule_log <- requested dumping position ++ * requested dumping position <- 0 in the file ++ * else ++ * check whether the mule log matches the requested dump ++ * (whether the last mule log name/size matches) ++ * if the mule log name does not match ++ * exit with an error ++ * if (the mule log size does not match the requested dump position) ++ * request the dump from position 0 and read all events ++ * verify all events with the corresponding events in mule log ++ * if (the verification succeeds) ++ * continue the dump ++ * else ++ * exit with an error ++ */ ++ReplMule* ReplMule::createReplicationMule( ++ THD* thd, MASTER_INFO *mi, const char *binlog_indexname, ++ MYSQL_LOG *binlog) { ++ ReplMule *mule = NULL; ++ LOG_INFO linfo; ++ bool index_opened = false; ++ ++ DBUG_ENTER("ReplMule::createReplicationMule"); ++ ++ /* binlog_indexname must be set to some real value. */ ++ DBUG_ASSERT(binlog_indexname); ++ ++ /* Lock binlog index for all binlog operations */ ++ binlog->lock_index(); ++ index_opened = binlog->open_index_file(binlog_indexname, NULL); ++ DBUG_PRINT("info",("open index file succeed: %d", index_opened)); ++ sql_print_information("createReplicationMule"); ++ ++ /* Scan the existing binlog index to find the last relayed binlog */ ++ if (index_opened || ++ binlog->find_log_pos(&linfo, NullS, false) != 0) { ++ /* binlog index is not created or has no log file inside: ++ * . old_relay_binlog <- requested dumping position ++ * . 
requested dumping position <- 0 in the file ++ */ ++ if (mi->master_log_pos == BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, RELAY_MATCH_MULE, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, sync_mirror_binlog_period); ++ } else { ++ mule = new ReplMule(thd, mi, MULE_BEHIND, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, sync_mirror_binlog_period); ++ } ++ ++ if (mule == NULL) { ++ sql_print_error("Mule malloc operation failed."); ++ } ++ } else { ++ IO_CACHE* log_file; ++ MY_STAT stat; ++ char last_binlog_name[FN_REFLEN]; ++ ++ /* Find the last log file from the binlog index. ++ * Check whether the last binlog matches the requested dump for both ++ * binlog name and binlog size. ++ */ ++ for (;;) { ++ strmake(last_binlog_name, linfo.log_file_name, FN_REFLEN); ++ last_binlog_name[FN_REFLEN - 1] = '\0'; ++ if (binlog->find_next_log(&linfo, false)) ++ break; ++ } ++ DBUG_PRINT("info",("the last binlog: %s", last_binlog_name)); ++ ++ /* if the binlog name does not match, exit with an error. */ ++ if (strcmp(last_binlog_name+dirname_length(last_binlog_name), ++ mi->master_log_name) != 0) { ++ sql_print_error("Mule binlog(%s) does not match new relay-binlog(%s)", ++ last_binlog_name, mi->master_log_name); ++ } /* Open the last binlog. */ ++ else if (binlog->open(last_binlog_name, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0) != 0) { ++ sql_print_error("Mule open last binlog failed: %s", last_binlog_name); ++ } else { ++ bool valid_file_size = true; ++ ++ /* Get the binlog size. */ ++ log_file = binlog->get_log_file(); ++ if (my_fstat(log_file->file, &stat, MYF(0)) == 0) { ++ /* If the binlog size does not match the requested dump position, then ++ * request the dump from position 0 and verify all events, we need to ++ * verify events because the mule log might be used for serving during ++ * anytime. We must be sure that they are correct. 
++ */ ++ sql_print_information("Binglog size %d", stat.st_size); ++ if (stat.st_size == mi->master_log_pos) { ++ mule = new ReplMule(thd, mi, RELAY_MATCH_MULE_RUN, stat.st_size, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else if (stat.st_size > BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, MULE_VERIFY, stat.st_size, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else if (stat.st_size == BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, MULE_BEHIND, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else { ++ char llbuf[22]; ++ valid_file_size = false; ++ sql_print_error("Mule binlog file(%s) invalid size: %s", ++ last_binlog_name, llstr(stat.st_size, llbuf)); ++ } ++ } else { ++ valid_file_size = false; ++ sql_print_error("Mule binlog file(%s): fstat failed.", ++ last_binlog_name); ++ } ++ ++ if (valid_file_size) { ++ if (mule == NULL) { ++ sql_print_error("Mule malloc operation failed."); ++ } else if (mule->status_ == MULE_ERROR) { ++ /* If mule creation fails, indicate the error. */ ++ delete mule; ++ mule = NULL; ++ } ++ } ++ } ++ } ++ ++ /* Clear the mule binlog mode if there are errors. */ ++ if (mule == NULL) { ++ binlog->clear_mule_mode(); ++ binlog->close_index_file(); ++ } ++ ++ /* Unlock binlog index */ ++ binlog->unlock_index(); ++ ++ DBUG_RETURN(mule); ++} +diff -r 66cc9e0a6768 sql/repl_mule.h +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/repl_mule.h Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,166 @@ ++/* ++ Copyright (C) 2007 Google Inc. ++ ++This program is free software; you can redistribute it and/or ++modify it under the terms of the GNU General Public License ++as published by the Free Software Foundation; either version 2 ++of the License, or (at your option) any later version. 
++ ++This program is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with this program; if not, write to the Free Software ++Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++*/ ++ ++#ifndef SQL_REPL_MULE_H__ ++#define SQL_REPL_MULE_H__ ++ ++/* Replication Mule is the class that is responsible for generating ++ * an exact copy of the binlog from a master database. We call this feature ++ * mirror binlog and it can be enabled by setting rpl_mirror_binlog. We ++ * need to keep the same copy for the following purposes: ++ * . The replica can serve the binlog transparently as if they are the ++ * master database. This can relieve master connection overhead. ++ * . During failover, the replica can become the new master and serve ++ * old binlogs transparently. ++ * (The Mule name comes from the popular P2P software eMule.) ++ * ++ * Internally, we call the mirrored binlog mule log. ++ */ ++ ++class THD; ++class Rotate_log_event; ++class Format_description_log_event; ++typedef struct st_master_info MASTER_INFO; ++ ++class ReplMule { ++ public: ++ /* Because I/O thread also creates relay-binlog, instead of an exact ++ * copy of the original master's binlog, we have two resources that ++ * might get out of sync. 
++ * This enum indicates the status: ++ * MULE_BEHIND - the mule's header is behind: ++ * (mule is activated for the first time) ++ * RELAY_MATCH_MULE - mule matches relay-log ++ * RELAY_MATCH_MULE_RUN - mule matches relay-log and it was not empty binlog ++ * MULE_VERIFY - mule has more events than the relay-log and needs ++ * verification; we can not verify based on relay-log ++ * events because events might get changed a little; ++ * verification starts with downloading all events in ++ * the last binlog from the master and compare with ++ * all events in the mule log; ++ * MULE_VERIFY_RELAY_BEHIND - mule has more events than the relay-log ++ * and relay-log needs to write events ++ * MULE_ERROR - mule detects errors in event duplicate ++ * ++ * When the mule mirrors binlogs, it writes an event into the mule log ++ * first. Then, I/O thread writes the event into the relay log. ++ */ ++ enum RelayStatus { ++ MULE_BEHIND = 1, ++ RELAY_MATCH_MULE = 2, ++ RELAY_MATCH_MULE_RUN = 7, ++ MULE_VERIFY = 3, ++ MULE_VERIFY_RELAY_BEHIND = 4, ++ MULE_ERROR = 5, ++ }; ++ ++ enum WriteStatus { ++ WRITE_RELAY = 1, ++ WRITE_ERROR = 2, ++ SKIP_RELAY = 3, ++ }; ++ ++ private: ++ const Format_description_log_event *desc_event_; ++ THD *io_thd_; ++ MASTER_INFO *mi_; ++ ++ /* ++ * I/O thread will write both mule log for mirror binlog and relay log ++ * for SQL thread. ++ * The variable indicates whether the two are in sync. ++ */ ++ RelayStatus status_; ++ ++ /* The starting event writing position. */ ++ my_off_t dump_position_; ++ ++ /* During the initial setup, the last mule log's file size. */ ++ my_off_t file_size_; ++ ++ /* Internally, we call the mirrored binlog mule log. */ ++ MYSQL_LOG *mule_log_; ++ ++ /* Sync the mule log to disk for every #N events. 
*/ ++ ulong mule_log_sync_period_; ++ ulong mule_log_event_counter_; ++ ++ /* mule log's index filename */ ++ char mule_indexname_[FN_REFLEN]; ++ ++ /* the current mule log's filename */ ++ char curr_log_filename_[FN_REFLEN]; ++ ++ ReplMule(THD* thd, MASTER_INFO *mi, RelayStatus status, ++ my_off_t file_size, const char *binlog_indexname, ++ MYSQL_LOG *binlog, ulong sync_period); ++ ++ /* ++ * Queue the event into the current mule log. If it is a rotation ++ * event, generate a new mule log file. ++ * Indicate whether the event is skipped because it is a fake event. ++ * A fake event is generated by the master to indicate the current ++ * reading position. ++ */ ++ int queueEvent(const char* buf, ulong event_len, bool *skip_event); ++ ++ /* Append the event to the current mule log. */ ++ int appendEvent(const char* buf, ulong event_len); ++ ++ bool IsFakeRotation(const char* buf, ulong event_len); ++ bool IsFakeRotation(const Rotate_log_event& rev); ++ ++ /* Seek to the specified position in the current open mule log. */ ++ void seekToPosition(my_off_t pos); ++ ++ public: ++ ++ ~ReplMule(); ++ ++ /* Dump the event into mule binlog. ++ * Input: ++ * buf (IN) - replication event buffer ++ * event_len (IN) - the event length ++ * ++ * Return: ++ * . WRITE_RELAY: the relay log needs to write the event ++ * . WRITE_ERROR: the writing encountered errors ++ * . SKIP_RELAY: the relay log should skip the event ++ */ ++ WriteStatus writeEvent(const char* buf, ulong event_len); ++ ++ /* createReplicationMule: ++ * Create a mule that relays master's replication binlog and ++ * generate an exact same copy on the local filesystem. ++ * ++ * Input: ++ * thd (IN) - replication I/O thread ++ * mi (IN) - master info struct for I/O thread's progress ++ * binlog_indexname (IN) - filename for binlog's index ++ * binlog (IN) - replication binlog ++ * ++ * Return: ++ * . a replication mule if success ++ * . 
NULL if there are any errors ++ */ ++ static ReplMule *createReplicationMule(THD* thd, MASTER_INFO *mi, ++ const char *binlog_indexname, ++ MYSQL_LOG *binlog); ++}; ++ ++#endif /* SQL_REPL_MULE_H__ */ +diff -r 66cc9e0a6768 sql/set_var.cc +--- a/sql/set_var.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/set_var.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -345,6 +345,8 @@ + slog_verb); + sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank", + &rpl_recovery_rank); ++sys_var_bool_ptr sys_rpl_mirror_binlog_enabled("rpl_mirror_binlog_enabled", ++ &rpl_mirror_binlog_enabled); + sys_var_long_ptr sys_query_cache_size("query_cache_size", + &query_cache_size, + fix_query_cache_size); +@@ -364,6 +366,9 @@ + sys_var_thd_ulong sys_trans_prealloc_size("transaction_prealloc_size", + &SV::trans_prealloc_size, + 0, fix_trans_mem_root); ++sys_var_long_ptr sys_sync_mirror_binlog_period( ++ "sync_mirror_binlog_period", ++ &sync_mirror_binlog_period); + + #ifdef HAVE_QUERY_CACHE + sys_var_long_ptr sys_query_cache_limit("query_cache_limit", +@@ -774,6 +779,7 @@ + &sys_relay_log_purge, + #endif + &sys_rpl_recovery_rank, ++ &sys_rpl_mirror_binlog_enabled, + &sys_safe_updates, + &sys_secure_auth, + &sys_secure_file_priv, +@@ -1113,6 +1119,8 @@ + {"relay_log_space_limit", (char*) &relay_log_space_limit, SHOW_LONGLONG}, + #endif + {sys_rpl_recovery_rank.name,(char*) &sys_rpl_recovery_rank, SHOW_SYS}, ++ {sys_rpl_mirror_binlog_enabled.name, ++ (char *) &sys_rpl_mirror_binlog_enabled, SHOW_SYS}, + {"secure_auth", (char*) &sys_secure_auth, SHOW_SYS}, + {"secure_file_priv", (char*) &sys_secure_file_priv, SHOW_SYS}, + #ifdef HAVE_SMEM +diff -r 66cc9e0a6768 sql/slave.cc +--- a/sql/slave.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/slave.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -25,6 +25,7 @@ + #include <thr_alarm.h> + #include <my_dir.h> + #include <sql_common.h> ++#include "repl_mule.h" + #include <errmsg.h> + #include <mysys_err.h> + +@@ -3527,6 +3528,7 @@ + RELAY_LOG_INFO *rli= &mi->rli; + char 
llbuff[22]; + uint retry_count; ++ ReplMule *mule = NULL; + + // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff + my_thread_init(); +@@ -3609,6 +3611,23 @@ + if (get_master_version_and_clock(mysql, mi)) + goto err; + ++ if (rpl_mirror_binlog_enabled && !mule) { ++ if (opt_binlog_index_name == NULL) { ++ sql_print_error("\"log-bin-index\" must be set in mirror binlog."); ++ goto err; ++ } ++ ++ /* Create the mule to generate the exact copy of the binlog */ ++ mule = ReplMule::createReplicationMule( ++ thd, mi, opt_binlog_index_name, &mysql_bin_log); ++ ++ /* If we could not create the mule, we stop the I/O thread and report ++ * an error. ++ */ ++ if (mule == NULL) ++ goto err; ++ } ++ + if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1) + { + /* +@@ -3624,6 +3643,7 @@ + DBUG_PRINT("info",("Starting reading binary log from master")); + while (!io_slave_killed(thd,mi)) + { ++ const char* event_buf; + bool suppress_warnings= 0; + thd_proc_info(thd, "Requesting binlog dump"); + if (request_dump(mysql, mi, &suppress_warnings)) +@@ -3754,10 +3774,25 @@ + goto connected; + } // if (event_len == packet_error) + ++ event_buf = (const char*)mysql->net.read_pos + 1; ++ ++ if (mule) { ++ ReplMule::WriteStatus d_status = ++ mule->writeEvent(event_buf, event_len); ++ switch (d_status) { ++ case ReplMule::WRITE_RELAY: ++ break; ++ case ReplMule::SKIP_RELAY: ++ /* Skip writing relay event; go back to read the next event */ ++ continue; ++ case ReplMule::WRITE_ERROR: ++ goto err; ++ } ++ } ++ + retry_count=0; // ok event, reset retry counter + thd_proc_info(thd, "Queueing master event to the relay log"); +- if (queue_event(mi,(const char*)mysql->net.read_pos + 1, +- event_len)) ++ if (queue_event(mi, event_buf, event_len)) + { + sql_print_error("Slave I/O thread could not queue event from master"); + goto err; +@@ -3847,6 +3882,7 @@ + change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE); + DBUG_ASSERT(thd->net.buff != 0); + 
net_end(&thd->net); // destructor will not free it, because net.vio is 0 ++ delete mule; + close_thread_tables(thd, 0); + pthread_mutex_lock(&LOCK_thread_count); + THD_CHECK_SENTRY(thd); +diff -r 66cc9e0a6768 sql/sql_class.h +--- a/sql/sql_class.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_class.h Thu Dec 04 21:46:15 2008 -0800 +@@ -152,6 +152,12 @@ + #define LOG_INFO_FATAL -7 + #define LOG_INFO_IN_USE -8 + ++/* If the maximum size is equal to this value, binlog would not rotate on ++ * size limit. ++ */ ++#define BINLOG_NOSWITCH_SIZE ((ulong) -1) ++ ++ + /* bitmap to SQL_LOG::close() */ + #define LOG_CLOSE_INDEX 1 + #define LOG_CLOSE_TO_BE_OPENED 2 +@@ -245,6 +251,9 @@ + bool no_auto_events; + friend class Log_event; + ++ /* mule replication mode */ ++ bool mule_binlog_; ++ + public: + /* + These describe the log's format. This is used only for relay logs. +@@ -317,7 +326,8 @@ + } + bool open_index_file(const char *index_file_name_arg, + const char *log_name); +- void new_file(bool need_lock); ++ int close_index_file(); ++ void new_file(bool need_lock= 1, const char* log_filename= NULL); + bool write(THD *thd, enum enum_server_command command, + const char *format, ...) ATTRIBUTE_FORMAT(printf, 4, 5); + bool write(THD *thd, const char *query, uint query_length, +@@ -357,7 +367,27 @@ + int get_current_log(LOG_INFO* linfo); + int raw_get_current_log(LOG_INFO* linfo); + uint next_file_id(); +- inline bool is_open() { return log_type != LOG_CLOSED; } ++ ++ /* Because mysql use is_open() to check whether replication is on, ++ * we will let the check fail during binlog mule mode. Mule replication ++ * and normal master replication can not be on at the same time. ++ * ++ * is_log_open(): the binlog file is open for either purpose ++ * ++ * is_open(): the binlog is open for master replication. 
++ * is_mule_open(): the binlog is open for mirror binlog or for ++ * replication mule; refer repl_mule.h for details ++ */ ++ bool is_log_open() { ++ return log_type != LOG_CLOSED; ++ } ++ bool is_open() { ++ return (!mule_binlog_) && is_log_open(); ++ } ++ bool is_mule_open() { ++ return (mule_binlog_) && is_log_open(); ++ } ++ + inline char* get_index_fname() { return index_file_name;} + inline char* get_log_fname() { return log_file_name; } + inline char* get_name() { return name; } +@@ -366,8 +396,18 @@ + + inline void lock_index() { pthread_mutex_lock(&LOCK_index);} + inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} ++ inline void lock_log() { pthread_mutex_lock(&LOCK_log);} ++ inline void unlock_log() { pthread_mutex_unlock(&LOCK_log);} + inline IO_CACHE *get_index_file() { return &index_file;} + inline uint32 get_open_count() { return open_count; } ++ /* Look in file repl_mule.h for the definition of mule. */ ++ void set_mule_mode() { ++ mule_binlog_ = 1; ++ } ++ void clear_mule_mode() { ++ mule_binlog_ = 0; ++ } ++ int flush_log_file(); + }; + + /* +diff -r 66cc9e0a6768 sql/sql_lex.h +--- a/sql/sql_lex.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_lex.h Thu Dec 04 21:46:15 2008 -0800 +@@ -104,6 +104,7 @@ + // TODO(mcallaghan): update status_vars in mysqld to export these + SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, + SQLCOM_SHOW_CLIENT_STATS, ++ SQLCOM_MAKE_MASTER, + /* This should be the last !!! 
*/ + SQLCOM_END + }; +@@ -171,6 +172,12 @@ + char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher; + char *relay_log_name; + ulong relay_log_pos; ++ ++ /* the following fields are used for make master command */ ++ char *log_index_name; ++ bool in_failover; ++ bool kill_session; ++ bool with_old_binlog; + } LEX_MASTER_INFO; + + +diff -r 66cc9e0a6768 sql/sql_parse.cc +--- a/sql/sql_parse.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_parse.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -402,6 +402,15 @@ + passwd_len ? "yes": "no", + thd->main_security_ctx.master_access, + (thd->db ? thd->db : "*none*"))); ++ ++ /* If we are in failover mode, reject all non-super user connections. */ ++ if (is_in_failover() && ++ !(thd->main_security_ctx.master_access & SUPER_ACL)) { ++ net_send_error(thd, ER_SPECIFIC_ACCESS_DENIED_ERROR, ++ "super-user only during failover"); ++ DBUG_RETURN(-1); ++ } ++ + + if (check_count) + { +@@ -3470,6 +3479,22 @@ + else + res = load_master_data(thd); + break; ++ ++ case SQLCOM_MAKE_MASTER: ++ { ++ thd_proc_info(thd, "Making master"); ++ ++ if (check_global_access(thd, SUPER_ACL)) ++ goto error; ++ res = make_master(thd, NULL, NULL, &lex->mi); ++ if (res == 0) { ++ // TODO -- wei is this OK, setting it to NULL? 
++ thd_proc_info(thd, 0); ++ send_ok(thd); ++ } ++ break; ++ } ++ + #endif /* HAVE_REPLICATION */ + #ifdef HAVE_NDBCLUSTER_DB + case SQLCOM_SHOW_NDBCLUSTER_STATUS: +diff -r 66cc9e0a6768 sql/sql_repl.cc +--- a/sql/sql_repl.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_repl.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -20,11 +20,19 @@ + #include "log_event.h" + #include <my_dir.h> + ++extern pthread_mutex_t LOCK_failover_master; ++extern bool failover_deny_access; ++ + int max_binlog_dump_events = 0; // unlimited + my_bool opt_sporadic_binlog_dump_fail = 0; + #ifndef DBUG_OFF + static int binlog_dump_count = 0; + #endif ++ ++static int make_master_open_log(MYSQL_LOG *log, const char *opt_name, ++ bool no_auto_events, ulong max_size); ++static int set_in_failover(bool kill_session); ++static void clear_in_failover(void); + + /* + fake_rotate_event() builds a fake (=which does not exist physically in any +@@ -255,7 +263,7 @@ + bool purge_master_logs(THD* thd, const char* to_log) + { + char search_file_name[FN_REFLEN]; +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + send_ok(thd); + return FALSE; +@@ -308,6 +316,44 @@ + return error; + } + ++/* Show processlist command dump the binlog state. ++ * ++ * Input: ++ * output_info - (OUT) the output proc_info ++ * output_len - (IN) output proc_info's length ++ * thd - (IN) the thread ++ * input_msg - (IN) the input proc_info ++ * log_file_name - (IN) binlog file name ++ * log_pos - (IN) binlog position ++ */ ++static void processlist_show_binlog_state(char *output_info, ++ int output_len, ++ THD *thd, ++ const char *input_msg, ++ const char *log_file_name, ++ my_off_t log_pos) { ++ DBUG_ENTER("processlist_show_binlog_state"); ++ ++ /* Point to input_msg in case "show processlist" access it before the copy ++ * is finished. 
++ */ ++ thd_proc_info(thd, input_msg); ++ ++ if (snprintf(output_info, output_len, "%s :%s:%lld:", input_msg, ++ log_file_name + dirname_length(log_file_name), ++ log_pos) > 0) { ++ thd_proc_info(thd, output_info); ++ } ++ ++ DBUG_VOID_RETURN; ++} ++ ++static void repl_cleanup(ushort flags) { ++ if (flags & BINLOG_MIRROR_CLIENT) { ++ /* One less mirror binlog client. */ ++ thread_safe_sub(rpl_mirror_binlog_clients, 1, &LOCK_stats); ++ } ++} + + /* + TODO: Clean up loop to only have one call to send_file() +@@ -319,6 +365,11 @@ + LOG_INFO linfo; + char *log_file_name = linfo.log_file_name; + char search_file_name[FN_REFLEN], *name; ++ ++ /* This buffer should be enough for "comments + :file_name:file_pos:". */ ++ char binlog_state_msg[FN_REFLEN + 100]; ++ int binlog_state_msg_len = FN_REFLEN + 100; ++ + IO_CACHE log; + File file = -1; + String* packet = &thd->packet; +@@ -335,6 +386,15 @@ + + bzero((char*) &log,sizeof(log)); + ++ sql_print_information("Start %s binlog_dump to slave_server(%d), pos(%s, %lu)", ++ "asynchronous", ++ thd->server_id, log_ident, (ulong)pos); ++ ++ if (flags & BINLOG_MIRROR_CLIENT) { ++ /* One more mirror binlog clients. */ ++ thread_safe_increment(rpl_mirror_binlog_clients, &LOCK_stats); ++ } ++ + #ifndef DBUG_OFF + if (opt_sporadic_binlog_dump_fail && (binlog_dump_count++ % 2)) + { +@@ -344,7 +404,7 @@ + } + #endif + +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + errmsg = "Binary log is not open"; + my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; +@@ -529,6 +589,12 @@ + } + #endif + ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Send binlog events to slave", ++ log_file_name, pos); ++ + if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) + { + binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] & +@@ -634,6 +700,13 @@ + } + if (!thd->killed) + { ++ /* Update the binlog sending state. 
*/ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Has sent all binlog to slave; " ++ "waiting for binlog to be updated", ++ log_file_name, pos); ++ + /* Note that the following call unlocks lock_log */ + mysql_bin_log.wait_for_update(thd, 0); + } +@@ -650,7 +723,12 @@ + + if (read_packet) + { +- thd_proc_info(thd, "Sending binlog event to slave"); ++ // thd_proc_info(thd, "Sending binlog event to slave"); ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state(binlog_state_msg, ++ binlog_state_msg_len, thd, ++ "Sending binlog event to slave", ++ log_file_name, pos); + if (my_net_write(net, (char*)packet->ptr(), packet->length()) ) + { + errmsg = "Failed on my_net_write()"; +@@ -685,10 +763,21 @@ + } + else + { ++ char old_log_file_name[FN_REFLEN]; + bool loop_breaker = 0; + /* need this to break out of the for loop from switch */ + +- thd_proc_info(thd, "Finished reading one binlog; switching to next binlog"); ++ // thd_proc_info(thd, "Finished reading one binlog; switching to next binlog"); ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Finished reading one binlog; switching to next binlog", ++ log_file_name, pos); ++ ++ /* Keep the old filename. */ ++ strmake(old_log_file_name, log_file_name, ++ sizeof(old_log_file_name) - 1); ++ + switch (mysql_bin_log.find_next_log(&linfo, 1)) { + case LOG_INFO_EOF: + loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK); +@@ -706,6 +795,16 @@ + + end_io_cache(&log); + (void) my_close(file, MYF(MY_WME)); ++ ++ /* A sanity check that we can not serve the same binlog twice because ++ * the filenames are stored in a .index file. 
++ */ ++ if (strcmp(old_log_file_name, log_file_name) >= 0) { ++ errmsg = "Re-serving an already served binlog file."; ++ my_errno = ER_MASTER_FATAL_ERROR_READING_BINLOG; ++ goto err; ++ } ++ + + /* + Call fake_rotate_event() in case the previous log (the one which +@@ -733,6 +832,8 @@ + end_io_cache(&log); + (void)my_close(file, MYF(MY_WME)); + ++ repl_cleanup(flags); ++ + send_eof(thd); + thd_proc_info(thd, "Waiting to finalize termination"); + pthread_mutex_lock(&LOCK_thread_count); +@@ -743,6 +844,7 @@ + err: + thd_proc_info(thd, "Waiting to finalize termination"); + end_io_cache(&log); ++ repl_cleanup(flags); + /* + Exclude iteration through thread list + this is needed for purge_logs() - it will iterate through +@@ -1316,7 +1418,7 @@ + Format_description_log_event *description_event= new + Format_description_log_event(3); /* MySQL 4.0 by default */ + +- if (mysql_bin_log.is_open()) ++ if (mysql_bin_log.is_log_open()) + { + LEX_MASTER_INFO *lex_mi= &thd->lex->mi; + SELECT_LEX_UNIT *unit= &thd->lex->unit; +@@ -1456,7 +1558,7 @@ + DBUG_RETURN(TRUE); + protocol->prepare_for_resend(); + +- if (mysql_bin_log.is_open()) ++ if (mysql_bin_log.is_log_open()) + { + LOG_INFO li; + mysql_bin_log.get_current_log(&li); +@@ -1497,7 +1599,7 @@ + Protocol *protocol= thd->protocol; + DBUG_ENTER("show_binlogs"); + +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + my_message(ER_NO_BINARY_LOGGING, ER(ER_NO_BINARY_LOGGING), MYF(0)); + return 1; +@@ -1606,6 +1708,235 @@ + DBUG_RETURN(0); + } + ++ ++/* make_master: Make the current database a primary and starts the ++ * binlog logging for all updates. ++ * ++ * The function handles the following sql commands: ++ * . MAKE MASTER MASTER_LOG_FILE='replication_log', MASTER_SERVER_ID=1, ++ * [WITH BINLOG]; ++ * . MAKE MASTER MASTER_LOG_FILE='replication_log', MASTER_SERVER_ID=1, ++ * INDEX='replication_log.index' [WITH BINLOG]; ++ * . MAKE MASTER REVOKE SESSION; ++ * . MAKE MASTER REVOKE SESSION WITH KILL; ++ * . 
MAKE MASTER GRANT SESSION; ++ * ++ * Args: ++ * thd - the current thread ++ * binlog_name - binlog's filename ++ * binlog_indexname - binlog index's filename ++ * mi - master info struct containing binlog name ++ * (set when we enable master during runtime) ++ * ++ * Return: ++ * 0 : success ++ * -1 : failure ++ */ ++int make_master(THD* thd, ++ const char *binlog_name, ++ const char *binlog_indexname, ++ const LEX_MASTER_INFO* mi) { ++ int error = 0; ++ ++ DBUG_ENTER("make_master"); ++ /* In two mode, we enable the binlog: ++ * . !mi - LEX is not provided; this is called from startup time ++ * . mi->log_file_name - binlog is specified in the command ++ */ ++ if (!mi || mi->log_file_name) { ++ /* Get the mutex */ ++ VOID(pthread_mutex_lock(&LOCK_failover_master)); ++ ++ /* If the binlog is already opened, we issue an error. We reuse one ++ * existing error, which might not be fully accurate. ++ */ ++ if (mysql_bin_log.is_log_open()) { ++ my_error(ER_MASTER_INFO, MYF(0)); ++ sql_print_error("Replication master log is already open: cannot " ++ "make another master!"); ++ error = -1; ++ } else { ++ if (!mi) { ++ /* This opening happens at mysql startup time. */ ++ if (make_master_open_log(&mysql_bin_log, binlog_name, ++ 0, max_binlog_size) != 0) { ++ error = -1; ++ } ++ } else { ++ /* This opening happens during mysql runtime, which is mostly ++ * requested to do failover. ++ */ ++ ++ error = -1; ++ if (!is_in_failover()) { ++ sql_print_error( ++ "\"make master\" runs only in failover mode. " ++ "Please run \"make master revoke session (with kill)\""); ++ } else if (strlen(mi->log_file_name) == 0) { ++ sql_print_error("Master log filename is not specified correctly."); ++ } else if (!mi->server_id || mi->server_id == MASTER_INFO_SERVER_ID) { ++ sql_print_error("\"make master\": invalid server_id(%d)", ++ mi->server_id); ++ } else { ++ /* Open the new log files and delete all existing ones to avoid ++ * conflicts. 
++ */ ++ uint32 old_server_id = server_id; ++ char *binlog_name = NULL; ++ ++ /* Set the global master server id. ++ * We would not change server id for all connection threads. ++ * All non-super sessions should be blocked by revoke sessions. ++ * Super-user sessions are responsible for their own operations. ++ */ ++ server_id = mi->server_id; ++ thd->server_id = mi->server_id; ++ ++ if (!(binlog_name = my_strdup(mi->log_file_name, MYF(0))) || ++ make_master_open_index(&binlog_name, mi->log_index_name) != 0 || ++ make_master_open_log(&mysql_bin_log, binlog_name, ++ 0, max_binlog_size) != 0) { ++ sql_print_error("Open master logfile failed."); ++ thd->server_id = old_server_id; ++ server_id = old_server_id; ++ } else if (!mi->with_old_binlog && ++ mysql_bin_log.reset_logs(thd) != 0) { ++ sql_print_error("Cleanup existing master logfiles failed."); ++ thd->server_id = old_server_id; ++ server_id = old_server_id; ++ } else { ++ error = 0; ++ } ++ } ++ if (error == -1) ++ my_error(ER_MASTER_INFO, MYF(0)); ++ } ++ } ++ ++ if (error == 0) { ++ /* indicates that binlog is enabled now */ ++ using_update_log = 1; ++ } else if (mysql_bin_log.is_open()) { ++ mysql_bin_log.close(LOG_CLOSE_INDEX); ++ } ++ ++ /* Release the mutex */ ++ VOID(pthread_mutex_unlock(&LOCK_failover_master)); ++ } else { ++ /* The following actions are related to session management during ++ * failover operation. We do not want some sessions come in ++ * during failover and make updates. 
++ * This is invoked for command: MAKE MASTER GRANT/REVOKE SESSION; ++ */ ++ if (mi->in_failover) { ++ set_in_failover(mi->kill_session); ++ } else { ++ clear_in_failover(); ++ } ++ } ++ ++ DBUG_RETURN(error); ++} ++ ++static int make_master_open_log(MYSQL_LOG *log, ++ const char *opt_name, ++ bool no_auto_events, ++ ulong max_size) { ++ char tmp[FN_REFLEN]; ++ ++ // get rid of extension ++ char *p = fn_ext(opt_name); ++ uint length=(uint) (p-opt_name); ++ strmake(tmp,opt_name,min(length,FN_REFLEN)); ++ opt_name=tmp; ++ ++ return log->open(opt_name, LOG_BIN, NULL, WRITE_CACHE, 0, ++ max_size, 0); ++} ++ ++int make_master_open_index(char **binlog_name, ++ const char *binlog_indexname) { ++ char buf[FN_REFLEN]; ++ const char *ln; ++ DBUG_ENTER("make_master_open_index"); ++ ++ ln= mysql_bin_log.generate_name(*binlog_name, "-bin", 1, buf); ++ if (!(*binlog_name) && !binlog_indexname) { ++ /* ++ User didn't give us info to name the binlog index file. ++ Picking `hostname`-bin.index like did in 4.x, causes replication to ++ fail if the hostname is changed later. So, we would like to instead ++ require a name. But as we don't want to break many existing setups, we ++ only give warning, not error. ++ */ ++ sql_print_warning("No argument was provided to --log-bin, and " ++ "--log-bin-index was not used; so replication " ++ "may break when this MySQL server acts as a " ++ "master and has his hostname changed!! Please " ++ "use '--log-bin=%s' to avoid this problem.", ln); ++ } ++ if (ln == buf) { ++ my_free(*binlog_name, MYF(MY_ALLOW_ZERO_PTR)); ++ *binlog_name = my_strdup(buf, MYF(0)); ++ } ++ if (mysql_bin_log.open_index_file(binlog_indexname, ln) != 0) { ++ DBUG_RETURN(-1); ++ } ++ ++ /* ++ Used to specify which type of lock we need to use for queries of type ++ INSERT ... SELECT. This will change when we have row level logging. 
++ */ ++ using_update_log=1; ++ ++ DBUG_RETURN(0); ++} ++ ++/* Set the status indicating that we are in failover and deny all non-super ++ * user access. ++ * ++ * Args: ++ * kill_session - kill all non-super sessions if specified ++ * ++ * Return: ++ * 0 - success ++ * -1 - failure (caused by not killing all sessions) ++ */ ++static int set_in_failover(bool kill_session) { ++ failover_deny_access = 1; ++ ++ if (kill_session) { ++ /* If kill session option is specified, we need to kill all non-super ++ * user sessions. ++ */ ++ THD *kill_thd; ++ ++ uint error=ER_NO_SUCH_THREAD; ++ pthread_mutex_lock(&LOCK_thread_count); // For unlink from list ++ I_List_iterator<THD> it(threads); ++ while ((kill_thd=it++)) { ++ if (!(kill_thd->main_security_ctx.master_access & SUPER_ACL)) { ++ pthread_mutex_lock(&kill_thd->LOCK_delete); // Lock from delete ++ ++ /* ask the thread to die */ ++ kill_thd->awake(THD::KILL_CONNECTION); ++ pthread_mutex_unlock(&kill_thd->LOCK_delete); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_thread_count); ++ } ++ return 0; ++} ++ ++static void clear_in_failover(void) { ++ failover_deny_access = 0; ++} ++ ++bool is_in_failover(void) { ++ return failover_deny_access; ++} ++ ++ + #endif /* HAVE_REPLICATION */ + + +diff -r 66cc9e0a6768 sql/sql_repl.h +--- a/sql/sql_repl.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_repl.h Thu Dec 04 21:46:15 2008 -0800 +@@ -38,6 +38,10 @@ + int start_slave(THD* thd, MASTER_INFO* mi, bool net_report); + int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report); + bool change_master(THD* thd, MASTER_INFO* mi); ++int make_master(THD* thd, const char *binlog_name, ++ const char *binlog_indexname, const LEX_MASTER_INFO* mi); ++int make_master_open_index(char **binlog_name, const char *binlog_indexname); ++bool is_in_failover(void); + bool mysql_show_binlog_events(THD* thd); + int cmp_master_pos(const char* log_file_name1, ulonglong log_pos1, + const char* log_file_name2, ulonglong log_pos2); +diff -r 66cc9e0a6768 
sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_yacc.yy Thu Dec 04 21:46:15 2008 -0800 +@@ -735,6 +735,7 @@ + %token LOOP_SYM + %token LOW_PRIORITY + %token LT ++%token MAKE_SYM + %token MAKE_SET_SYM + %token MASTER_CONNECT_RETRY_SYM + %token MASTER_HOST_SYM +@@ -1167,7 +1168,7 @@ + query verb_clause create change select do drop insert replace insert2 + insert_values update delete truncate rename + show describe load alter optimize keycache preload flush +- reset purge begin commit rollback savepoint release ++ make reset purge begin commit rollback savepoint release + slave master_def master_defs master_file_def slave_until_opts + repair restore backup analyze check start checksum + field_list field_list_item field_spec kill column_def key_def +@@ -1301,6 +1302,7 @@ + | kill + | load + | lock ++ | make + | optimize + | keycache + | preload +@@ -1428,6 +1430,56 @@ + master_defs + {} + ; ++ ++/* make master */ ++make: ++ MAKE_SYM MASTER_SYM ++ { ++ LEX *lex = Lex; ++ lex->sql_command = SQLCOM_MAKE_MASTER; ++ bzero((char*) &lex->mi, sizeof(lex->mi)); ++ } ++ make_master_defs ++ { ++ } ++ ; ++ ++make_master_defs: ++ MASTER_LOG_FILE_SYM EQ TEXT_STRING ',' MASTER_SERVER_ID_SYM EQ ulong_num ++ { ++ Lex->mi.log_file_name = $3.str; ++ Lex->mi.server_id = $7; ++ } ++ make_master_with_defs {} ++ | MASTER_LOG_FILE_SYM EQ TEXT_STRING ',' MASTER_SERVER_ID_SYM EQ ulong_num ',' INDEX_SYM EQ TEXT_STRING ++ { ++ Lex->mi.log_file_name = $3.str; ++ Lex->mi.server_id = $7; ++ Lex->mi.log_index_name = $11.str; ++ } ++ make_master_with_defs {} ++ | GRANT SESSION_SYM ++ { ++ Lex->mi.in_failover = 0; ++ } ++ | REVOKE SESSION_SYM ++ { ++ Lex->mi.in_failover = 1; ++ } ++ | REVOKE SESSION_SYM WITH KILL_SYM ++ { ++ Lex->mi.in_failover = 1; ++ Lex->mi.kill_session = 1; ++ } ++ ; ++ ++make_master_with_defs: ++ /* empty */ {} ++ | WITH BINLOG_SYM ++ { ++ /* All old binlogs will be kept after "make master" command. 
*/ ++ Lex->mi.with_old_binlog = 1; ++ } + + master_defs: + master_def +@@ -8396,6 +8448,7 @@ + | HANDLER_SYM {} + | HELP_SYM {} + | LANGUAGE_SYM {} ++ | MAKE_SYM {} + | NO_SYM {} + | OPEN_SYM {} + | PREPARE_SYM {} diff --git a/percona/5.0.75-b12/mysqld_safe_syslog.patch b/percona/5.0.75-b12/mysqld_safe_syslog.patch new file mode 100644 index 0000000..a493a29 --- /dev/null +++ b/percona/5.0.75-b12/mysqld_safe_syslog.patch @@ -0,0 +1,127 @@ +diff -r d91edeb58b50 patch_info/mysqld_safe_syslog.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/mysqld_safe_syslog.info Mon Sep 01 21:58:00 2008 -0700 +@@ -0,0 +1,6 @@ ++File=mysqld_safe_syslog.patch ++Name=Patch allows redirect output of error.log to syslog-ng ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=Ported from Debian +diff -r d91edeb58b50 scripts/mysqld_safe.sh +--- a/scripts/mysqld_safe.sh Mon Sep 01 21:57:21 2008 -0700 ++++ b/scripts/mysqld_safe.sh Mon Sep 01 21:58:00 2008 -0700 +@@ -10,12 +10,16 @@ + # mysql.server works by first doing a cd to the base directory and from there + # executing mysqld_safe + +-KILL_MYSQLD=1; + MYSQLD= + + trap '' 1 2 3 15 # we shouldn't let anyone kill us + + umask 007 ++ ++KILL_MYSQLD=1; ++ ++# This command can be used as pipe to syslog. With "-s" it also logs to stderr. ++ERR_LOGGER="logger -p daemon.err -t mysqld_safe -i" + + defaults= + case "$1" in +@@ -177,7 +181,6 @@ + + # these rely on $DATADIR by default, so we'll set them later on + pid_file= +-err_log= + + # Get first arguments from the my.cnf file, groups [mysqld] and [mysqld_safe] + # and then merge with the command line arguments +@@ -245,7 +248,6 @@ + * ) pid_file="$DATADIR/$pid_file" ;; + esac + fi +-test -z "$err_log" && err_log=$DATADIR/`@HOSTNAME@`.err + + if test -n "$mysql_unix_port" + then +@@ -315,8 +317,6 @@ + then + USER_OPTION="--user=$user" + fi +- # If we are root, change the err log to the right user. 
+- touch $err_log; chown $user $err_log + if test -n "$open_files" + then + ulimit -n $open_files +@@ -341,18 +341,16 @@ + then + if @FIND_PROC@ + then # The pid contains a mysqld process +- echo "A mysqld process already exists" +- echo "A mysqld process already exists at " `date` >> $err_log ++ echo "A mysqld process already exists" | $ERR_LOGGER -s + exit 1 + fi + fi + rm -f $pid_file + if test -f $pid_file + then +- echo "Fatal error: Can't remove the pid file: $pid_file" +- echo "Fatal error: Can't remove the pid file: $pid_file at " `date` >> $err_log +- echo "Please remove it manually and start $0 again" +- echo "mysqld daemon not started" ++ echo "Fatal error: Can't remove the pid file: $pid_file" | $ERR_LOGGER -s ++ echo "Please remove it manually and start $0 again" | $ERR_LOGGER -s ++ echo "mysqld daemon not started" | $ERR_LOGGER -s + exit 1 + fi + fi +@@ -377,15 +375,15 @@ + # ulimit -n 256 > /dev/null 2>&1 # Fix for BSD and FreeBSD systems + #fi + +-echo "`date +'%y%m%d %H:%M:%S mysqld started'`" >> $err_log ++echo "started" | $ERR_LOGGER -s + while true + do + rm -f $safe_mysql_unix_port $pid_file # Some extra safety + if test -z "$args" + then +- $NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ >> $err_log 2>&1 ++ $NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ 2>&1 | $ERR_LOGGER -t mysqld + else +- eval "$NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ $args >> $err_log 2>&1" ++ eval "$NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ $args 2>&1 | $ERR_LOGGER -t mysqld" + fi + if test ! 
-f $pid_file # This is removed if normal shutdown + then +@@ -402,7 +400,7 @@ + # kill -9 is used or the process won't react on the kill. + numofproces=`ps xaww | grep -v "grep" | grep "$ledir/$MYSQLD\>" | grep -c "pid-file=$pid_file"` + +- echo -e "\nNumber of processes running now: $numofproces" | tee -a $err_log ++ echo -e "\nNumber of processes running now: $numofproces" | $ERR_LOGGER -s + I=1 + while test "$I" -le "$numofproces" + do +@@ -415,16 +413,14 @@ + # echo "TEST $I - $T **" + if kill -9 $T + then +- echo "$MYSQLD process hanging, pid $T - killed" | tee -a $err_log ++ echo "$MYSQLD process hanging, pid $T - killed" | $ERR_LOGGER -s + else + break + fi + I=`expr $I + 1` + done + fi +- echo "`date +'%y%m%d %H:%M:%S'` mysqld restarted" | tee -a $err_log ++ echo "restarted" | $ERR_LOGGER -s + done + +-echo "`date +'%y%m%d %H:%M:%S'` mysqld ended" | tee -a $err_log +-echo "" | tee -a $err_log +- ++echo "ended" | $ERR_LOGGER -s diff --git a/percona/5.0.75-b12/show_patches.patch b/percona/5.0.75-b12/show_patches.patch new file mode 100644 index 0000000..7f1d431 --- /dev/null +++ b/percona/5.0.75-b12/show_patches.patch @@ -0,0 +1,288 @@ +diff -r c3e57b0c22c4 patch_info/show_patches.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/show_patches.info Mon Dec 22 00:25:06 2008 -0800 +@@ -0,0 +1,6 @@ ++File=show_patches.patch ++Name=SHOW PATCHES ++Version=1.0 ++Author=Jeremy Cole ++License=N/A ++Comment +diff -r c3e57b0c22c4 sql/Makefile.am +--- a/sql/Makefile.am Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/Makefile.am Mon Dec 22 00:25:06 2008 -0800 +@@ -118,7 +118,7 @@ + -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ + @DEFS@ + +-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h ++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h + EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \ + message.mc message.h message.rc MSG00001.bin \ + examples/CMakeLists.txt CMakeLists.txt \ +@@ -175,6 +175,8 @@ + udf_example_la_SOURCES= udf_example.c + 
udf_example_la_LDFLAGS= -module -rpath $(pkglibdir) + ++patch_info.h: patch_info.h.pl ++ $(PERL) $< > $@ + + # Don't update the files from bitkeeper + %::SCCS/s.% +diff -r c3e57b0c22c4 sql/Makefile.in +--- a/sql/Makefile.in Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/Makefile.in Mon Dec 22 00:25:06 2008 -0800 +@@ -561,7 +561,7 @@ + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) + mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc + mysql_tzinfo_to_sql_LDADD = @MYSQLD_EXTRA_LDFLAGS@ $(LDADD) $(CXXLDFLAGS) +-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h ++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h + EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \ + message.mc message.h message.rc MSG00001.bin \ + examples/CMakeLists.txt CMakeLists.txt \ +@@ -1237,6 +1237,9 @@ + ./gen_lex_hash$(EXEEXT) > $@-t + $(MV) $@-t $@ + ++patch_info.h: patch_info.h.pl ++ $(PERL) $< > $@ ++ + # Don't update the files from bitkeeper + %::SCCS/s.% + # Tell versions [3.59,3.63) of GNU make to not export all variables. 
+diff -r c3e57b0c22c4 sql/lex.h +--- a/sql/lex.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/lex.h Mon Dec 22 00:25:06 2008 -0800 +@@ -367,6 +367,7 @@ + { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PARTIAL", SYM(PARTIAL)}, + { "PASSWORD", SYM(PASSWORD)}, ++ { "PATCHES", SYM(PATCHES)}, + { "PHASE", SYM(PHASE_SYM)}, + { "POINT", SYM(POINT_SYM)}, + { "POLYGON", SYM(POLYGON)}, +diff -r c3e57b0c22c4 sql/mysql_priv.h +--- a/sql/mysql_priv.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/mysql_priv.h Mon Dec 22 00:25:06 2008 -0800 +@@ -968,6 +968,7 @@ + int mysqld_show_status(THD *thd); + int mysqld_show_variables(THD *thd,const char *wild); + bool mysqld_show_storage_engines(THD *thd); ++bool mysqld_show_patches(THD *thd); + bool mysqld_show_privileges(THD *thd); + bool mysqld_show_column_types(THD *thd); + bool mysqld_help (THD *thd, const char *text); +diff -r c3e57b0c22c4 sql/patch_info.h.pl +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/patch_info.h.pl Mon Dec 22 00:25:06 2008 -0800 +@@ -0,0 +1,65 @@ ++use strict; ++ ++my $patch_info_path = '../patch_info'; ++my $file = ''; ++my $output = ''; ++ ++ ++if (opendir(PATCH_DIR, $patch_info_path)) ++{ ++ while ((my $file = readdir(PATCH_DIR))) ++ { ++ open(PATCH_FILE, "<$patch_info_path/$file") || die("Unable to open $patch_info_path/$file ($!)"); ++ my %fields; ++ ++ if ($file =~ /^\./) ++ { ++ next; ++ } ++ ++ while (<PATCH_FILE>) ++ { ++ chomp; ++ ++ my ($key, $value) = split(/\s*=\s*/); ++ $fields{lc($key)} = $value; ++ } ++ ++ $output .= "{\"$fields{'file'}\", \"$fields{'name'}\", \"$fields{'version'}\", \"$fields{'author'}\", \"$fields{'license'}\",\"$fields{'comment'}\"},\n" ++ } ++} ++ ++print <<HEADER; ++ ++/* Copyright (C) 2002-2006 MySQL AB ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; version 2 of the License. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ ++ ++#ifdef USE_PRAGMA_INTERFACE ++#pragma interface /* gcc class implementation */ ++#endif ++ ++struct patch { ++ const char *file; ++ const char *name; ++ const char *version; ++ const char *author; ++ const char *license; ++ const char *comment; ++}patches[] = { ++$output ++{NULL, NULL, NULL, NULL} ++}; ++ ++HEADER +diff -r c3e57b0c22c4 sql/sp_head.cc +--- a/sql/sp_head.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sp_head.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -191,6 +191,7 @@ + case SQLCOM_SHOW_MUTEX_STATUS: + case SQLCOM_SHOW_NEW_MASTER: + case SQLCOM_SHOW_OPEN_TABLES: ++ case SQLCOM_SHOW_PATCHES: + case SQLCOM_SHOW_PRIVILEGES: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_SLAVE_HOSTS: +diff -r c3e57b0c22c4 sql/sql_lex.h +--- a/sql/sql_lex.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_lex.h Mon Dec 22 00:25:06 2008 -0800 +@@ -95,6 +95,7 @@ + SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, + SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES, ++ SQLCOM_SHOW_PATCHES, + + /* + When a command is added here, be sure it's also added in mysqld.cc +diff -r c3e57b0c22c4 sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -3947,6 +3947,9 @@ + break; + case SQLCOM_SHOW_STORAGE_ENGINES: + res= mysqld_show_storage_engines(thd); ++ break; ++ case SQLCOM_SHOW_PATCHES: ++ res= mysqld_show_patches(thd); + break; + case SQLCOM_SHOW_PRIVILEGES: + res= mysqld_show_privileges(thd); +diff -r 
c3e57b0c22c4 sql/sql_prepare.cc +--- a/sql/sql_prepare.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_prepare.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -1790,6 +1790,7 @@ + case SQLCOM_SHOW_DATABASES: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_STORAGE_ENGINES: ++ case SQLCOM_SHOW_PATCHES: + case SQLCOM_SHOW_PRIVILEGES: + case SQLCOM_SHOW_COLUMN_TYPES: + case SQLCOM_SHOW_STATUS: +diff -r c3e57b0c22c4 sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -22,6 +22,7 @@ + #include "sp.h" + #include "sp_head.h" + #include "sql_trigger.h" ++#include "patch_info.h" + #include <my_dir.h> + + #ifdef HAVE_BERKELEY_DB +@@ -45,6 +46,47 @@ + static int + view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); + bool schema_table_store_record(THD *thd, TABLE *table); ++ ++/*************************************************************************** ++** List patches built into this release ++***************************************************************************/ ++ ++bool mysqld_show_patches(THD *thd) ++{ ++ List<Item> field_list; ++ int i = 0; ++ Protocol *protocol= thd->protocol; ++ DBUG_ENTER("mysqld_show_patches"); ++ ++ field_list.push_back(new Item_empty_string("File", 255)); ++ field_list.push_back(new Item_empty_string("Name", 50)); ++ field_list.push_back(new Item_empty_string("Version", 10)); ++ field_list.push_back(new Item_empty_string("Author", 50)); ++ field_list.push_back(new Item_empty_string("License", 50)); ++ field_list.push_back(new Item_empty_string("Comment", 32)); ++ ++ if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) ++ DBUG_RETURN(TRUE); ++ ++ for (i = 0; patches[i].file; i++) ++ { ++ protocol->prepare_for_resend(); ++ protocol->store(patches[i].file, system_charset_info); ++ protocol->store(patches[i].name, system_charset_info); ++ protocol->store(patches[i].version, system_charset_info); ++ 
protocol->store(patches[i].author, system_charset_info); ++ protocol->store(patches[i].license, system_charset_info); ++ protocol->store(patches[i].comment, system_charset_info); ++ ++ if (protocol->write()) ++ DBUG_RETURN(TRUE); ++ } ++ ++ ++ send_eof(thd); ++ DBUG_RETURN(FALSE); ++ ++} + + + /*************************************************************************** +diff -r c3e57b0c22c4 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_yacc.yy Mon Dec 22 00:25:06 2008 -0800 +@@ -824,6 +824,7 @@ + %token PAGE_SYM + %token PARTIAL + %token PASSWORD ++%token PATCHES + %token PARAM_MARKER + %token PHASE_SYM + %token POINTFROMTEXT +@@ -8019,7 +8020,7 @@ + ; + + show_param: +- DATABASES wild_and_where ++ DATABASES wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SELECT; +@@ -8119,6 +8120,10 @@ + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES; + WARN_DEPRECATED("SHOW TABLE TYPES", "SHOW [STORAGE] ENGINES"); ++ } ++ | PATCHES ++ { ++ Lex->sql_command= SQLCOM_SHOW_PATCHES; + } + | opt_storage ENGINES_SYM + { +@@ -9554,6 +9559,7 @@ + | PAGE_SYM {} + | PARTIAL {} + | PASSWORD {} ++ | PATCHES {} + | PHASE_SYM {} + | POINT_SYM {} + | POLYGON {} diff --git a/percona/5.0.75-b12/split_buf_pool_mutex_fixed_optimistic_safe.patch b/percona/5.0.75-b12/split_buf_pool_mutex_fixed_optimistic_safe.patch new file mode 100644 index 0000000..016d667 --- /dev/null +++ b/percona/5.0.75-b12/split_buf_pool_mutex_fixed_optimistic_safe.patch @@ -0,0 +1,1305 @@ +diff -r 2e0c46e78b50 innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:59 2008 -0800 +@@ -548,6 +548,19 @@ + mutex_create(&(buf_pool->mutex)); + mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL); + ++ mutex_create(&(buf_pool->flush_list_mutex)); ++ mutex_create(&(buf_pool->LRU_mutex)); ++ mutex_create(&(buf_pool->free_mutex)); ++ mutex_create(&(buf_pool->hash_mutex)); ++ 
mutex_set_level(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK); ++ mutex_set_level(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK); ++ mutex_set_level(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK); ++ mutex_set_level(&(buf_pool->hash_mutex), SYNC_NO_ORDER_CHECK); ++ ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + mutex_enter(&(buf_pool->mutex)); + + if (srv_use_awe) { +@@ -723,6 +736,10 @@ + block->in_free_list = TRUE; + } + ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + mutex_exit(&(buf_pool->mutex)); + + if (srv_use_adaptive_hash_indexes) { +@@ -859,12 +876,12 @@ + if (buf_pool->freed_page_clock >= block->freed_page_clock + + 1 + (buf_pool->curr_size / 4)) { + +- mutex_enter(&buf_pool->mutex); ++ mutex_enter(&(buf_pool->LRU_mutex)); + /* There has been freeing activity in the LRU list: + best to move to the head of the LRU list */ + + buf_LRU_make_block_young(block); +- mutex_exit(&buf_pool->mutex); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + } + +@@ -880,7 +897,7 @@ + { + buf_block_t* block; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + block = buf_block_align(frame); + +@@ -888,7 +905,7 @@ + + buf_LRU_make_block_young(block); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + + /************************************************************************ +@@ -899,7 +916,7 @@ + /*===========*/ + buf_block_t* block) /* in, own: block to be freed */ + { +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + mutex_enter(&block->mutex); + +@@ -909,7 +926,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + } + + 
/************************************************************************* +@@ -950,11 +967,11 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(block); + } +@@ -971,7 +988,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +@@ -979,7 +996,7 @@ + block->check_index_page_at_flush = FALSE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + } + + /************************************************************************ +@@ -998,7 +1015,7 @@ + buf_block_t* block; + ibool is_hashed; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +@@ -1008,7 +1025,7 @@ + is_hashed = block->is_hashed; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(is_hashed); + } +@@ -1050,7 +1067,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +@@ -1058,7 +1075,7 @@ + block->file_page_was_freed = TRUE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(block); + } +@@ -1079,7 +1096,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +@@ -1087,7 +1104,7 @@ + block->file_page_was_freed = FALSE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(block); + } +@@ -1166,26 +1183,33 @@ + buf_pool->n_page_gets++; + loop: + block = NULL; +- mutex_enter_fast(&(buf_pool->mutex)); ++ // mutex_enter_fast(&(buf_pool->mutex)); + + 
if (guess) { + block = buf_block_align(guess); + ++ mutex_enter(&block->mutex); + if ((offset != block->offset) || (space != block->space) + || (block->state != BUF_BLOCK_FILE_PAGE)) { + ++ mutex_exit(&block->mutex); + block = NULL; + } + } + + if (block == NULL) { ++ mutex_enter_fast(&(buf_pool->hash_mutex)); + block = buf_page_hash_get(space, offset); ++ if(block) { ++ mutex_enter(&block->mutex); ++ } ++ mutex_exit(&(buf_pool->hash_mutex)); + } + + if (block == NULL) { + /* Page not in buf_pool: needs to be read from file */ + +- mutex_exit(&(buf_pool->mutex)); ++ // mutex_exit(&(buf_pool->mutex)); + + if (mode == BUF_GET_IF_IN_POOL) { + +@@ -1204,7 +1228,7 @@ + goto loop; + } + +- mutex_enter(&block->mutex); ++ // mutex_enter(&block->mutex); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + +@@ -1216,7 +1240,7 @@ + + if (mode == BUF_GET_IF_IN_POOL) { + /* The page is only being read to buffer */ +- mutex_exit(&buf_pool->mutex); ++ // mutex_exit(&buf_pool->mutex); + mutex_exit(&block->mutex); + + return(NULL); +@@ -1233,7 +1257,9 @@ + LRU list and we must put it to awe_LRU_free_mapped list once + mapped to a frame */ + ++ mutex_enter_fast(&(buf_pool->mutex)); + buf_awe_map_page_to_frame(block, TRUE); ++ mutex_exit(&buf_pool->mutex); + } + + #ifdef UNIV_SYNC_DEBUG +@@ -1241,7 +1267,7 @@ + #else + buf_block_buf_fix_inc(block); + #endif +- mutex_exit(&buf_pool->mutex); ++ // mutex_exit(&buf_pool->mutex); + + /* Check if this is the first access to the page */ + +@@ -1791,7 +1817,8 @@ + + ut_a(block); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + mutex_enter(&block->mutex); + + if (fil_tablespace_deleted_or_being_deleted_in_mem(space, +@@ -1806,7 +1833,8 @@ + being deleted, or the page is already in buf_pool, return */ + + mutex_exit(&block->mutex); +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + buf_block_free(block); + +@@ 
-1821,10 +1849,14 @@ + ut_ad(block); + + buf_page_init(space, offset, block); ++ mutex_exit(&(buf_pool->hash_mutex)); + + /* The block must be put to the LRU list, to the old blocks */ + + buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ ++ mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */ + + block->io_fix = BUF_IO_READ; + +@@ -1873,7 +1905,8 @@ + + free_block = buf_LRU_get_free_block(); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + +@@ -1884,7 +1917,8 @@ + block->file_page_was_freed = FALSE; + + /* Page can be found in buf_pool */ +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + buf_block_free(free_block); + +@@ -1907,6 +1941,7 @@ + mutex_enter(&block->mutex); + + buf_page_init(space, offset, block); ++ mutex_exit(&(buf_pool->hash_mutex)); + + /* The block must be put to the LRU list */ + buf_LRU_add_block(block, FALSE); +@@ -1918,7 +1953,7 @@ + #endif + buf_pool->n_pages_created++; + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); + +@@ -1932,7 +1967,7 @@ + ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + frame = block->frame; + +@@ -1968,6 +2003,7 @@ + { + ulint io_type; + ulint read_page_no; ++ ulint flush_type; + + buf_io_counter_t* io_counter; + ulint fold; +@@ -2050,9 +2086,6 @@ + } + } + +- mutex_enter(&(buf_pool->mutex)); +- mutex_enter(&block->mutex); +- + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif +@@ -2061,9 +2094,12 @@ + removes the newest lock debug record, without checking the thread + id. 
*/ + +- block->io_fix = 0; +- + if (io_type == BUF_IO_READ) { ++ mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ ++ block->io_fix = 0; ++ + /* NOTE that the call to ibuf may have moved the ownership of + the x-latch to this OS thread: do not let this confuse you in + debugging! */ +@@ -2094,6 +2130,8 @@ + } + } + ++ mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fputs("Has read ", stderr); +@@ -2102,10 +2140,25 @@ + } else { + ut_ad(io_type == BUF_IO_WRITE); + ++ flush_type = block->flush_type; ++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */ ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ ++ block->io_fix = 0; ++ + /* Write means a flush operation: call the completion + routine in the flush system */ + + buf_flush_write_complete(block); ++ ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! 
*/ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); + /* io_counter here */ +@@ -2131,6 +2184,9 @@ + + buf_pool->n_pages_written++; + ++ mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); ++ + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fputs("Has written ", stderr); +@@ -2138,9 +2194,6 @@ + #endif /* UNIV_DEBUG */ + } + +- mutex_exit(&block->mutex); +- mutex_exit(&(buf_pool->mutex)); +- + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fprintf(stderr, "page space %lu page no %lu\n", +@@ -2168,11 +2221,11 @@ + freed = buf_LRU_search_and_free_block(100); + } + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + + /************************************************************************* +@@ -2191,10 +2244,22 @@ + ulint n_flush = 0; + ulint n_free = 0; + ulint n_page = 0; ++ ulint n_single_flush_tmp = 0; ++ ulint n_lru_flush_tmp = 0; ++ ulint n_list_flush_tmp = 0; + + ut_ad(buf_pool); + ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); ++ + mutex_enter(&(buf_pool->mutex)); ++ n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]; ++ n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST]; ++ n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU]; ++ mutex_exit(&(buf_pool->mutex)); + + for (i = 0; i < buf_pool->curr_size; i++) { + +@@ -2262,11 +2327,14 @@ + } + ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); + +- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); +- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); +- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); ++ ut_a(n_single_flush_tmp == n_single_flush); ++ ut_a(n_list_flush_tmp == n_list_flush); ++ ut_a(n_lru_flush_tmp == n_lru_flush); + +- 
mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + ut_a(buf_LRU_validate()); + ut_a(buf_flush_validate()); +@@ -2298,7 +2366,9 @@ + index_ids = mem_alloc(sizeof(dulint) * size); + counts = mem_alloc(sizeof(ulint) * size); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + fprintf(stderr, + "buf_pool size %lu\n" +@@ -2351,7 +2421,9 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + for (i = 0; i < n_found; i++) { + index = dict_index_get_if_in_cache(index_ids[i]); +@@ -2386,8 +2458,6 @@ + ulint i; + ulint fixed_pages_number = 0; + +- mutex_enter(&(buf_pool->mutex)); +- + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); +@@ -2403,7 +2473,6 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); + return fixed_pages_number; + } + #endif /* UNIV_DEBUG */ +@@ -2431,7 +2500,9 @@ + { + ulint ratio; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list)) + / (1 + UT_LIST_GET_LEN(buf_pool->LRU) +@@ -2439,7 +2510,9 @@ + + /* 1 + is there to avoid division by zero */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(ratio); + } +@@ -2459,6 +2532,9 @@ + ut_ad(buf_pool); + size = buf_pool->curr_size; + ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + mutex_enter(&(buf_pool->mutex)); + + if 
(srv_use_awe) { +@@ -2532,6 +2608,9 @@ + buf_pool->n_pages_written_old = buf_pool->n_pages_written; + buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped; + ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + mutex_exit(&(buf_pool->mutex)); + } + +@@ -2562,8 +2641,6 @@ + + ut_ad(buf_pool); + +- mutex_enter(&(buf_pool->mutex)); +- + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); +@@ -2584,8 +2661,6 @@ + + mutex_exit(&block->mutex); + } +- +- mutex_exit(&(buf_pool->mutex)); + + return(TRUE); + } +@@ -2625,11 +2700,11 @@ + { + ulint len; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + len = UT_LIST_GET_LEN(buf_pool->free); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(len); + } +diff -r 2e0c46e78b50 innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0flu.c Mon Dec 22 00:33:59 2008 -0800 +@@ -117,12 +117,14 @@ + ut_ad(mutex_own(&block->mutex)); + #endif /* UNIV_SYNC_DEBUG */ + if (block->state != BUF_BLOCK_FILE_PAGE) { ++ /* I permited not to own LRU_mutex.. 
*/ ++/* + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: buffer block state %lu in the LRU list!\n", + (ulong)block->state); + ut_print_buf(stderr, (byte*)block, sizeof(buf_block_t)); +- ++*/ + return(FALSE); + } + +@@ -536,18 +538,20 @@ + ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST + || flush_type == BUF_FLUSH_SINGLE_PAGE); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + + block = buf_page_hash_get(space, offset); + + ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); + + if (!block) { +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + return(0); + } + + mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + if (flush_type == BUF_FLUSH_LIST + && buf_flush_ready_for_flush(block, flush_type)) { +@@ -744,7 +748,7 @@ + high = fil_space_get_size(space); + } + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + + for (i = low; i < high; i++) { + +@@ -778,7 +782,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + /* Note: as we release the buf_pool mutex + above, in buf_flush_try_page we cannot be sure +@@ -789,14 +793,14 @@ + count += buf_flush_try_page(space, i, + flush_type); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + } else { + mutex_exit(&block->mutex); + } + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(count); + } +@@ -849,7 +853,14 @@ + } + + (buf_pool->init_flush)[flush_type] = TRUE; ++ ++ mutex_exit(&(buf_pool->mutex)); + ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ + for (;;) { + /* If we have flushed enough, leave the loop */ + if (page_count >= min_n) { +@@ -895,7 +906,10 @@ + offset = block->offset; + + mutex_exit(&block->mutex); +- 
mutex_exit(&(buf_pool->mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + old_page_count = page_count; + +@@ -908,7 +922,10 @@ + flush_type, offset, + page_count - old_page_count); */ + +- mutex_enter(&(buf_pool->mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + } else if (flush_type == BUF_FLUSH_LRU) { + +@@ -930,6 +947,13 @@ + break; + } + } ++ ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ ++ mutex_enter(&(buf_pool->mutex)); + + (buf_pool->init_flush)[flush_type] = FALSE; + +@@ -989,10 +1013,14 @@ + buf_block_t* block; + ulint n_replaceable; + ulint distance = 0; +- +- mutex_enter(&(buf_pool->mutex)); ++ ++ /* optimistic search... */ ++ //mutex_enter(&(buf_pool->LRU_mutex)); ++ //mutex_enter(&(buf_pool->free_mutex)); + + n_replaceable = UT_LIST_GET_LEN(buf_pool->free); ++ ++ //mutex_exit(&(buf_pool->free_mutex)); + + block = UT_LIST_GET_LAST(buf_pool->LRU); + +@@ -1014,7 +1042,7 @@ + block = UT_LIST_GET_PREV(LRU, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->LRU_mutex)); + + if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) { + +@@ -1033,8 +1061,9 @@ + immediately, without waiting. 
*/ + + void +-buf_flush_free_margin(void) ++buf_flush_free_margin( + /*=======================*/ ++ ibool wait) + { + ulint n_to_flush; + ulint n_flushed; +@@ -1044,7 +1073,7 @@ + if (n_to_flush > 0) { + n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, + ut_dulint_zero); +- if (n_flushed == ULINT_UNDEFINED) { ++ if (wait && n_flushed == ULINT_UNDEFINED) { + /* There was an LRU type flush batch already running; + let us wait for it to end */ + +@@ -1094,11 +1123,11 @@ + { + ibool ret; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + ret = buf_flush_validate_low(); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + return(ret); + } +diff -r 2e0c46e78b50 innobase/buf/buf0lru.c +--- a/innobase/buf/buf0lru.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0lru.c Mon Dec 22 00:33:59 2008 -0800 +@@ -79,7 +79,10 @@ + ibool all_freed; + + scan_again: +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + + all_freed = TRUE; + +@@ -117,7 +120,10 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + /* Note that the following call will acquire + an S-latch on the page */ +@@ -147,7 +153,10 @@ + block = UT_LIST_GET_PREV(LRU, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + if (!all_freed) { + os_thread_sleep(20000); +@@ -170,14 +179,14 @@ + ulint len; + ulint limit; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + len = UT_LIST_GET_LEN(buf_pool->LRU); + + if 
(len < BUF_LRU_OLD_MIN_LEN) { + /* The LRU list is too short to do read-ahead */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + return(0); + } +@@ -186,7 +195,7 @@ + + limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO; + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + return(limit); + } +@@ -210,13 +219,15 @@ + ulint distance = 0; + ibool freed; + +- mutex_enter(&(buf_pool->mutex)); ++ /* optimistic search... */ ++ //mutex_enter(&(buf_pool->LRU_mutex)); + ++retry: + freed = FALSE; + block = UT_LIST_GET_LAST(buf_pool->LRU); + + while (block != NULL) { +- ut_a(block->in_LRU_list); ++ //ut_a(block->in_LRU_list); /* optimistic */ + + mutex_enter(&block->mutex); + +@@ -231,9 +242,17 @@ + } + #endif /* UNIV_DEBUG */ + ++ mutex_exit(&block->mutex); ++ ++ mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */ ++ ++ mutex_enter(&(buf_pool->hash_mutex)); ++ mutex_enter(&block->mutex); ++ if(block->in_LRU_list && buf_flush_ready_for_replace(block)) { + buf_LRU_block_remove_hashed_page(block); ++ mutex_exit(&(buf_pool->hash_mutex)); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + mutex_exit(&block->mutex); + + /* Remove possible adaptive hash index built on the +@@ -246,14 +265,25 @@ + + ut_a(block->buf_fix_count == 0); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + mutex_enter(&block->mutex); + + buf_LRU_block_free_hashed_page(block); + freed = TRUE; ++ mutex_exit(&(buf_pool->free_mutex)); + mutex_exit(&block->mutex); + + break; ++ } else { /* someone may interrupt...??? 
*/ ++ mutex_exit(&(buf_pool->LRU_mutex));/* optimistic */ ++ ++ mutex_exit(&(buf_pool->hash_mutex)); ++ ++ if (!(block->in_LRU_list)) { ++ mutex_exit(&block->mutex); ++ goto retry; ++ } ++ } + } + + mutex_exit(&block->mutex); +@@ -264,13 +294,21 @@ + if (!freed && n_iterations <= 10 + && distance > 100 + (n_iterations * buf_pool->curr_size) + / 10) { ++ ++ mutex_enter(&(buf_pool->mutex)); + buf_pool->LRU_flush_ended = 0; ++ mutex_exit(&(buf_pool->mutex)); + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->LRU_mutex)); + + return(FALSE); + } + } ++ if (!freed) { ++ //mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ ++ mutex_enter(&(buf_pool->mutex)); + if (buf_pool->LRU_flush_ended > 0) { + buf_pool->LRU_flush_ended--; + } +@@ -322,7 +360,8 @@ + { + ibool ret = FALSE; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) { +@@ -330,7 +369,8 @@ + ret = TRUE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(ret); + } +@@ -353,7 +393,7 @@ + ibool mon_value_was = FALSE; + ibool started_monitor = FALSE; + loop: +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); /* LRU info:optimistic */ + + if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) { +@@ -437,7 +477,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + if (started_monitor) { + srv_print_innodb_monitor = mon_value_was; +@@ -449,7 +489,7 @@ + /* If no block was in the free list, search from the end of the LRU + list and try to free a block there */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + freed = buf_LRU_search_and_free_block(n_iterations); + +@@ 
-486,7 +526,7 @@ + + /* No free block was found: try to flush the LRU list */ + +- buf_flush_free_margin(); ++ buf_flush_free_margin(TRUE); + ++srv_buf_pool_wait_free; + + os_aio_simulated_wake_handler_threads(); +@@ -958,7 +998,7 @@ + ulint LRU_pos; + + ut_ad(buf_pool); +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { + +@@ -1001,7 +1041,10 @@ + + if (buf_pool->LRU_old) { + ut_a(buf_pool->LRU_old_len == old_len); +- } ++ } ++ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free); + +@@ -1013,7 +1056,7 @@ + block = UT_LIST_GET_NEXT(free, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + return(TRUE); + } + +@@ -1029,7 +1072,7 @@ + ulint len; + + ut_ad(buf_pool); +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock); + +@@ -1073,5 +1116,5 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } +diff -r 2e0c46e78b50 innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0rea.c Mon Dec 22 00:33:59 2008 -0800 +@@ -236,10 +236,12 @@ + + return(0); + } ++ mutex_exit(&(buf_pool->mutex)); + + /* Count how many blocks in the area have been recently accessed, + that is, reside near the start of the LRU list. 
*/ + ++ mutex_enter(&(buf_pool->hash_mutex)); + for (i = low; i < high; i++) { + block = buf_page_hash_get(space, i); + +@@ -250,8 +252,9 @@ + recent_blocks++; + } + } ++ mutex_exit(&(buf_pool->hash_mutex)); + +- mutex_exit(&(buf_pool->mutex)); ++ // mutex_exit(&(buf_pool->mutex)); + + if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) { + /* Do nothing */ +@@ -347,7 +350,7 @@ + } + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + return(count + count2); + } +@@ -450,6 +453,7 @@ + + return(0); + } ++ mutex_exit(&(buf_pool->mutex)); + + /* Check that almost all pages in the area have been accessed; if + offset == low, the accesses must be in a descending order, otherwise, +@@ -463,6 +467,7 @@ + + fail_count = 0; + ++ mutex_enter(&(buf_pool->hash_mutex)); + for (i = low; i < high; i++) { + block = buf_page_hash_get(space, i); + +@@ -479,12 +484,13 @@ + pred_block = block; + } + } ++ mutex_exit(&(buf_pool->hash_mutex)); + + if (fail_count > BUF_READ_AHEAD_LINEAR_AREA - + BUF_READ_AHEAD_LINEAR_THRESHOLD) { + /* Too many failures: return */ + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->mutex)); + + return(0); + } +@@ -492,10 +498,11 @@ + /* If we got this far, we know that enough pages in the area have + been accessed in the right order: linear read-ahead can be sensible */ + ++ mutex_enter(&(buf_pool->hash_mutex)); + block = buf_page_hash_get(space, offset); + + if (block == NULL) { +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + return(0); + } +@@ -511,7 +518,7 @@ + pred_offset = fil_page_get_prev(frame); + succ_offset = fil_page_get_next(frame); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + if ((offset == low) && (succ_offset == offset + 1)) { + +@@ -587,7 +594,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ 
buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints && (count > 0)) { +@@ -655,7 +662,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +@@ -727,7 +734,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +diff -r 2e0c46e78b50 innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:59 2008 -0800 +@@ -946,6 +946,7 @@ + mem_heap_t* io_counter_heap; + ulint io_counters; + hash_table_t* page_hash; /* hash table of the file pages */ ++ mutex_t hash_mutex; + + ulint n_pend_reads; /* number of pending read operations */ + +@@ -978,6 +979,7 @@ + UT_LIST_BASE_NODE_T(buf_block_t) flush_list; + /* base node of the modified block + list */ ++ mutex_t flush_list_mutex; + ibool init_flush[BUF_FLUSH_LIST + 1]; + /* this is TRUE when a flush of the + given type is being initialized */ +@@ -1011,8 +1013,10 @@ + in the case of AWE, at the start are + always free blocks for which the + physical memory is mapped to a frame */ ++ mutex_t free_mutex; + UT_LIST_BASE_NODE_T(buf_block_t) LRU; + /* base node of the LRU list */ ++ mutex_t LRU_mutex; + buf_block_t* LRU_old; /* pointer to the about 3/8 oldest + blocks in the LRU list; NULL if LRU + length less than BUF_LRU_OLD_MIN_LEN */ +diff -r 2e0c46e78b50 innobase/include/buf0buf.ic +--- a/innobase/include/buf0buf.ic Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0buf.ic Mon Dec 22 00:33:59 2008 -0800 +@@ -112,7 +112,7 @@ + buf_block_t* block; + dulint lsn; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + block = 
UT_LIST_GET_LAST(buf_pool->flush_list); + +@@ -122,7 +122,7 @@ + lsn = block->oldest_modification; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + return(lsn); + } +@@ -392,18 +392,18 @@ + /* out: TRUE if io going on */ + buf_block_t* block) /* in: buf_pool block, must be bufferfixed */ + { +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&block->mutex); + + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + ut_ad(block->buf_fix_count > 0); + + if (block->io_fix != 0) { +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(TRUE); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(FALSE); + } +@@ -425,7 +425,7 @@ + + block = buf_block_align(frame); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&block->mutex); + + if (block->state == BUF_BLOCK_FILE_PAGE) { + lsn = block->newest_modification; +@@ -433,7 +433,7 @@ + lsn = ut_dulint_zero; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(lsn); + } +@@ -632,9 +632,9 @@ + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + + if (rw_latch == RW_X_LATCH && mtr->modifications) { +- mutex_enter(&buf_pool->mutex); ++ mutex_enter(&buf_pool->flush_list_mutex); + buf_flush_note_modification(block, mtr); +- mutex_exit(&buf_pool->mutex); ++ mutex_exit(&buf_pool->flush_list_mutex); + } + + mutex_enter(&block->mutex); +diff -r 2e0c46e78b50 innobase/include/buf0flu.h +--- a/innobase/include/buf0flu.h Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0flu.h Mon Dec 22 00:33:59 2008 -0800 +@@ -26,8 +26,9 @@ + a margin of replaceable pages there. */ + + void +-buf_flush_free_margin(void); ++buf_flush_free_margin( + /*=======================*/ ++ ibool wait); + /************************************************************************ + Initializes a page for writing to the tablespace. 
*/ + +diff -r 2e0c46e78b50 innobase/include/buf0flu.ic +--- a/innobase/include/buf0flu.ic Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0flu.ic Mon Dec 22 00:33:59 2008 -0800 +@@ -84,7 +84,7 @@ + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); + #endif /* UNIV_SYNC_DEBUG */ + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0); + +@@ -102,5 +102,5 @@ + start_lsn) <= 0); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + } +diff -r 2e0c46e78b50 innobase/log/log0recv.c +--- a/innobase/log/log0recv.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/log/log0recv.c Mon Dec 22 00:33:59 2008 -0800 +@@ -1693,11 +1693,11 @@ + + mtr_start(&mtr); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->hash_mutex)); + + page = buf_page_hash_get(space, page_no)->frame; + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->hash_mutex)); + + replica = buf_page_get(space + RECV_REPLICA_SPACE_ADD, page_no, + RW_X_LATCH, &mtr); +diff -r 2e0c46e78b50 patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info Mon Dec 22 00:33:59 2008 -0800 +@@ -0,0 +1,6 @@ ++File=split_buf_pool_mutex_fixed_optimistic_safe.patch ++Name=InnoDB patch to fix buffer pool scalability ++Version=1.0 ++Author=Yasufumi Kinoshita ++License=BSD ++Comment= diff --git a/percona/5.0.75-b12/userstatv2.patch b/percona/5.0.75-b12/userstatv2.patch new file mode 100644 index 0000000..de796c0 --- /dev/null +++ b/percona/5.0.75-b12/userstatv2.patch @@ -0,0 +1,4463 @@ +diff -r 1270c564d514 BUILD/Makefile.in +--- a/BUILD/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/BUILD/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -169,6 +169,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + 
LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 Docs/Makefile.in +--- a/Docs/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/Docs/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 Makefile.in +--- a/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -191,6 +191,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 SSL/Makefile.in +--- a/SSL/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/SSL/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 aclocal.m4 +--- a/aclocal.m4 Mon Dec 22 00:26:39 2008 -0800 ++++ b/aclocal.m4 Mon Dec 22 00:31:13 2008 -0800 +@@ -1597,7 +1597,7 @@ + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then +- lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` ++ lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib${libsuff} /usr/lib${libsuff} $lt_ld_extra" + fi + +@@ -4305,6 +4305,9 @@ + # Is the compiler the GNU C compiler? 
+ with_gcc=$_LT_AC_TAGVAR(GCC, $1) + ++gcc_dir=\`gcc -print-file-name=. | $SED 's,/\.$,,'\` ++gcc_ver=\`gcc -dumpversion\` ++ + # An ERE matcher. + EGREP=$lt_EGREP + +@@ -4438,11 +4441,11 @@ + + # Dependencies to place before the objects being linked to create a + # shared library. +-predep_objects=$lt_[]_LT_AC_TAGVAR(predep_objects, $1) ++predep_objects=\`echo $lt_[]_LT_AC_TAGVAR(predep_objects, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Dependencies to place after the objects being linked to create a + # shared library. +-postdep_objects=$lt_[]_LT_AC_TAGVAR(postdep_objects, $1) ++postdep_objects=\`echo $lt_[]_LT_AC_TAGVAR(postdep_objects, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Dependencies to place before the objects being linked to create a + # shared library. +@@ -4454,7 +4457,7 @@ + + # The library search path used internally by the compiler when linking + # a shared library. +-compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) ++compiler_lib_search_path=\`echo $lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Method to check whether dependent libraries are shared objects. 
+ deplibs_check_method=$lt_deplibs_check_method +@@ -4534,7 +4537,7 @@ + link_all_deplibs=$_LT_AC_TAGVAR(link_all_deplibs, $1) + + # Compile-time system search path for libraries +-sys_lib_search_path_spec=$lt_sys_lib_search_path_spec ++sys_lib_search_path_spec=\`echo $lt_sys_lib_search_path_spec | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Run-time system search path for libraries + sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec +@@ -6370,6 +6373,7 @@ + done + done + done ++IFS=$as_save_IFS + lt_ac_max=0 + lt_ac_count=0 + # Add /usr/xpg4/bin/sed as it is typically found on Solaris +@@ -6402,6 +6406,7 @@ + done + ]) + SED=$lt_cv_path_SED ++AC_SUBST([SED]) + AC_MSG_RESULT([$SED]) + ]) + +diff -r 1270c564d514 client/Makefile.in +--- a/client/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/client/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -268,6 +268,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 cmd-line-utils/Makefile.in +--- a/cmd-line-utils/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/cmd-line-utils/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -177,6 +177,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 cmd-line-utils/libedit/Makefile.in +--- a/cmd-line-utils/libedit/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/cmd-line-utils/libedit/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -197,6 +197,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 cmd-line-utils/readline/Makefile.in +--- a/cmd-line-utils/readline/Makefile.in Mon Dec 22 
00:26:39 2008 -0800 ++++ b/cmd-line-utils/readline/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -195,6 +195,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 configure +--- a/configure Mon Dec 22 00:26:39 2008 -0800 ++++ b/configure Mon Dec 22 00:31:13 2008 -0800 +@@ -477,7 +477,7 @@ + #endif" + + ac_subdirs_all="$ac_subdirs_all innobase" +-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar MYSQL_NO_DASH_VERSION MYSQL_BASE_VERSION MYSQL_VERSION_ID MYSQL_PREVIOUS_BASE_VERSION PROTOCOL_VERSION DOT_FRM_VERSION SHARED_LIB_MAJOR_VERSION SHARED_LIB_VERSION NDB_SHARED_LIB_MAJOR_VERSION NDB_SHARED_LIB_VERSION AVAILABLE_LANGUAGES NDB_VERSION_MAJOR NDB_VERSION_MINOR NDB_VERSION_BUILD NDB_VERSION_STATUS SYSTEM_TYPE MACHINE_TYPE CONF_COMMAND SAVE_CC SAVE_CXX SAVE_ASFLAGS SAVE_CFLAGS SAVE_CXXFLAGS SAVE_LDFLAGS SAVE_CXXLDFLAGS CXXLDFLAGS AR RANLIB DARWIN_MWCC_TRUE DARWIN_MWCC_FALSE CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE CPP CC_VERSION CXX_VERSION AS ac_ct_RANLIB EGREP LN_S ECHO ac_ct_AR CXXCPP F77 FFLAGS 
ac_ct_F77 LIBTOOL NM YACC PDFMANUAL DVIS uname_prog ASFLAGS LD ARFLAGS LD_VERSION_SCRIPT MYSQLD_DEFAULT_SWITCHES TARGET_LINUX LN LN_CP_F MV RM CP SED CMP CHMOD HOSTNAME TAR PERL PERL5 DOXYGEN PDFLATEX MAKEINDEX ICHECK PS FIND_PROC KILL CHECK_PID CCAS CCASFLAGS NOINST_LDFLAGS MYSQL_SERVER_SUFFIX ASSEMBLER_x86_TRUE ASSEMBLER_x86_FALSE ASSEMBLER_sparc32_TRUE ASSEMBLER_sparc32_FALSE ASSEMBLER_sparc64_TRUE ASSEMBLER_sparc64_FALSE ASSEMBLER_TRUE ASSEMBLER_FALSE MYSQL_UNIX_ADDR MYSQL_TCP_PORT MYSQL_TCP_PORT_DEFAULT MYSQLD_USER GETCONF ac_ct_GETCONF zlib_dir ZLIB_LIBS ZLIB_DEPS ZLIB_INCLUDES WRAPLIBS pstack_dirs pstack_libs COMPILE_PSTACK_TRUE COMPILE_PSTACK_FALSE LIBDL MYSQLD_EXTRA_LDFLAGS CLIENT_EXTRA_LDFLAGS MYSQLD_EXTRA_LIBS LIB_EXTRA_CCFLAGS LM_CFLAGS COMPILATION_COMMENT ALLOCA MAKE_SHELL TERMCAP_LIB LIBEDIT_LOBJECTS tools_dirs openssl_libs openssl_includes yassl_taocrypt_extra_cxxflags yassl_h_ln_cmd yassl_libs yassl_dir HAVE_YASSL_TRUE HAVE_YASSL_FALSE libmysqld_dirs linked_libmysqld_targets docs_dirs extra_docs bench_dirs readline_dir readline_topdir readline_basedir readline_link readline_h_ln_cmd bdb_includes bdb_libs bdb_libs_with_path innodb_includes innodb_libs innodb_system_libs NDB_SCI_INCLUDES NDB_SCI_LIBS NDB_LD_VERSION_SCRIPT HAVE_NDBCLUSTER_DB_TRUE HAVE_NDBCLUSTER_DB_FALSE ndbcluster_includes ndbcluster_libs ndbcluster_system_libs ndb_mgmclient_libs man_dirs man1_files man8_files CLIENT_LIBS NON_THREADED_LIBS STATIC_NSS_FLAGS sql_client_dirs linked_client_targets netware_dir linked_netware_sources HAVE_NETWARE_TRUE HAVE_NETWARE_FALSE THREAD_LOBJECTS BUILD_INNODB_TOOLS_TRUE BUILD_INNODB_TOOLS_FALSE subdirs sql_server_dirs thread_dirs server_scripts sql_union_dirs GXX NDB_DEFS ndb_cxxflags_fix ndb_port ndb_transporter_opt_objs ndb_bin_am_ldflags ndb_opt_subdirs NDB_SIZEOF_CHARP NDB_SIZEOF_CHAR NDB_SIZEOF_SHORT NDB_SIZEOF_INT NDB_SIZEOF_LONG NDB_SIZEOF_LONG_LONG MAKE_BINARY_DISTRIBUTION_OPTIONS LIBOBJS LTLIBOBJS' ++ac_subst_vars='SHELL PATH_SEPARATOR 
PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar MYSQL_NO_DASH_VERSION MYSQL_BASE_VERSION MYSQL_VERSION_ID MYSQL_PREVIOUS_BASE_VERSION PROTOCOL_VERSION DOT_FRM_VERSION SHARED_LIB_MAJOR_VERSION SHARED_LIB_VERSION NDB_SHARED_LIB_MAJOR_VERSION NDB_SHARED_LIB_VERSION AVAILABLE_LANGUAGES NDB_VERSION_MAJOR NDB_VERSION_MINOR NDB_VERSION_BUILD NDB_VERSION_STATUS SYSTEM_TYPE MACHINE_TYPE CONF_COMMAND SAVE_CC SAVE_CXX SAVE_ASFLAGS SAVE_CFLAGS SAVE_CXXFLAGS SAVE_LDFLAGS SAVE_CXXLDFLAGS CXXLDFLAGS AR RANLIB DARWIN_MWCC_TRUE DARWIN_MWCC_FALSE CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE CPP CC_VERSION CXX_VERSION AS ac_ct_RANLIB SED EGREP LN_S ECHO ac_ct_AR CXXCPP F77 FFLAGS ac_ct_F77 LIBTOOL NM YACC PDFMANUAL DVIS uname_prog ASFLAGS LD ARFLAGS LD_VERSION_SCRIPT MYSQLD_DEFAULT_SWITCHES TARGET_LINUX LN LN_CP_F MV RM CP SED CMP CHMOD HOSTNAME TAR PERL PERL5 DOXYGEN PDFLATEX MAKEINDEX ICHECK PS FIND_PROC KILL CHECK_PID CCAS CCASFLAGS NOINST_LDFLAGS MYSQL_SERVER_SUFFIX ASSEMBLER_x86_TRUE ASSEMBLER_x86_FALSE ASSEMBLER_sparc32_TRUE ASSEMBLER_sparc32_FALSE ASSEMBLER_sparc64_TRUE ASSEMBLER_sparc64_FALSE ASSEMBLER_TRUE ASSEMBLER_FALSE MYSQL_UNIX_ADDR MYSQL_TCP_PORT MYSQL_TCP_PORT_DEFAULT MYSQLD_USER 
GETCONF ac_ct_GETCONF zlib_dir ZLIB_LIBS ZLIB_DEPS ZLIB_INCLUDES WRAPLIBS pstack_dirs pstack_libs COMPILE_PSTACK_TRUE COMPILE_PSTACK_FALSE LIBDL MYSQLD_EXTRA_LDFLAGS CLIENT_EXTRA_LDFLAGS MYSQLD_EXTRA_LIBS LIB_EXTRA_CCFLAGS LM_CFLAGS COMPILATION_COMMENT ALLOCA MAKE_SHELL TERMCAP_LIB LIBEDIT_LOBJECTS tools_dirs openssl_libs openssl_includes yassl_taocrypt_extra_cxxflags yassl_h_ln_cmd yassl_libs yassl_dir HAVE_YASSL_TRUE HAVE_YASSL_FALSE libmysqld_dirs linked_libmysqld_targets docs_dirs extra_docs bench_dirs readline_dir readline_topdir readline_basedir readline_link readline_h_ln_cmd bdb_includes bdb_libs bdb_libs_with_path innodb_includes innodb_libs innodb_system_libs NDB_SCI_INCLUDES NDB_SCI_LIBS NDB_LD_VERSION_SCRIPT HAVE_NDBCLUSTER_DB_TRUE HAVE_NDBCLUSTER_DB_FALSE ndbcluster_includes ndbcluster_libs ndbcluster_system_libs ndb_mgmclient_libs man_dirs man1_files man8_files CLIENT_LIBS NON_THREADED_LIBS STATIC_NSS_FLAGS sql_client_dirs linked_client_targets netware_dir linked_netware_sources HAVE_NETWARE_TRUE HAVE_NETWARE_FALSE THREAD_LOBJECTS BUILD_INNODB_TOOLS_TRUE BUILD_INNODB_TOOLS_FALSE subdirs sql_server_dirs thread_dirs server_scripts sql_union_dirs GXX NDB_DEFS ndb_cxxflags_fix ndb_port ndb_transporter_opt_objs ndb_bin_am_ldflags ndb_opt_subdirs NDB_SIZEOF_CHARP NDB_SIZEOF_CHAR NDB_SIZEOF_SHORT NDB_SIZEOF_INT NDB_SIZEOF_LONG NDB_SIZEOF_LONG_LONG MAKE_BINARY_DISTRIBUTION_OPTIONS LIBOBJS LTLIBOBJS' + ac_subst_files='' + + # Initialize some variables set by options. +@@ -38236,7 +38236,91 @@ + # We also disable for SCO for the time being, the headers for the + # thread library we use conflicts with other headers. + ;; +- *) ++*) ++ # most systems require the program be linked with librt library to use ++ # the function clock_gettime ++ my_save_LIBS="$LIBS" ++ LIBS="" ++ ++echo "$as_me:$LINENO: checking for clock_gettime in -lrt" >&5 ++echo $ECHO_N "checking for clock_gettime in -lrt... 
$ECHO_C" >&6 ++if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then ++ echo $ECHO_N "(cached) $ECHO_C" >&6 ++else ++ ac_check_lib_save_LIBS=$LIBS ++LIBS="-lrt $LIBS" ++cat >conftest.$ac_ext <<_ACEOF ++/* confdefs.h. */ ++_ACEOF ++cat confdefs.h >>conftest.$ac_ext ++cat >>conftest.$ac_ext <<_ACEOF ++/* end confdefs.h. */ ++ ++/* Override any gcc2 internal prototype to avoid an error. */ ++#ifdef __cplusplus ++extern "C" ++#endif ++/* We use char because int might match the return type of a gcc2 ++ builtin and then its argument prototype would still apply. */ ++char clock_gettime (); ++int ++main () ++{ ++clock_gettime (); ++ ; ++ return 0; ++} ++_ACEOF ++rm -f conftest.$ac_objext conftest$ac_exeext ++if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 ++ (eval $ac_link) 2>conftest.er1 ++ ac_status=$? ++ grep -v '^ *+' conftest.er1 >conftest.err ++ rm -f conftest.er1 ++ cat conftest.err >&5 ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); } && ++ { ac_try='test -z "$ac_c_werror_flag" ++ || test ! -s conftest.err' ++ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; } && ++ { ac_try='test -s conftest$ac_exeext' ++ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 ++ (exit $ac_status); }; }; then ++ ac_cv_lib_rt_clock_gettime=yes ++else ++ echo "$as_me: failed program was:" >&5 ++sed 's/^/| /' conftest.$ac_ext >&5 ++ ++ac_cv_lib_rt_clock_gettime=no ++fi ++rm -f conftest.err conftest.$ac_objext \ ++ conftest$ac_exeext conftest.$ac_ext ++LIBS=$ac_check_lib_save_LIBS ++fi ++echo "$as_me:$LINENO: result: $ac_cv_lib_rt_clock_gettime" >&5 ++echo "${ECHO_T}$ac_cv_lib_rt_clock_gettime" >&6 ++if test $ac_cv_lib_rt_clock_gettime = yes; then ++ cat >>confdefs.h <<_ACEOF ++#define HAVE_LIBRT 1 ++_ACEOF ++ ++ LIBS="-lrt $LIBS" ++ ++fi ++ ++ LIBRT=$LIBS ++ LIBS="$my_save_LIBS" ++ ++ ++ LIBS="$LIBS $LIBRT" ++ + for ac_func in clock_gettime + do + as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +@@ -41644,7 +41728,7 @@ + + fi + +-CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS" ++CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS $LIBRT" + + + +@@ -42984,6 +43068,7 @@ + s,@MAKE_SHELL@,$MAKE_SHELL,;t t + s,@TERMCAP_LIB@,$TERMCAP_LIB,;t t + s,@LIBEDIT_LOBJECTS@,$LIBEDIT_LOBJECTS,;t t ++s,@LIBRT@,$LIBRT,;t t + s,@tools_dirs@,$tools_dirs,;t t + s,@openssl_libs@,$openssl_libs,;t t + s,@openssl_includes@,$openssl_includes,;t t +diff -r 1270c564d514 configure.in +--- a/configure.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/configure.in Mon Dec 22 00:31:13 2008 -0800 +@@ -2098,7 +2098,18 @@ + # We also disable for SCO for the time being, the headers for the + # thread library we use conflicts with other headers. 
+ ;; +- *) AC_CHECK_FUNCS(clock_gettime) ++*) ++ # most systems require the program be linked with librt library to use ++ # the function clock_gettime ++ my_save_LIBS="$LIBS" ++ LIBS="" ++ AC_CHECK_LIB(rt,clock_gettime) ++ LIBRT=$LIBS ++ LIBS="$my_save_LIBS" ++ AC_SUBST(LIBRT) ++ ++ LIBS="$LIBS $LIBRT" ++ AC_CHECK_FUNCS(clock_gettime) + ;; + esac + +@@ -2713,7 +2724,7 @@ + AC_DEFINE([THREAD_SAFE_CLIENT], [1], [Should be client be thread safe]) + fi + +-CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS" ++CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS $LIBRT" + + AC_SUBST(CLIENT_LIBS) + AC_SUBST(NON_THREADED_LIBS) +diff -r 1270c564d514 dbug/Makefile.in +--- a/dbug/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/dbug/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -214,6 +214,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/Makefile.in +--- a/extra/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -259,6 +259,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/Makefile.in +--- a/extra/yassl/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -162,6 +162,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/src/Makefile.in +--- a/extra/yassl/src/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/src/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -173,6 +173,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = 
@LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/taocrypt/Makefile.in +--- a/extra/yassl/taocrypt/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/taocrypt/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -162,6 +162,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/taocrypt/benchmark/Makefile.in +--- a/extra/yassl/taocrypt/benchmark/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/taocrypt/benchmark/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -172,6 +172,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/taocrypt/src/Makefile.in +--- a/extra/yassl/taocrypt/src/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/taocrypt/src/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -182,6 +182,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/taocrypt/test/Makefile.in +--- a/extra/yassl/taocrypt/test/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/taocrypt/test/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -172,6 +172,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 extra/yassl/testsuite/Makefile.in +--- a/extra/yassl/testsuite/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/extra/yassl/testsuite/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -175,6 +175,7 
@@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 heap/Makefile.in +--- a/heap/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/heap/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 include/Makefile.in +--- a/include/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/include/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -183,6 +183,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 include/mysql_com.h +--- a/include/mysql_com.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/include/mysql_com.h Mon Dec 22 00:31:13 2008 -0800 +@@ -106,6 +106,11 @@ + thread */ + #define REFRESH_MASTER 128 /* Remove all bin logs in the index + and truncate the index */ ++#define REFRESH_TABLE_STATS 256 /* Refresh table stats hash table */ ++#define REFRESH_INDEX_STATS 512 /* Refresh index stats hash table */ ++#define REFRESH_USER_STATS 1024 /* Refresh user stats hash table */ ++#define REFRESH_SLOW_QUERY_LOG 4096 /* Flush slow query log and rotate*/ ++#define REFRESH_CLIENT_STATS 8192 /* Refresh client stats hash table */ + + /* The following can't be set with mysql_refresh() */ + #define REFRESH_READ_LOCK 16384 /* Lock tables for read */ +diff -r 1270c564d514 libmysql/Makefile.in +--- a/libmysql/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/libmysql/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -278,6 +278,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = 
@LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 libmysql_r/Makefile.in +--- a/libmysql_r/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/libmysql_r/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -275,6 +275,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ @ZLIB_LIBS@ @openssl_libs@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 libmysqld/Makefile.in +--- a/libmysqld/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/libmysqld/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -264,6 +264,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 libmysqld/examples/Makefile.in +--- a/libmysqld/examples/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/libmysqld/examples/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -213,6 +213,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ @WRAPLIBS@ @CLIENT_LIBS@ $(yassl_libs) + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 man/Makefile.in +--- a/man/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/man/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -174,6 +174,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 myisam/Makefile.in +--- a/myisam/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/myisam/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -257,6 +257,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 myisammrg/Makefile.in +--- a/myisammrg/Makefile.in Mon Dec 22 00:26:39 
2008 -0800 ++++ b/myisammrg/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -205,6 +205,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 mysql-test/Makefile.in +--- a/mysql-test/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysql-test/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -181,6 +181,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 mysql-test/ndb/Makefile.in +--- a/mysql-test/ndb/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysql-test/ndb/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -170,6 +170,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Dec 22 00:31:13 2008 -0800 +@@ -37,10 +37,12 @@ + select * from v1; + c + CHARACTER_SETS ++CLIENT_STATISTICS + COLLATIONS + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INDEX_STATISTICS + KEY_COLUMN_USAGE + PROFILING + ROUTINES +@@ -50,8 +52,10 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + USER_PRIVILEGES ++USER_STATISTICS + VIEWS + columns_priv + db +@@ -83,6 +87,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES ++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + tables_priv tables_priv + time_zone time_zone +@@ -102,6 +107,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES ++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + 
tables_priv tables_priv + time_zone time_zone +@@ -121,6 +127,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES ++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + tables_priv tables_priv + time_zone time_zone +@@ -594,12 +601,13 @@ + where table_schema='information_schema' limit 2; + TABLE_NAME TABLE_TYPE ENGINE + CHARACTER_SETS SYSTEM VIEW MEMORY +-COLLATIONS SYSTEM VIEW MEMORY ++CLIENT_STATISTICS SYSTEM VIEW MEMORY + show tables from information_schema like "T%"; + Tables_in_information_schema (T%) + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + create database information_schema; + ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema' +@@ -609,6 +617,7 @@ + TABLES SYSTEM VIEW + TABLE_CONSTRAINTS SYSTEM VIEW + TABLE_PRIVILEGES SYSTEM VIEW ++TABLE_STATISTICS SYSTEM VIEW + TRIGGERS SYSTEM VIEW + create table t1(a int); + ERROR 42S02: Unknown table 't1' in information_schema +@@ -621,6 +630,7 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + select table_name from tables where table_name='user'; + table_name +@@ -730,7 +740,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-102 ++106 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -790,18 +800,20 @@ + TABLE_NAME COLUMN_NAME PRIVILEGES + COLUMNS TABLE_NAME select + COLUMN_PRIVILEGES TABLE_NAME select ++INDEX_STATISTICS TABLE_NAME select + KEY_COLUMN_USAGE TABLE_NAME select + STATISTICS TABLE_NAME select + TABLES TABLE_NAME select + TABLE_CONSTRAINTS TABLE_NAME select + TABLE_PRIVILEGES TABLE_NAME select ++TABLE_STATISTICS TABLE_NAME select + VIEWS TABLE_NAME select + delete from mysql.user where user='mysqltest_4'; + delete from mysql.db where user='mysqltest_4'; + flush privileges; + SELECT table_schema, count(*) FROM 
information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 17 ++information_schema 21 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1187,10 +1199,12 @@ + ); + table_name column_name + CHARACTER_SETS CHARACTER_SET_NAME ++CLIENT_STATISTICS CLIENT + COLLATIONS COLLATION_NAME + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA +@@ -1200,8 +1214,10 @@ + TABLES TABLE_SCHEMA + TABLE_CONSTRAINTS CONSTRAINT_SCHEMA + TABLE_PRIVILEGES TABLE_SCHEMA ++TABLE_STATISTICS TABLE_SCHEMA + TRIGGERS TRIGGER_SCHEMA + USER_PRIVILEGES GRANTEE ++USER_STATISTICS USER + VIEWS TABLE_SCHEMA + SELECT t.table_name, c1.column_name + FROM information_schema.tables t +@@ -1219,10 +1235,12 @@ + ); + table_name column_name + CHARACTER_SETS CHARACTER_SET_NAME ++CLIENT_STATISTICS CLIENT + COLLATIONS COLLATION_NAME + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA +@@ -1232,8 +1250,10 @@ + TABLES TABLE_SCHEMA + TABLE_CONSTRAINTS CONSTRAINT_SCHEMA + TABLE_PRIVILEGES TABLE_SCHEMA ++TABLE_STATISTICS TABLE_SCHEMA + TRIGGERS TRIGGER_SCHEMA + USER_PRIVILEGES GRANTEE ++USER_STATISTICS USER + VIEWS TABLE_SCHEMA + SELECT MAX(table_name) FROM information_schema.tables; + MAX(table_name) +@@ -1302,10 +1322,12 @@ + group by t.table_name order by num1, t.table_name; + table_name group_concat(t.table_schema, '.', t.table_name) num1 + CHARACTER_SETS information_schema.CHARACTER_SETS 1 ++CLIENT_STATISTICS information_schema.CLIENT_STATISTICS 1 + COLLATIONS information_schema.COLLATIONS 1 + COLLATION_CHARACTER_SET_APPLICABILITY 
information_schema.COLLATION_CHARACTER_SET_APPLICABILITY 1 + COLUMNS information_schema.COLUMNS 1 + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 ++INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROFILING information_schema.PROFILING 1 + ROUTINES information_schema.ROUTINES 1 +@@ -1315,8 +1337,10 @@ + TABLES information_schema.TABLES 1 + TABLE_CONSTRAINTS information_schema.TABLE_CONSTRAINTS 1 + TABLE_PRIVILEGES information_schema.TABLE_PRIVILEGES 1 ++TABLE_STATISTICS information_schema.TABLE_STATISTICS 1 + TRIGGERS information_schema.TRIGGERS 1 + USER_PRIVILEGES information_schema.USER_PRIVILEGES 1 ++USER_STATISTICS information_schema.USER_STATISTICS 1 + VIEWS information_schema.VIEWS 1 + show global status like "Uptime_%"; + Variable_name Value +diff -r 1270c564d514 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Dec 22 00:31:13 2008 -0800 +@@ -6,10 +6,12 @@ + show tables; + Tables_in_information_schema + CHARACTER_SETS ++CLIENT_STATISTICS + COLLATIONS + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INDEX_STATISTICS + KEY_COLUMN_USAGE + PROFILING + ROUTINES +@@ -19,14 +21,17 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + USER_PRIVILEGES ++USER_STATISTICS + VIEWS + show tables from INFORMATION_SCHEMA like 'T%'; + Tables_in_information_schema (T%) + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + create database `inf%`; + create database mbase; +diff -r 1270c564d514 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Dec 22 00:31:13 2008 -0800 +@@ -80,10 +80,12 @@ + | Tables | + +---------------------------------------+ + | CHARACTER_SETS | ++| CLIENT_STATISTICS | + | COLLATIONS | + | 
COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROFILING | + | ROUTINES | +@@ -93,8 +95,10 @@ + | TABLES | + | TABLE_CONSTRAINTS | + | TABLE_PRIVILEGES | ++| TABLE_STATISTICS | + | TRIGGERS | + | USER_PRIVILEGES | ++| USER_STATISTICS | + | VIEWS | + +---------------------------------------+ + Database: INFORMATION_SCHEMA +@@ -102,10 +106,12 @@ + | Tables | + +---------------------------------------+ + | CHARACTER_SETS | ++| CLIENT_STATISTICS | + | COLLATIONS | + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROFILING | + | ROUTINES | +@@ -115,8 +121,10 @@ + | TABLES | + | TABLE_CONSTRAINTS | + | TABLE_PRIVILEGES | ++| TABLE_STATISTICS | + | TRIGGERS | + | USER_PRIVILEGES | ++| USER_STATISTICS | + | VIEWS | + +---------------------------------------+ + Wildcard: inf_rmation_schema +diff -r 1270c564d514 mysys/Makefile.in +--- a/mysys/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/mysys/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -249,6 +249,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/Makefile.in +--- a/ndb/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -191,6 +191,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/docs/Makefile.in +--- a/ndb/docs/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/docs/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -172,6 +172,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = 
@LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/include/Makefile.in +--- a/ndb/include/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/include/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -202,6 +202,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/Makefile.in +--- a/ndb/src/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -220,6 +220,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/Makefile.in +--- a/ndb/src/common/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -193,6 +193,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/debugger/Makefile.in +--- a/ndb/src/common/debugger/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/debugger/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -225,6 +225,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/debugger/signaldata/Makefile.in +--- a/ndb/src/common/debugger/signaldata/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/debugger/signaldata/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -233,6 +233,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 
ndb/src/common/logger/Makefile.in +--- a/ndb/src/common/logger/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/logger/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -219,6 +219,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/mgmcommon/Makefile.in +--- a/ndb/src/common/mgmcommon/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/mgmcommon/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -233,6 +233,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/portlib/Makefile.in +--- a/ndb/src/common/portlib/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/portlib/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -243,6 +243,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/transporter/Makefile.in +--- a/ndb/src/common/transporter/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/transporter/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -219,6 +219,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/common/util/Makefile.in +--- a/ndb/src/common/util/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/common/util/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -235,6 +235,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff 
-r 1270c564d514 ndb/src/cw/Makefile.in +--- a/ndb/src/cw/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/cw/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/cw/cpcd/Makefile.in +--- a/ndb/src/cw/cpcd/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/cw/cpcd/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -226,6 +226,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/Makefile.in +--- a/ndb/src/kernel/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -246,6 +246,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/Makefile.in +--- a/ndb/src/kernel/blocks/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/backup/Makefile.in +--- a/ndb/src/kernel/blocks/backup/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/backup/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/cmvmi/Makefile.in 
+--- a/ndb/src/kernel/blocks/cmvmi/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/cmvmi/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbacc/Makefile.in +--- a/ndb/src/kernel/blocks/dbacc/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbacc/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbdict/Makefile.in +--- a/ndb/src/kernel/blocks/dbdict/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbdict/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -228,6 +228,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbdih/Makefile.in +--- a/ndb/src/kernel/blocks/dbdih/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbdih/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -225,6 +225,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dblqh/Makefile.in +--- a/ndb/src/kernel/blocks/dblqh/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dblqh/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -226,6 +226,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + 
LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbtc/Makefile.in +--- a/ndb/src/kernel/blocks/dbtc/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbtc/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbtup/Makefile.in +--- a/ndb/src/kernel/blocks/dbtup/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbtup/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -226,6 +226,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbtux/Makefile.in +--- a/ndb/src/kernel/blocks/dbtux/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbtux/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -221,6 +221,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/dbutil/Makefile.in +--- a/ndb/src/kernel/blocks/dbutil/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/dbutil/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/ndbcntr/Makefile.in +--- a/ndb/src/kernel/blocks/ndbcntr/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/ndbcntr/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -219,6 +219,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = 
@LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/ndbfs/Makefile.in +--- a/ndb/src/kernel/blocks/ndbfs/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/ndbfs/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -219,6 +219,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/qmgr/Makefile.in +--- a/ndb/src/kernel/blocks/qmgr/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/qmgr/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/suma/Makefile.in +--- a/ndb/src/kernel/blocks/suma/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/suma/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/blocks/trix/Makefile.in +--- a/ndb/src/kernel/blocks/trix/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/blocks/trix/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -218,6 +218,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/error/Makefile.in +--- a/ndb/src/kernel/error/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/error/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ 
-227,6 +227,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/kernel/vm/Makefile.in +--- a/ndb/src/kernel/vm/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/kernel/vm/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -229,6 +229,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/mgmapi/Makefile.in +--- a/ndb/src/mgmapi/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/mgmapi/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -226,6 +226,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/mgmclient/Makefile.in +--- a/ndb/src/mgmclient/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/mgmclient/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -236,6 +236,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/mgmsrv/Makefile.in +--- a/ndb/src/mgmsrv/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/mgmsrv/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -232,6 +232,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/src/ndbapi/Makefile.in +--- a/ndb/src/ndbapi/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/src/ndbapi/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -236,6 +236,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = 
@LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/Makefile.in +--- a/ndb/test/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/ndbapi/Makefile.in +--- a/ndb/test/ndbapi/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/ndbapi/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -613,6 +613,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/ndbapi/bank/Makefile.in +--- a/ndb/test/ndbapi/bank/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/ndbapi/bank/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -305,6 +305,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/run-test/Makefile.in +--- a/ndb/test/run-test/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/run-test/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -264,6 +264,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/src/Makefile.in +--- a/ndb/test/src/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/src/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -234,6 +234,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = 
@LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/test/tools/Makefile.in +--- a/ndb/test/tools/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/test/tools/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -347,6 +347,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 ndb/tools/Makefile.in +--- a/ndb/tools/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/ndb/tools/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -333,6 +333,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 netware/Makefile.in +--- a/netware/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/netware/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -221,6 +221,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 os2/Makefile.in +--- a/os2/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/os2/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 os2/include/Makefile.in +--- a/os2/include/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/os2/include/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 os2/include/sys/Makefile.in +--- a/os2/include/sys/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/os2/include/sys/Makefile.in Mon Dec 22 
00:31:13 2008 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 patch_info/userstats.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/userstats.info Mon Dec 22 00:31:13 2008 -0800 +@@ -0,0 +1,14 @@ ++File=userstatsv2.patch ++Name=SHOW USER/TABLE/INDEX statistics ++Version=V2 ++Author=Google ++License=GPL ++Comment=Added INFORMATION_SCHEMA.*_STATISTICS ++2008-12-01 ++YK: fix behavior for prepared statements ++ ++2008-11-26 ++YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF) ++ ++2008-12-09 ++YK: fixed "Row_sent: 0" problem at microslow_innodb.patch +diff -r 1270c564d514 pstack/Makefile.in +--- a/pstack/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/pstack/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -215,6 +215,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 pstack/aout/Makefile.in +--- a/pstack/aout/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/pstack/aout/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -157,6 +157,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 regex/Makefile.in +--- a/regex/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/regex/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -199,6 +199,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 scripts/Makefile.in +--- a/scripts/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ 
b/scripts/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -198,6 +198,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 server-tools/Makefile.in +--- a/server-tools/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/server-tools/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -175,6 +175,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 server-tools/instance-manager/Makefile.in +--- a/server-tools/instance-manager/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/server-tools/instance-manager/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -223,6 +223,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 sql/Makefile.in +--- a/sql/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -285,6 +285,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -3316,6 +3316,8 @@ + + error = row_insert_for_mysql((byte*) record, prebuilt); + ++ if (error == DB_SUCCESS) rows_changed++; ++ + if (error == DB_SUCCESS && auto_inc_used) { + + /* Fetch the value that was set in the autoincrement field */ +@@ -3588,6 +3590,8 @@ + } + } + ++ if (error == DB_SUCCESS) rows_changed++; ++ + innodb_srv_conc_exit_innodb(prebuilt->trx); + + error = convert_error_code_to_mysql(error, user_thd); +@@ -3635,6 +3639,8 @@ + 
innodb_srv_conc_enter_innodb(prebuilt->trx); + + error = row_update_for_mysql((byte*) record, prebuilt); ++ ++ if (error == DB_SUCCESS) rows_changed++; + + innodb_srv_conc_exit_innodb(prebuilt->trx); + +@@ -4068,6 +4074,9 @@ + if (ret == DB_SUCCESS) { + error = 0; + table->status = 0; ++ rows_read++; ++ if (active_index >= 0 && active_index < MAX_KEY) ++ index_rows_read[active_index]++; + + } else if (ret == DB_RECORD_NOT_FOUND) { + error = HA_ERR_END_OF_FILE; +diff -r 1270c564d514 sql/ha_myisam.cc +--- a/sql/ha_myisam.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/ha_myisam.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -670,7 +670,9 @@ + if ((error= update_auto_increment())) + return error; + } +- return mi_write(file,buf); ++ int error=mi_write(file,buf); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) +@@ -1518,13 +1520,17 @@ + statistic_increment(table->in_use->status_var.ha_update_count,&LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); +- return mi_update(file,old_data,new_data); ++ int error=mi_update(file,old_data,new_data); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::delete_row(const byte * buf) + { + statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); +- return mi_delete(file,buf); ++ int error=mi_delete(file,buf); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::index_read(byte * buf, const byte * key, +@@ -1535,6 +1541,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? 
file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1545,6 +1558,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1555,6 +1575,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1565,6 +1592,13 @@ + &LOCK_status); + int error=mi_rnext(file,buf,active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1575,6 +1609,13 @@ + &LOCK_status); + int error=mi_rprev(file,buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1585,6 +1626,13 @@ + &LOCK_status); + int error=mi_rfirst(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1595,6 +1643,13 @@ + &LOCK_status); + int error=mi_rlast(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? 
file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1611,6 +1666,13 @@ + error= mi_rnext_same(file,buf); + } while (error == HA_ERR_RECORD_DELETED); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1628,6 +1690,7 @@ + &LOCK_status); + int error=mi_scan(file, buf); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) rows_read++; + return error; + } + +@@ -1642,6 +1705,7 @@ + &LOCK_status); + int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length)); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) rows_read++; + return error; + } + +diff -r 1270c564d514 sql/handler.cc +--- a/sql/handler.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/handler.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -725,6 +725,8 @@ + if (cookie) + tc_log->unlog(cookie, xid); + DBUG_EXECUTE_IF("crash_commit_after", abort();); ++ if (is_real_trans) ++ thd->diff_commit_trans++; + end: + if (is_real_trans) + start_waiting_global_read_lock(thd); +@@ -829,6 +831,7 @@ + thd->transaction.cleanup(); + } + } ++ thd->diff_rollback_trans++; + #endif /* USING_TRANSACTIONS */ + if (all) + thd->transaction_rollback_request= FALSE; +@@ -1212,6 +1215,7 @@ + statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); + *ht=0; // keep it conveniently zero-filled + } ++ thd->diff_rollback_trans++; + DBUG_RETURN(error); + } + +@@ -1442,6 +1446,8 @@ + else + dupp_ref=ref+ALIGN_SIZE(ref_length); + } ++ rows_read = rows_changed = 0; ++ memset(index_rows_read, 0, sizeof(index_rows_read)); + DBUG_RETURN(error); + } + +@@ -2276,6 +2282,111 @@ + return error; + } + ++// Updates the global table stats with the TABLE this handler represents. 
++void handler::update_global_table_stats() { ++ if (!opt_userstat_running) { ++ rows_read = rows_changed = 0; ++ return; ++ } ++ ++ if (!rows_read && !rows_changed) return; // Nothing to update. ++ // table_cache_key is db_name + '\0' + table_name + '\0'. ++ if (!table->s || !table->s->table_cache_key || !table->s->table_name) return; ++ ++ TABLE_STATS* table_stats; ++ char key[NAME_LEN * 2 + 2]; ++ // [db] + '.' + [table] ++ sprintf(key, "%s.%s", table->s->table_cache_key, table->s->table_name); ++ ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ // Gets the global table stats, creating one if necessary. ++ if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats, ++ (byte*)key, ++ strlen(key)))) { ++ if (!(table_stats = ((TABLE_STATS*) ++ my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) { ++ // Out of memory. ++ sql_print_error("Allocating table stats failed."); ++ goto end; ++ } ++ strncpy(table_stats->table, key, sizeof(table_stats->table)); ++ table_stats->rows_read = 0; ++ table_stats->rows_changed = 0; ++ table_stats->rows_changed_x_indexes = 0; ++ table_stats->engine_type = (int) ht->db_type; ++ ++ if (my_hash_insert(&global_table_stats, (byte*)table_stats)) { ++ // Out of memory. ++ sql_print_error("Inserting table stats failed."); ++ my_free((char*)table_stats, 0); ++ goto end; ++ } ++ } ++ // Updates the global table stats. ++ table_stats->rows_read += rows_read; ++ table_stats->rows_changed += rows_changed; ++ table_stats->rows_changed_x_indexes += ++ rows_changed * (table->s->keys ? table->s->keys : 1); ++ current_thd->diff_total_read_rows += rows_read; ++ rows_read = rows_changed = 0; ++end: ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++} ++ ++// Updates the global index stats with this handler's accumulated index reads. ++void handler::update_global_index_stats() { ++ // table_cache_key is db_name + '\0' + table_name + '\0'. 
++ if (!table->s || !table->s->table_cache_key || !table->s->table_name) return; ++ ++ if (!opt_userstat_running) { ++ for (int x = 0; x < table->s->keys; x++) { ++ index_rows_read[x] = 0; ++ } ++ return; ++ } ++ ++ for (int x = 0; x < table->s->keys; x++) { ++ if (index_rows_read[x]) { ++ // Rows were read using this index. ++ KEY* key_info = &table->key_info[x]; ++ ++ if (!key_info->name) continue; ++ ++ INDEX_STATS* index_stats; ++ char key[NAME_LEN * 3 + 3]; ++ // [db] + '.' + [table] + '.' + [index] ++ sprintf(key, "%s.%s.%s", table->s->table_cache_key, ++ table->s->table_name, key_info->name); ++ ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ // Gets the global index stats, creating one if necessary. ++ if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats, ++ (byte*)key, ++ strlen(key)))) { ++ if (!(index_stats = ((INDEX_STATS*) ++ my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) { ++ // Out of memory. ++ sql_print_error("Allocating index stats failed."); ++ goto end; ++ } ++ strncpy(index_stats->index, key, sizeof(index_stats->index)); ++ index_stats->rows_read = 0; ++ ++ if (my_hash_insert(&global_index_stats, (byte*)index_stats)) { ++ // Out of memory. ++ sql_print_error("Inserting index stats failed."); ++ my_free((char*)index_stats, 0); ++ goto end; ++ } ++ } ++ // Updates the global index stats. ++ index_stats->rows_read += index_rows_read[x]; ++ index_rows_read[x] = 0; ++end: ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ } ++ } ++} + + /**************************************************************************** + ** Some general functions that isn't in the handler class +diff -r 1270c564d514 sql/handler.h +--- a/sql/handler.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/handler.h Mon Dec 22 00:31:13 2008 -0800 +@@ -30,6 +30,10 @@ + #if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || \ + defined(HAVE_NDBCLUSTER_DB) + #define USING_TRANSACTIONS ++#endif ++ ++#if MAX_KEY > 128 ++#error MAX_KEY is too large. 
Values up to 128 are supported. + #endif + + // the following is for checking tables +@@ -604,6 +608,9 @@ + bool auto_increment_column_changed; + bool implicit_emptied; /* Can be !=0 only if HEAP */ + const COND *pushed_cond; ++ ulonglong rows_read; ++ ulonglong rows_changed; ++ ulonglong index_rows_read[MAX_KEY]; + + handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg), + ht(ht_arg), +@@ -615,8 +622,10 @@ + ref_length(sizeof(my_off_t)), block_size(0), + raid_type(0), ft_handler(0), inited(NONE), + locked(FALSE), implicit_emptied(0), +- pushed_cond(NULL) +- {} ++ pushed_cond(NULL), rows_read(0), rows_changed(0) ++ { ++ memset(index_rows_read, 0, sizeof(index_rows_read)); ++ } + virtual ~handler(void) { DBUG_ASSERT(locked == FALSE); /* TODO: DBUG_ASSERT(inited == NONE); */ } + virtual handler *clone(MEM_ROOT *mem_root); + int ha_open(const char *name, int mode, int test_if_locked); +@@ -625,7 +634,11 @@ + virtual void print_error(int error, myf errflag); + virtual bool get_error_message(int error, String *buf); + uint get_dup_key(int error); +- void change_table_ptr(TABLE *table_arg) { table=table_arg; } ++ void change_table_ptr(TABLE *table_arg) { ++ table=table_arg; ++ rows_read = rows_changed = 0; ++ memset(index_rows_read, 0, sizeof(index_rows_read)); ++ } + virtual double scan_time() + { return ulonglong2double(data_file_length) / IO_SIZE + 2; } + virtual double read_time(uint index, uint ranges, ha_rows rows) +@@ -886,6 +899,9 @@ + virtual bool is_crashed() const { return 0; } + virtual bool auto_repair() const { return 0; } + ++ void update_global_table_stats(); ++ void update_global_index_stats(); ++ + /* + default rename_table() and delete_table() rename/delete files with a + given name and extensions from bas_ext() +diff -r 1270c564d514 sql/lex.h +--- a/sql/lex.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/lex.h Mon Dec 22 00:31:13 2008 -0800 +@@ -109,6 +109,7 @@ + { "CHECKSUM", SYM(CHECKSUM_SYM)}, + { "CIPHER", SYM(CIPHER_SYM)}, + { 
"CLIENT", SYM(CLIENT_SYM)}, ++ { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)}, + { "CLOSE", SYM(CLOSE_SYM)}, + { "CODE", SYM(CODE_SYM)}, + { "COLLATE", SYM(COLLATE_SYM)}, +@@ -238,6 +239,7 @@ + { "IN", SYM(IN_SYM)}, + { "INDEX", SYM(INDEX_SYM)}, + { "INDEXES", SYM(INDEXES)}, ++ { "INDEX_STATISTICS", SYM(INDEX_STATS_SYM)}, + { "INFILE", SYM(INFILE)}, + { "INNER", SYM(INNER_SYM)}, + { "INNOBASE", SYM(INNOBASE_SYM)}, +@@ -443,6 +445,7 @@ + { "SIGNED", SYM(SIGNED_SYM)}, + { "SIMPLE", SYM(SIMPLE_SYM)}, + { "SLAVE", SYM(SLAVE)}, ++ { "SLOW", SYM(SLOW_SYM)}, + { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, + { "SMALLINT", SYM(SMALLINT)}, + { "SOME", SYM(ANY_SYM)}, +@@ -488,6 +491,7 @@ + { "TABLE", SYM(TABLE_SYM)}, + { "TABLES", SYM(TABLES)}, + { "TABLESPACE", SYM(TABLESPACE)}, ++ { "TABLE_STATISTICS", SYM(TABLE_STATS_SYM)}, + { "TEMPORARY", SYM(TEMPORARY)}, + { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, + { "TERMINATED", SYM(TERMINATED)}, +@@ -525,6 +529,7 @@ + { "USE", SYM(USE_SYM)}, + { "USER", SYM(USER)}, + { "USER_RESOURCES", SYM(RESOURCES)}, ++ { "USER_STATISTICS", SYM(USER_STATS_SYM)}, + { "USE_FRM", SYM(USE_FRM)}, + { "USING", SYM(USING)}, + { "UTC_DATE", SYM(UTC_DATE_SYM)}, +diff -r 1270c564d514 sql/log.cc +--- a/sql/log.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/log.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -1899,18 +1899,24 @@ + thd->current_insert_id); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->insert_id_used) + { + Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->rand_used) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->user_var_events.elements) + { +@@ -1926,6 +1932,8 @@ + user_var_event->charset_number); + if 
(e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + } + } +@@ -1936,6 +1944,8 @@ + + if (event_info->write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += event_info->data_written; + + if (file == &log_file) // we are writing to the real log (disk) + { +@@ -2057,6 +2067,7 @@ + */ + if (qinfo.write(&log_file)) + goto err; ++ thd->binlog_bytes_written += qinfo.data_written; + + /* Read from the file used to cache the queries .*/ + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) +@@ -2103,6 +2114,7 @@ + /* write the first half of the split header */ + if (my_b_write(&log_file, header, carry)) + goto err; ++ thd->binlog_bytes_written += carry; + + /* + copy fixed second half of header to cache so the correct +@@ -2171,6 +2183,8 @@ + /* Write data to the binary log file */ + if (my_b_write(&log_file, cache->read_pos, length)) + goto err; ++ thd->binlog_bytes_written += length; ++ + cache->read_pos=cache->read_end; // Mark buffer used up + DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;); + } while ((length=my_b_fill(cache))); +@@ -2179,6 +2193,8 @@ + + if (commit_event->write(&log_file)) + goto err; ++ thd->binlog_bytes_written += commit_event->data_written; ++ + #ifndef DBUG_OFF + DBUG_skip_commit: + #endif +diff -r 1270c564d514 sql/mysql_priv.h +--- a/sql/mysql_priv.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/mysql_priv.h Mon Dec 22 00:31:13 2008 -0800 +@@ -818,7 +818,15 @@ + bool multi_delete_set_locks_and_link_aux_tables(LEX *lex); + void init_max_user_conn(void); + void init_update_queries(void); ++void init_global_user_stats(void); ++void init_global_table_stats(void); ++void init_global_index_stats(void); ++void init_global_client_stats(void); + void free_max_user_conn(void); ++void free_global_user_stats(void); ++void free_global_table_stats(void); ++void free_global_index_stats(void); ++void free_global_client_stats(void); + pthread_handler_t 
handle_one_connection(void *arg); + pthread_handler_t handle_bootstrap(void *arg); + void end_thread(THD *thd,bool put_in_cache); +@@ -1396,6 +1404,7 @@ + extern ulong max_connections,max_connect_errors, connect_timeout; + extern ulong slave_net_timeout, slave_trans_retries; + extern uint max_user_connections; ++extern ulonglong denied_connections; + extern ulong what_to_log,flush_time; + extern ulong query_buff_size, thread_stack; + extern ulong max_prepared_stmt_count, prepared_stmt_count; +@@ -1426,6 +1435,7 @@ + extern my_bool opt_safe_show_db, opt_local_infile; + extern my_bool opt_slave_compressed_protocol, use_temp_pool; + extern my_bool opt_readonly, lower_case_file_system; ++extern my_bool opt_userstat_running; + extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; + extern my_bool opt_secure_auth; + extern char* opt_secure_file_priv; +@@ -1472,6 +1482,14 @@ + extern struct system_variables max_system_variables; + extern struct system_status_var global_status_var; + extern struct rand_struct sql_rand; ++extern HASH global_user_stats; ++extern HASH global_client_stats; ++extern pthread_mutex_t LOCK_global_user_client_stats; ++extern HASH global_table_stats; ++extern pthread_mutex_t LOCK_global_table_stats; ++extern HASH global_index_stats; ++extern pthread_mutex_t LOCK_global_index_stats; ++extern pthread_mutex_t LOCK_stats; + + extern const char *opt_date_time_formats[]; + extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; +diff -r 1270c564d514 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -414,6 +414,7 @@ + uint opt_large_page_size= 0; + my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; + char* opt_slow_logname= 0; ++my_bool opt_userstat_running= 0; + /* + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is +@@ -450,6 +451,7 @@ + ulong 
binlog_cache_use= 0, binlog_cache_disk_use= 0; + ulong max_connections, max_connect_errors; + uint max_user_connections= 0; ++ulonglong denied_connections = 0; + /* + Limit of the total number of prepared statements in the server. + Is necessary to protect the server against out-of-memory attacks. +@@ -550,6 +552,10 @@ + LOCK_crypt, LOCK_bytes_sent, LOCK_bytes_received, + LOCK_global_system_variables, + LOCK_user_conn, LOCK_slave_list, LOCK_active_mi; ++pthread_mutex_t LOCK_stats; ++pthread_mutex_t LOCK_global_user_client_stats; ++pthread_mutex_t LOCK_global_table_stats; ++pthread_mutex_t LOCK_global_index_stats; + /* + The below lock protects access to two global server variables: + max_prepared_stmt_count and prepared_stmt_count. These variables +@@ -1191,6 +1197,10 @@ + x_free(opt_secure_file_priv); + bitmap_free(&temp_pool); + free_max_user_conn(); ++ free_global_user_stats(); ++ free_global_client_stats(); ++ free_global_table_stats(); ++ free_global_index_stats(); + #ifdef HAVE_REPLICATION + end_slave_list(); + free_list(&replicate_do_db); +@@ -1305,6 +1315,10 @@ + (void) pthread_cond_destroy(&COND_thread_cache); + (void) pthread_cond_destroy(&COND_flush_thread_cache); + (void) pthread_cond_destroy(&COND_manager); ++ (void) pthread_mutex_destroy(&LOCK_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_user_client_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_table_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_index_stats); + } + + #endif /*EMBEDDED_LIBRARY*/ +@@ -3152,6 +3166,10 @@ + (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); + (void) pthread_cond_init(&COND_rpl_status, NULL); + #endif ++ (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST); + sp_cache_init(); + /* Parameter for threads 
created for connections */ + (void) pthread_attr_init(&connection_attrib); +@@ -3423,6 +3441,10 @@ + sql_print_error("Out of memory"); + unireg_abort(1); + } ++ ++ init_global_table_stats(); ++ init_global_index_stats(); ++ + if (ha_init()) + { + sql_print_error("Can't init databases"); +@@ -3505,6 +3527,8 @@ + + init_max_user_conn(); + init_update_queries(); ++ init_global_user_stats(); ++ init_global_client_stats(); + DBUG_RETURN(0); + } + +@@ -4189,6 +4213,7 @@ + { + DBUG_PRINT("error",("Too many connections")); + close_connection(thd, ER_CON_COUNT_ERROR, 1); ++ statistic_increment(denied_connections, &LOCK_status); + delete thd; + DBUG_VOID_RETURN; + } +@@ -5007,6 +5032,7 @@ + OPT_PROFILING, + OPT_SLOW_LOG, + OPT_SLOW_QUERY_LOG_FILE, ++ OPT_USERSTAT_RUNNING, + OPT_USE_GLOBAL_LONG_QUERY_TIME, + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, +@@ -6450,6 +6476,10 @@ + (gptr*) &max_system_variables.net_wait_timeout, 0, GET_ULONG, + REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT), + 0, 1, 0}, ++ {"userstat_running", OPT_USERSTAT_RUNNING, ++ "Control USER_STATISTICS, CLIENT_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running", ++ (gptr*) &opt_userstat_running, (gptr*) &opt_userstat_running, ++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} + }; + +diff -r 1270c564d514 sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -323,6 +323,7 @@ + sys_var_thd_ulong sys_read_buff_size("read_buffer_size", + &SV::read_buff_size); + sys_var_bool_ptr sys_readonly("read_only", &opt_readonly); ++sys_var_bool_ptr sys_userstat_running("userstat_running", &opt_userstat_running); + sys_var_thd_ulong sys_read_rnd_buff_size("read_rnd_buffer_size", + &SV::read_rnd_buff_size); + sys_var_thd_ulong sys_div_precincrement("div_precision_increment", +@@ -825,6 +826,7 @@ + &sys_trans_alloc_block_size, + &sys_trans_prealloc_size, + 
&sys_tx_isolation, ++ &sys_userstat_running, + &sys_version, + #ifdef HAVE_BERKELEY_DB + &sys_version_bdb, +@@ -1171,6 +1173,7 @@ + {sys_tx_isolation.name, (char*) &sys_tx_isolation, SHOW_SYS}, + {sys_updatable_views_with_limit.name, + (char*) &sys_updatable_views_with_limit,SHOW_SYS}, ++ {sys_userstat_running.name, (char*) &sys_userstat_running, SHOW_SYS}, + {sys_use_global_long_query_time.name, (char*) &sys_use_global_long_query_time, SHOW_SYS}, + {sys_version.name, (char*) &sys_version, SHOW_SYS}, + #ifdef HAVE_BERKELEY_DB +diff -r 1270c564d514 sql/share/Makefile.in +--- a/sql/share/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/share/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 sql/sql_base.cc +--- a/sql/sql_base.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_base.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -624,6 +624,12 @@ + DBUG_ENTER("close_thread_table"); + DBUG_ASSERT(table->key_read == 0); + DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); ++ ++ if(table->file) ++ { ++ table->file->update_global_table_stats(); ++ table->file->update_global_index_stats(); ++ } + + *table_ptr=table->next; + if (table->needs_reopen_or_name_lock() || +@@ -670,6 +676,9 @@ + { + DBUG_ENTER("close_temporary"); + char path[FN_REFLEN]; ++ ++ table->file->update_global_table_stats(); ++ table->file->update_global_index_stats(); + db_type table_type=table->s->db_type; + strmov(path,table->s->path); + free_io_cache(table); +diff -r 1270c564d514 sql/sql_class.cc +--- a/sql/sql_class.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_class.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -236,6 +236,13 @@ + bzero(ha_data, sizeof(ha_data)); + mysys_var=0; + binlog_evt_union.do_union= FALSE; ++ busy_time = 0; ++ cpu_time = 0; ++ bytes_received = 0; ++ bytes_sent = 
0; ++ binlog_bytes_written = 0; ++ updated_row_count = 0; ++ sent_row_count_2 = 0; + #ifndef DBUG_OFF + dbug_sentry=THD_SENTRY_MAGIC; + #endif +@@ -369,6 +376,88 @@ + total_warn_count= 0; + update_charset(); + bzero((char *) &status_var, sizeof(status_var)); ++ reset_stats(); ++} ++ ++// Resets stats in a THD. ++void THD::reset_stats(void) { ++ current_connect_time = time(NULL); ++ last_global_update_time = current_connect_time; ++ reset_diff_stats(); ++} ++ ++// Resets the 'diff' stats, which are used to update global stats. ++void THD::reset_diff_stats(void) { ++ diff_total_busy_time = 0; ++ diff_total_cpu_time = 0; ++ diff_total_bytes_received = 0; ++ diff_total_bytes_sent = 0; ++ diff_total_binlog_bytes_written = 0; ++ diff_total_sent_rows = 0; ++ diff_total_updated_rows = 0; ++ diff_total_read_rows = 0; ++ diff_select_commands = 0; ++ diff_update_commands = 0; ++ diff_other_commands = 0; ++ diff_commit_trans = 0; ++ diff_rollback_trans = 0; ++ diff_denied_connections = 0; ++ diff_lost_connections = 0; ++ diff_access_denied_errors = 0; ++ diff_empty_queries = 0; ++} ++ ++// Updates 'diff' stats of a THD. ++void THD::update_stats(bool ran_command) { ++ if (opt_userstat_running) { ++ diff_total_busy_time += busy_time; ++ diff_total_cpu_time += cpu_time; ++ diff_total_bytes_received += bytes_received; ++ diff_total_bytes_sent += bytes_sent; ++ diff_total_binlog_bytes_written += binlog_bytes_written; ++ diff_total_sent_rows += sent_row_count_2; ++ diff_total_updated_rows += updated_row_count; ++ // diff_total_read_rows is updated in handler.cc. ++ ++ if (ran_command) { ++ // The replication thread has the COM_CONNECT command. ++ if ((old_command == COM_QUERY || command == COM_CONNECT) && ++ (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) { ++ // A SQL query. 
++ if (lex->sql_command == SQLCOM_SELECT) { ++ if (lex->orig_sql_command == SQLCOM_END) { ++ diff_select_commands++; ++ if (!sent_row_count_2) ++ diff_empty_queries++; ++ } else { ++ // 'SHOW ' commands become SQLCOM_SELECT. ++ diff_other_commands++; ++ // 'SHOW ' commands shouldn't inflate total sent row count. ++ diff_total_sent_rows -= sent_row_count_2; ++ } ++ } else if (is_update_query(lex->sql_command)) { ++ diff_update_commands++; ++ } else { ++ diff_other_commands++; ++ } ++ } ++ } ++ // diff_commit_trans is updated in handler.cc. ++ // diff_rollback_trans is updated in handler.cc. ++ // diff_denied_connections is updated in sql_parse.cc. ++ // diff_lost_connections is updated in sql_parse.cc. ++ // diff_access_denied_errors is updated in sql_parse.cc. ++ ++ /* reset counters to zero to avoid double-counting since values ++ are already store in diff_total_*. */ ++ } ++ busy_time = 0; ++ cpu_time = 0; ++ bytes_received = 0; ++ bytes_sent = 0; ++ binlog_bytes_written = 0; ++ updated_row_count = 0; ++ sent_row_count_2 = 0; + } + + +@@ -408,6 +497,9 @@ + + void THD::change_user(void) + { ++ pthread_mutex_lock(&LOCK_status); ++ add_to_status(&global_status_var, &status_var); ++ pthread_mutex_unlock(&LOCK_status); + cleanup(); + cleanup_done= 0; + init(); +@@ -892,6 +984,33 @@ + } + #endif + ++char *THD::get_client_host_port(THD *client) ++{ ++ Security_context *client_sctx= client->security_ctx; ++ char *client_host= NULL; ++ ++ if (client->peer_port && (client_sctx->host || client_sctx->ip) && ++ security_ctx->host_or_ip[0]) ++ { ++ if ((client_host= this->alloc(LIST_PROCESS_HOST_LEN+1))) ++ my_snprintf((char *) client_host, LIST_PROCESS_HOST_LEN, ++ "%s:%u", client_sctx->host_or_ip, client->peer_port); ++ } ++ else ++ client_host= this->strdup(client_sctx->host_or_ip[0] ? ++ client_sctx->host_or_ip : ++ client_sctx->host ? 
client_sctx->host : ""); ++ ++ return client_host; ++} ++ ++const char *get_client_host(THD *client) ++{ ++ return client->security_ctx->host_or_ip[0] ? ++ client->security_ctx->host_or_ip : ++ client->security_ctx->host ? client->security_ctx->host : ""; ++} ++ + + struct Item_change_record: public ilink + { +@@ -1062,6 +1181,7 @@ + } + } + thd->sent_row_count++; ++ thd->sent_row_count_2++; + if (!thd->vio_ok()) + DBUG_RETURN(0); + if (!thd->net.report_error) +@@ -1154,6 +1274,7 @@ + select_export::~select_export() + { + thd->sent_row_count=row_count; ++ thd->sent_row_count_2=row_count; + } + + +@@ -2088,6 +2209,7 @@ + if (likely(thd != 0)) + { /* current_thd==0 when close_connection() calls net_send_error() */ + thd->status_var.bytes_sent+= length; ++ thd->bytes_sent+= length; + } + } + +@@ -2095,6 +2217,7 @@ + void thd_increment_bytes_received(ulong length) + { + current_thd->status_var.bytes_received+= length; ++ current_thd->bytes_received+= length; + } + + +diff -r 1270c564d514 sql/sql_class.h +--- a/sql/sql_class.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_class.h Mon Dec 22 00:31:13 2008 -0800 +@@ -1298,6 +1298,8 @@ + first byte of the packet in do_command() + */ + enum enum_server_command command; ++ // Used to save the command, before it is set to COM_SLEEP. ++ enum enum_server_command old_command; + uint32 server_id; + uint32 file_id; // for LOAD DATA INFILE + /* +@@ -1487,6 +1489,8 @@ + /* variables.transaction_isolation is reset to this after each commit */ + enum_tx_isolation session_tx_isolation; + enum_check_fields count_cuted_fields; ++ ha_rows updated_row_count; ++ ha_rows sent_row_count_2; /* for userstat */ + + DYNAMIC_ARRAY user_var_events; /* For user variables replication */ + MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */ +@@ -1593,6 +1597,49 @@ + */ + LOG_INFO* current_linfo; + NET* slave_net; // network connection from slave -> m. ++ ++ /* ++ Used to update global user stats. 
The global user stats are updated ++ occasionally with the 'diff' variables. After the update, the 'diff' ++ variables are reset to 0. ++ */ ++ // Time when the current thread connected to MySQL. ++ time_t current_connect_time; ++ // Last time when THD stats were updated in global_user_stats. ++ time_t last_global_update_time; ++ // Busy (non-idle) time for just one command. ++ double busy_time; ++ // Busy time not updated in global_user_stats yet. ++ double diff_total_busy_time; ++ // Cpu (non-idle) time for just one command. ++ double cpu_time; ++ // Cpu time not updated in global_user_stats yet. ++ double diff_total_cpu_time; ++ /* bytes counting */ ++ ulonglong bytes_received; ++ ulonglong diff_total_bytes_received; ++ ulonglong bytes_sent; ++ ulonglong diff_total_bytes_sent; ++ ulonglong binlog_bytes_written; ++ ulonglong diff_total_binlog_bytes_written; ++ ++ // Number of rows not reflected in global_user_stats yet. ++ ha_rows diff_total_sent_rows, diff_total_updated_rows, diff_total_read_rows; ++ // Number of commands not reflected in global_user_stats yet. ++ ulonglong diff_select_commands, diff_update_commands, diff_other_commands; ++ // Number of transactions not reflected in global_user_stats yet. ++ ulonglong diff_commit_trans, diff_rollback_trans; ++ // Number of connection errors not reflected in global_user_stats yet. ++ ulonglong diff_denied_connections, diff_lost_connections; ++ // Number of db access denied, not reflected in global_user_stats yet. ++ ulonglong diff_access_denied_errors; ++ // Number of queries that return 0 rows ++ ulonglong diff_empty_queries; ++ ++ // Per account query delay in milliseconds. When not 0, sleep this number of ++ // milliseconds before every SQL command. ++ ulonglong query_delay_millis; ++ + /* Used by the sys_var class to store temporary values */ + union + { +@@ -1648,6 +1695,11 @@ + alloc_root.
+ */ + void init_for_queries(); ++ void reset_stats(void); ++ void reset_diff_stats(void); ++ // ran_command is true when this is called immediately after a ++ // command has been run. ++ void update_stats(bool ran_command); + void change_user(void); + void cleanup(void); + void cleanup_after_query(); +@@ -1877,7 +1929,13 @@ + if (p_db_length) + *p_db_length= db_length; + return FALSE; ++ ++ // Returns string as 'IP:port' for the client-side of the connection represented ++ // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of ++ // this THD and that is not reclaimed immediately, so use sparingly. May return NULL. + } ++ ++ char *get_client_host_port(THD *client); + + public: + /** +@@ -1921,6 +1979,11 @@ + MEM_ROOT main_mem_root; + }; + ++// Returns string as 'IP' for the client-side of the connection represented by ++// 'client'. Does not allocate memory. May return "". ++const char *get_client_host(THD *client); ++ ++#define LIST_PROCESS_HOST_LEN 64 + + #define tmp_disable_binlog(A) \ + {ulonglong tmp_disable_binlog__save_options= (A)->options; \ +diff -r 1270c564d514 sql/sql_delete.cc +--- a/sql/sql_delete.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_delete.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -358,6 +358,7 @@ + send_ok(thd,deleted); + DBUG_PRINT("info",("%ld records deleted",(long) deleted)); + } ++ thd->updated_row_count += deleted; + DBUG_RETURN(error >= 0 || thd->net.report_error); + } + +@@ -869,6 +870,7 @@ + thd->row_count_func= deleted; + ::send_ok(thd, deleted); + } ++ thd->updated_row_count += deleted; + return 0; + } + +diff -r 1270c564d514 sql/sql_insert.cc +--- a/sql/sql_insert.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_insert.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -989,6 +989,7 @@ + thd->row_count_func= info.copied + info.deleted + updated; + ::send_ok(thd, (ulong) thd->row_count_func, id, buff); + } ++ thd->updated_row_count += thd->row_count_func; + thd->abort_on_warning= 0; + DBUG_RETURN(FALSE); + +@@
-3036,6 +3037,7 @@ + autoinc_value_of_first_inserted_row : thd->insert_id_used ? + thd->last_insert_id : 0; + ::send_ok(thd, (ulong) thd->row_count_func, id, buff); ++ thd->updated_row_count += thd->row_count_func; + DBUG_RETURN(0); + } + +diff -r 1270c564d514 sql/sql_lex.h +--- a/sql/sql_lex.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_lex.h Mon Dec 22 00:31:13 2008 -0800 +@@ -101,6 +101,9 @@ + When a command is added here, be sure it's also added in mysqld.cc + in "struct show_var_st status_vars[]= {" ... + */ ++ // TODO(mcallaghan): update status_vars in mysqld to export these ++ SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, ++ SQLCOM_SHOW_CLIENT_STATS, + /* This should be the last !!! */ + SQLCOM_END + }; +diff -r 1270c564d514 sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -78,6 +78,12 @@ + const char *table_name); + static bool check_show_create_table_access(THD *thd, TABLE_LIST *table); + ++// Increments connection count for user. 
++static int increment_connection_count(THD* thd, bool use_lock); ++ ++// Uses the THD to update the global stats by user name and client IP ++void update_global_user_stats(THD* thd, bool create_user, time_t now); ++ + const char *any_db="*any*"; // Special symbol for check_access + + const char *command_name[]={ +@@ -145,6 +151,17 @@ + #ifndef EMBEDDED_LIBRARY + static bool do_command(THD *thd); + #endif // EMBEDDED_LIBRARY ++ ++HASH global_user_stats; ++HASH global_client_stats; ++// Protects global_user_stats and global_client_stats ++extern pthread_mutex_t LOCK_global_user_client_stats; ++ ++HASH global_table_stats; ++extern pthread_mutex_t LOCK_global_table_stats; ++ ++HASH global_index_stats; ++extern pthread_mutex_t LOCK_global_index_stats; + + #ifdef __WIN__ + extern void win_install_sigabrt_handler(void); +@@ -504,6 +521,7 @@ + mysql_log.write(thd,COM_CONNECT,ER(ER_NOT_SUPPORTED_AUTH_MODE)); + DBUG_RETURN(-1); + } ++ thd->diff_access_denied_errors++; + net_printf_error(thd, ER_ACCESS_DENIED_ERROR, + thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip, +@@ -536,12 +554,190 @@ + void init_max_user_conn(void) + { + #ifndef NO_EMBEDDED_ACCESS_CHECKS +- (void) hash_init(&hash_user_connections,system_charset_info,max_connections, +- 0,0, +- (hash_get_key) get_key_conn, (hash_free_key) free_user, +- 0); +-#endif +-} ++ if (hash_init(&hash_user_connections,system_charset_info,max_connections, ++ 0,0, ++ (hash_get_key) get_key_conn, (hash_free_key) free_user, ++ 0)) { ++ sql_print_error("Initializing hash_user_connections failed."); ++ exit(1); ++ } ++#endif ++} ++ ++byte *get_key_user_stats(USER_STATS *user_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(user_stats->user); ++ return (byte*)user_stats->user; ++} ++ ++void free_user_stats(USER_STATS* user_stats) ++{ ++ my_free((char*)user_stats, MYF(0)); ++} ++ ++void init_user_stats(USER_STATS *user_stats, ++ const char *user, ++ const char *priv_user, ++ uint 
total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries) ++{ ++ DBUG_ENTER("init_user_stats"); ++ DBUG_PRINT("info", ++ ("Add user_stats entry for user %s - priv_user %s", ++ user, priv_user)); ++ strncpy(user_stats->user, user, sizeof(user_stats->user) - 1); user_stats->user[sizeof(user_stats->user) - 1] = '\0'; /* strncpy() does not terminate on truncation; the hash key callback runs strlen() on this field */ ++ strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user) - 1); user_stats->priv_user[sizeof(user_stats->priv_user) - 1] = '\0'; ++ ++ user_stats->total_connections = total_connections; ++ user_stats->concurrent_connections = concurrent_connections; ++ user_stats->connected_time = connected_time; ++ user_stats->busy_time = busy_time; ++ user_stats->cpu_time = cpu_time; ++ user_stats->bytes_received = bytes_received; ++ user_stats->bytes_sent = bytes_sent; ++ user_stats->binlog_bytes_written = binlog_bytes_written; ++ user_stats->rows_fetched = rows_fetched; ++ user_stats->rows_updated = rows_updated; ++ user_stats->rows_read = rows_read; ++ user_stats->select_commands = select_commands; ++ user_stats->update_commands = update_commands; ++ user_stats->other_commands = other_commands; ++ user_stats->commit_trans = commit_trans; ++ user_stats->rollback_trans = rollback_trans; ++ user_stats->denied_connections = denied_connections; ++ user_stats->lost_connections = lost_connections; ++ user_stats->access_denied_errors = access_denied_errors; ++ user_stats->empty_queries = empty_queries; ++ DBUG_VOID_RETURN; ++} ++ ++void add_user_stats(USER_STATS *user_stats, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time,
++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries) ++{ ++ user_stats->total_connections += total_connections; ++ user_stats->concurrent_connections += concurrent_connections; ++ user_stats->connected_time += connected_time; ++ user_stats->busy_time += busy_time; ++ user_stats->cpu_time += cpu_time; ++ user_stats->bytes_received += bytes_received; ++ user_stats->bytes_sent += bytes_sent; ++ user_stats->binlog_bytes_written += binlog_bytes_written; ++ user_stats->rows_fetched += rows_fetched; ++ user_stats->rows_updated += rows_updated; ++ user_stats->rows_read += rows_read; ++ user_stats->select_commands += select_commands; ++ user_stats->update_commands += update_commands; ++ user_stats->other_commands += other_commands; ++ user_stats->commit_trans += commit_trans; ++ user_stats->rollback_trans += rollback_trans; ++ user_stats->denied_connections += denied_connections; ++ user_stats->lost_connections += lost_connections; ++ user_stats->access_denied_errors += access_denied_errors; ++ user_stats->empty_queries += empty_queries; ++} ++ ++void init_global_user_stats(void) ++{ ++ if (hash_init(&global_user_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) { ++ sql_print_error("Initializing global_user_stats failed."); ++ exit(1); ++ } ++} ++ ++void init_global_client_stats(void) ++{ ++ if (hash_init(&global_client_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) { ++ sql_print_error("Initializing global_client_stats failed."); ++ exit(1); ++ 
} ++} ++ ++extern "C" byte *get_key_table_stats(TABLE_STATS *table_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(table_stats->table); ++ return (byte*)table_stats->table; ++} ++ ++extern "C" void free_table_stats(TABLE_STATS* table_stats) ++{ ++ my_free((char*)table_stats, MYF(0)); ++} ++ ++void init_global_table_stats(void) ++{ ++ if (hash_init(&global_table_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_table_stats, ++ (hash_free_key)free_table_stats, 0)) { ++ sql_print_error("Initializing global_table_stats failed."); ++ exit(1); ++ } ++} ++ ++extern "C" byte *get_key_index_stats(INDEX_STATS *index_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(index_stats->index); ++ return (byte*)index_stats->index; ++} ++ ++extern "C" void free_index_stats(INDEX_STATS* index_stats) ++{ ++ my_free((char*)index_stats, MYF(0)); ++} ++ ++void init_global_index_stats(void) ++{ ++ if (hash_init(&global_index_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_index_stats, ++ (hash_free_key)free_index_stats, 0)) { ++ sql_print_error("Initializing global_index_stats failed."); ++ exit(1); ++ } ++} ++ + + + /* +@@ -599,7 +795,10 @@ + + end: + if (error) ++ { ++ statistic_increment(denied_connections, &LOCK_status); + uc->connections--; // no need for decrease_user_connections() here ++ } + (void) pthread_mutex_unlock(&LOCK_user_conn); + DBUG_RETURN(error); + } +@@ -646,6 +845,25 @@ + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ + } + ++void free_global_user_stats(void) ++{ ++ hash_free(&global_user_stats); ++} ++ ++void free_global_table_stats(void) ++{ ++ hash_free(&global_table_stats); ++} ++ ++void free_global_index_stats(void) ++{ ++ hash_free(&global_index_stats); ++} ++ ++void free_global_client_stats(void) ++{ ++ hash_free(&global_client_stats); ++} + + + /* +@@ -698,6 +916,214 @@ + return uc_update_queries[command] != 0; + } + ++// 
'mysql_system_user' is used for when the user is not defined for a THD. ++static char mysql_system_user[] = "#mysql_system#"; ++ ++// Returns 'user' if it's not NULL. Returns 'mysql_system_user' otherwise. ++static char* get_valid_user_string(char* user) { ++ return user ? user : mysql_system_user; ++} ++ ++// Increments the global stats connection count for an entry from ++// global_client_stats or global_user_stats. Returns 0 on success ++// and 1 on error. ++static int increment_count_by_name(const char *name, const char *role_name, ++ HASH *users_or_clients, THD *thd) ++{ ++ USER_STATS* user_stats; ++ ++ if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, name, ++ strlen(name)))) ++ { ++ // First connection for this user or client ++ if (!(user_stats = ((USER_STATS*) ++ my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL))))) ++ { ++ return 1; // Out of memory ++ } ++ ++ init_user_stats(user_stats, name, role_name, ++ 0, 0, // connections ++ 0, 0, 0, // time ++ 0, 0, 0, // bytes sent, received and written ++ 0, 0, 0, // rows fetched, updated and read ++ 0, 0, 0, // select, update and other commands ++ 0, 0, // commit and rollback trans ++ thd->diff_denied_connections, ++ 0, // lost connections ++ 0, // access denied errors ++ 0); // empty queries ++ ++ if (my_hash_insert(users_or_clients, (byte*)user_stats)) ++ { ++ my_free((char*)user_stats, 0); ++ return 1; // Out of memory ++ } ++ } ++ user_stats->total_connections++; ++ return 0; ++} ++ ++// Increments the global user and client stats connection count. If 'use_lock' ++// is true, LOCK_global_user_client_stats will be locked/unlocked. Returns ++// 0 on success, 1 on error. 
++static int increment_connection_count(THD* thd, bool use_lock) ++{ ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ const char* client_string = get_client_host(thd); ++ int return_value = 0; ++ ++ if (!opt_userstat_running) ++ return return_value; ++ ++ if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats); ++ ++ if (increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd)) ++ { ++ return_value = 1; ++ goto end; ++ } ++ if (increment_count_by_name(client_string, ++ user_string, ++ &global_client_stats, thd)) ++ { ++ return_value = 1; ++ goto end; ++ } ++ ++end: ++ if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ return return_value; ++} ++ ++// Used to update the global user and client stats. ++static void update_global_user_stats_with_user(THD* thd, ++ USER_STATS* user_stats, ++ time_t now) ++{ ++ user_stats->connected_time += now - thd->last_global_update_time; ++ thd->last_global_update_time = now; ++ user_stats->busy_time += thd->diff_total_busy_time; ++ user_stats->cpu_time += thd->diff_total_cpu_time; ++ user_stats->bytes_received += thd->diff_total_bytes_received; ++ user_stats->bytes_sent += thd->diff_total_bytes_sent; ++ user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written; ++ user_stats->rows_fetched += thd->diff_total_sent_rows; ++ user_stats->rows_updated += thd->diff_total_updated_rows; ++ user_stats->rows_read += thd->diff_total_read_rows; ++ user_stats->select_commands += thd->diff_select_commands; ++ user_stats->update_commands += thd->diff_update_commands; ++ user_stats->other_commands += thd->diff_other_commands; ++ user_stats->commit_trans += thd->diff_commit_trans; ++ user_stats->rollback_trans += thd->diff_rollback_trans; ++ user_stats->denied_connections += thd->diff_denied_connections; ++ user_stats->lost_connections += thd->diff_lost_connections; ++ user_stats->access_denied_errors += thd->diff_access_denied_errors; ++ 
user_stats->empty_queries += thd->diff_empty_queries; ++} ++ ++// Updates the global stats of a user or client ++void update_global_user_stats(THD* thd, bool create_user, time_t now) ++{ ++ if (opt_userstat_running) { ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ const char* client_string = get_client_host(thd); ++ ++ USER_STATS* user_stats; ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ ++ // Update by user name ++ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats, ++ (byte*)user_string, ++ strlen(user_string)))) { ++ // Found user. ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Create the entry ++ if (create_user) { ++ increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd); ++ } ++ } ++ ++ // Update by client IP ++ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats, ++ (byte*)client_string, ++ strlen(client_string)))) { ++ // Found by client IP ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Create the entry ++ if (create_user) { ++ increment_count_by_name(client_string, ++ user_string, ++ &global_client_stats, thd); ++ } ++ } ++ thd->reset_diff_stats(); ++ ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ } else { ++ thd->reset_diff_stats(); ++ } ++} ++ ++// Determines the concurrent number of connections of current threads. ++static void set_connections_stats() ++{ ++ USER_STATS* user_stats; ++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ pthread_mutex_lock(&LOCK_thread_count); ++ ++ // Resets all concurrent connections to 0. 
++ for (int i = 0; i < global_user_stats.records; ++i) { ++ user_stats = (USER_STATS*)hash_element(&global_user_stats, i); ++ user_stats->concurrent_connections = 0; ++ } ++ for (int i = 0; i < global_client_stats.records; ++i) { ++ user_stats = (USER_STATS*)hash_element(&global_client_stats, i); ++ user_stats->concurrent_connections = 0; ++ } ++ ++ I_List_iterator<THD> it(threads); ++ THD* thd; ++ time_t now = time(NULL); ++ // Iterates through the current threads. ++ while ((thd = it++)) { ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats, ++ (byte*)user_string, ++ strlen(user_string)))) { ++ // Found user. ++ user_stats->concurrent_connections++; ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // The user name should exist. ++ if (user_string == mysql_system_user) { ++ // Only create the user if it is the mysql_system_user ++ increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd); ++ } ++ } ++ ++ const char* client_string = get_client_host(thd); ++ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats, ++ (byte*)client_string, ++ strlen(client_string)))) { ++ // Found user. 
++ user_stats->concurrent_connections++; ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Do nothing, unlike what is done for global_user_stats ++ } ++ thd->reset_diff_stats(); ++ } ++ pthread_mutex_unlock(&LOCK_thread_count); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++} ++ + /* + Reset per-hour user resource limits when it has been more than + an hour since they were last checked +@@ -1184,6 +1607,8 @@ + my_net_set_read_timeout(net, connect_timeout); + my_net_set_write_timeout(net, connect_timeout); + ++ bool create_user = true; ++ + if ((error=check_connection(thd))) + { // Wrong permissions + if (error > 0) +@@ -1193,8 +1618,22 @@ + my_sleep(1000); /* must wait after eof() */ + #endif + statistic_increment(aborted_connects,&LOCK_status); ++ thd->diff_denied_connections++; ++ if (error == -2) { ++ // Do not create statistics for a user who does not exist, or failed ++ // to authenticate. ++ create_user = false; ++ } + goto end_thread; + } ++ ++ thd->reset_stats(); ++ // Updates global user connection stats. 
++ if (increment_connection_count(thd, true)) { ++ net_send_error(thd, ER_OUTOFMEMORY); // Out of memory ++ goto end_thread; ++ } ++ + #ifdef __NETWARE__ + netware_reg_user(sctx->ip, sctx->user, "MySQL"); + #endif +@@ -1251,6 +1690,7 @@ + net->vio && net->error && net->report_error) + { + statistic_increment(aborted_threads, &LOCK_status); ++ thd->diff_lost_connections++; + } + + if (net->error && net->vio != 0 && net->report_error) +@@ -1270,6 +1710,8 @@ + + end_thread: + close_connection(thd, 0, 1); ++ thd->update_stats(false); ++ update_global_user_stats(thd, create_user, time(NULL)); + end_thread(thd,1); + /* + If end_thread returns, we are either running with --one-thread +@@ -1601,6 +2043,13 @@ + + thd->clear_error(); // Clear error message + ++ thd->updated_row_count=0; ++ thd->busy_time=0; ++ thd->cpu_time=0; ++ thd->bytes_received=0; ++ thd->bytes_sent=0; ++ thd->binlog_bytes_written=0; ++ + net_new_transaction(net); + + packet_length= my_net_read(net); +@@ -1759,6 +2208,9 @@ + } + + thd->command=command; ++ // To increment the correct command counter for user stats, 'command' must ++ // be saved because it is set to COM_SLEEP at the end of this function. ++ thd->old_command = command; + /* + Commands which always take a long time are logged into + the slow log only if opt_log_slow_admin_statements is set. +@@ -4424,6 +4876,15 @@ + if (check_global_access(thd,RELOAD_ACL)) + goto error; + ++ if(lex->type & REFRESH_SLOW_QUERY_LOG) { ++ /* We are only flushing slow query log */ ++ mysql_slow_log.new_file(1); ++ ++ send_ok(thd); ++ break; ++ } ++ ++ + /* + reload_acl_and_cache() will tell us if we are allowed to write to the + binlog or not. +@@ -4731,6 +5192,7 @@ + { + if (check_global_access(thd, SUPER_ACL)) + { ++ thd->diff_access_denied_errors++; + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER"); + goto create_sp_error; + } +@@ -5567,6 +6029,7 @@ + if (!no_errors) + { + const char *db_name= db ?
db : thd->db; ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, db_name); + } +@@ -5602,6 +6065,7 @@ + { // We can never grant this + DBUG_PRINT("error",("No possible access")); ++ if (!no_errors) thd->diff_access_denied_errors++; /* own guard: the unbraced if below must keep governing my_error() */ + if (!no_errors) + my_error(ER_ACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, + sctx->priv_host, +@@ -5634,11 +6098,15 @@ + + DBUG_PRINT("error",("Access denied")); + if (!no_errors) ++ { ++ // increment needs !no_errors condition, otherwise double counting. ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, + (db ? db : (thd->db ? + thd->db : + "unknown"))); /* purecov: tested */ ++ } + DBUG_RETURN(TRUE); /* purecov: tested */ + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ + } +@@ -5672,6 +6140,7 @@ + if ((thd->security_ctx->master_access & want_access)) + return 0; + get_privilege_desc(command, sizeof(command), want_access); ++ thd->diff_access_denied_errors++; + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command); + return 1; + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ +@@ -5704,6 +6173,7 @@ + + if (!thd->col_access && check_grant_db(thd, dst_db_name)) + { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host, +@@ -5735,6 +6205,12 @@ + check_grant(thd, SELECT_ACL, dst_table, 2, UINT_MAX, FALSE); + } + ++ ++ case SCH_USER_STATS: ++ case SCH_CLIENT_STATS: ++ return check_global_access(thd, SUPER_ACL | PROCESS_ACL); ++ case SCH_TABLE_STATS: ++ case SCH_INDEX_STATS: + case SCH_OPEN_TABLES: + case SCH_VARIABLES: + case SCH_STATUS: +@@ -5788,8 +6264,8 @@ + #ifndef NO_EMBEDDED_ACCESS_CHECKS + TABLE_LIST *org_tables= tables; + #endif ++ Security_context *sctx= thd->security_ctx, *backup_ctx= thd->security_ctx; + TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); +- Security_context *sctx= thd->security_ctx, *backup_ctx=
thd->security_ctx; + /* + The check that first_not_own_table is not reached is for the case when + the given table list refers to the list for prelocking (contains tables +@@ -5806,9 +6282,12 @@ + (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL))) + { + if (!no_errors) ++ { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, + INFORMATION_SCHEMA_NAME.str); ++ } + return TRUE; + } + /* +@@ -6317,6 +6796,30 @@ + */ + lex_start(thd); + mysql_reset_thd_for_next_command(thd); ++ ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } + + if (query_cache_send_result_to_client(thd, (char*) inBuf, length) <= 0) + { +@@ -6396,6 +6899,39 @@ + *found_semicolon= NULL; + } + ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. 
++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -7407,8 +7943,35 @@ + pthread_mutex_unlock(&LOCK_active_mi); + } + #endif +- if (options & REFRESH_USER_RESOURCES) +- reset_mqh((LEX_USER *) NULL); ++ if (options & REFRESH_TABLE_STATS) ++ { ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ free_global_table_stats(); ++ init_global_table_stats(); ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++ } ++ if (options & REFRESH_INDEX_STATS) ++ { ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ free_global_index_stats(); ++ init_global_index_stats(); ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ } ++ if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS)) ++ { ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ if (options & REFRESH_USER_STATS) ++ { ++ free_global_user_stats(); ++ init_global_user_stats(); ++ } ++ if (options & REFRESH_CLIENT_STATS) ++ { ++ free_global_client_stats(); ++ init_global_client_stats(); ++ } ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ } + *write_to_binlog= tmp_write_to_binlog; + return result; + } +diff -r 1270c564d514 sql/sql_prepare.cc +--- a/sql/sql_prepare.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_prepare.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -80,6 +80,9 @@ + #else + #include <mysql_com.h> + #endif ++ ++// Uses the THD to update the global stats by user name and client IP 
++void update_global_user_stats(THD* thd, bool create_user, time_t now); + + /* A result class used to send cursor rows using the binary protocol. */ + +@@ -1910,8 +1913,32 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + if (! (stmt= new Prepared_statement(thd, &thd->protocol_prep))) +- DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */ ++ goto end; /* out of memory: error is set in Sql_alloc */ + + if (thd->stmt_map.insert(thd, stmt)) + { +@@ -1919,7 +1946,7 @@ + The error is set in the insert. The statement itself + will be also deleted there (this is how the hash works). + */ +- DBUG_VOID_RETURN; ++ goto end; + } + + /* Reset warnings from previous command */ +@@ -1941,6 +1968,40 @@ + thd->stmt_map.erase(stmt); + } + /* check_prepared_statemnt sends the metadata packet in case of success */ ++end: ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. 
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2281,8 +2342,32 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. 
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_execute"))) +- DBUG_VOID_RETURN; ++ goto end; + + #ifdef ENABLED_PROFILING + thd->profiling.set_query_source(stmt->query, stmt->query_length); +@@ -2325,11 +2410,46 @@ + test(flags & (ulong) CURSOR_TYPE_READ_ONLY)); + if (!(specialflag & SPECIAL_NO_PRIOR)) + my_pthread_setprio(pthread_self(), WAIT_PRIOR); +- DBUG_VOID_RETURN; ++ goto end; + + set_params_data_err: + my_error(ER_WRONG_ARGUMENTS, MYF(0), "mysql_stmt_execute"); + reset_stmt_params(stmt); ++ ++end: ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. 
++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2423,6 +2543,31 @@ + + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); ++ ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + statistic_increment(thd->status_var.com_stmt_fetch, &LOCK_status); + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_fetch"))) + DBUG_VOID_RETURN; +@@ -2455,6 +2600,39 @@ + thd->restore_backup_statement(stmt, &stmt_backup); + thd->stmt_arena= thd; + ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. 
++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2487,6 +2665,30 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + statistic_increment(thd->status_var.com_stmt_reset, &LOCK_status); + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_reset"))) + DBUG_VOID_RETURN; +@@ -2502,6 +2704,39 @@ + stmt->state= Query_arena::PREPARED; + + send_ok(thd); ++ ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. 
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); + + DBUG_VOID_RETURN; + } +diff -r 1270c564d514 sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -530,6 +530,7 @@ + sctx->master_access); + if (!(db_access & DB_ACLS) && (!grant_option || check_grant_db(thd,dbname))) + { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->host_or_ip, dbname); + mysql_log.write(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR), +@@ -1858,6 +1859,300 @@ + DBUG_RETURN(FALSE); + } + ++/* ++ Aggregate values for mapped_user entries by their role. 
++ ++ SYNOPSIS ++ aggregate_user_stats ++ all_user_stats - input to aggregate ++ agg_user_stats - returns aggregated values ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++static int ++aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats) ++{ ++ DBUG_ENTER("aggregate_user_stats"); ++ if (hash_init(agg_user_stats, system_charset_info, ++ max(all_user_stats->records, 1), ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) ++ { ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ ++ for (int i = 0; i < all_user_stats->records; ++i) { ++ USER_STATS *user = (USER_STATS*)hash_element(all_user_stats, i); ++ USER_STATS *agg_user; ++ if (!(agg_user = (USER_STATS*)hash_search(agg_user_stats, ++ (byte*)user->priv_user, ++ strlen(user->priv_user)))) ++ { ++ // First entry for this role. ++ if (!(agg_user = ++ (USER_STATS*) my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))) ++ { ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ ++ init_user_stats(agg_user, user->priv_user, user->priv_user, ++ user->total_connections, user->concurrent_connections, ++ user->connected_time, user->busy_time, user->cpu_time, ++ user->bytes_received, user->bytes_sent, ++ user->binlog_bytes_written, ++ user->rows_fetched, user->rows_updated, user->rows_read, ++ user->select_commands, user->update_commands, ++ user->other_commands, ++ user->commit_trans, user->rollback_trans, ++ user->denied_connections, user->lost_connections, ++ user->access_denied_errors, user->empty_queries); ++ ++ if (my_hash_insert(agg_user_stats, (byte*)agg_user)) ++ { ++ // Out of memory. ++ my_free((char*)agg_user, 0); ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ } ++ else ++ { ++ // Aggregate with existing values for this role. 
++ add_user_stats(agg_user, ++ user->total_connections, user->concurrent_connections, ++ user->connected_time, user->busy_time, user->cpu_time, ++ user->bytes_received, user->bytes_sent, ++ user->binlog_bytes_written, ++ user->rows_fetched, user->rows_updated, user->rows_read, ++ user->select_commands, user->update_commands, ++ user->other_commands, ++ user->commit_trans, user->rollback_trans, ++ user->denied_connections, user->lost_connections, ++ user->access_denied_errors, user->empty_queries); ++ } ++ } ++ DBUG_PRINT("exit", ("aggregated %d input into %d output entries", ++ all_user_stats->records, agg_user_stats->records)); ++ DBUG_RETURN(0); ++} ++ ++/* ++ Write result to network for SHOW USER_STATISTICS ++ ++ SYNOPSIS ++ send_user_stats ++ all_user_stats - values to return ++ table - I_S table ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table) ++{ ++ DBUG_ENTER("send_user_stats"); ++ for (int i = 0; i < all_user_stats->records; ++i) { ++ restore_record(table, s->default_values); ++ USER_STATS *user_stats = (USER_STATS*)hash_element(all_user_stats, i); ++ table->field[0]->store(user_stats->user, strlen(user_stats->user), system_charset_info); ++ table->field[1]->store((longlong)user_stats->total_connections); ++ table->field[2]->store((longlong)user_stats->concurrent_connections); ++ table->field[3]->store((longlong)user_stats->connected_time); ++ table->field[4]->store((longlong)user_stats->busy_time); ++ table->field[5]->store((longlong)user_stats->cpu_time); ++ table->field[6]->store((longlong)user_stats->bytes_received); ++ table->field[7]->store((longlong)user_stats->bytes_sent); ++ table->field[8]->store((longlong)user_stats->binlog_bytes_written); ++ table->field[9]->store((longlong)user_stats->rows_fetched); ++ table->field[10]->store((longlong)user_stats->rows_updated); ++ table->field[11]->store((longlong)user_stats->rows_read); ++ 
table->field[12]->store((longlong)user_stats->select_commands); ++ table->field[13]->store((longlong)user_stats->update_commands); ++ table->field[14]->store((longlong)user_stats->other_commands); ++ table->field[15]->store((longlong)user_stats->commit_trans); ++ table->field[16]->store((longlong)user_stats->rollback_trans); ++ table->field[17]->store((longlong)user_stats->denied_connections); ++ table->field[18]->store((longlong)user_stats->lost_connections); ++ table->field[19]->store((longlong)user_stats->access_denied_errors); ++ table->field[20]->store((longlong)user_stats->empty_queries); ++ if (schema_table_store_record(thd, table)) ++ { ++ DBUG_PRINT("error", ("store record error")); ++ DBUG_RETURN(1); ++ } ++ } ++ DBUG_RETURN(0); ++} ++ ++/* ++ Process SHOW USER_STATISTICS ++ ++ SYNOPSIS ++ mysqld_show_user_stats ++ thd - current thread ++ wild - limit results to the entry for this user ++ with_roles - when true, display role for mapped users ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++ ++ ++int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_user_stats"); ++ ++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL)) ++ DBUG_RETURN(1); ++ ++ // Iterates through all the global stats and sends them to the client. ++ // Pattern matching on the client IP is supported. 
++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ int result= send_user_stats(thd, &global_user_stats, table); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ if (result) ++ goto err; ++ ++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 0")); ++ DBUG_RETURN(0); ++ ++ err: ++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 1")); ++ DBUG_RETURN(1); ++} ++ ++/* ++ Process SHOW CLIENT_STATISTICS ++ ++ SYNOPSIS ++ mysqld_show_client_stats ++ thd - current thread ++ wild - limit results to the entry for this client ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++ ++ ++int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_client_stats"); ++ ++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL)) ++ DBUG_RETURN(1); ++ ++ // Iterates through all the global stats and sends them to the client. ++ // Pattern matching on the client IP is supported. ++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ int result= send_user_stats(thd, &global_client_stats, table); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ if (result) ++ goto err; ++ ++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 0")); ++ DBUG_RETURN(0); ++ ++ err: ++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 1")); ++ DBUG_RETURN(1); ++} ++ ++ ++// Sends the global table stats back to the client. 
++int fill_schema_table_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_table_stats"); ++ char *table_full_name, *table_schema; ++ ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ for (int i = 0; i < global_table_stats.records; ++i) { ++ restore_record(table, s->default_values); ++ TABLE_STATS *table_stats = ++ (TABLE_STATS*)hash_element(&global_table_stats, i); ++ ++ table_full_name= thd->strdup(table_stats->table); ++ table_schema= strsep(&table_full_name, "."); ++ ++ TABLE_LIST tmp_table; ++ bzero((char*) &tmp_table,sizeof(tmp_table)); ++ tmp_table.table_name= table_full_name; ++ tmp_table.db= table_schema; ++ tmp_table.grant.privilege= 0; ++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db, ++ &tmp_table.grant.privilege, 0, 0, ++ is_schema_db(table_schema)) || ++ grant_option && check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1)) ++ continue; ++ ++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info); ++ table->field[1]->store(table_full_name, strlen(table_full_name), system_charset_info); ++ table->field[2]->store((longlong)table_stats->rows_read, TRUE); ++ table->field[3]->store((longlong)table_stats->rows_changed, TRUE); ++ table->field[4]->store((longlong)table_stats->rows_changed_x_indexes, TRUE); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_global_table_stats)); ++ DBUG_RETURN(1); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++ DBUG_RETURN(0); ++} ++ ++// Sends the global index stats back to the client. 
++int fill_schema_index_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_index_stats"); ++ char *index_full_name, *table_schema, *table_name; ++ ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ for (int i = 0; i < global_index_stats.records; ++i) { ++ restore_record(table, s->default_values); ++ INDEX_STATS *index_stats = ++ (INDEX_STATS*)hash_element(&global_index_stats, i); ++ ++ index_full_name= thd->strdup(index_stats->index); ++ table_schema= strsep(&index_full_name, "."); ++ table_name= strsep(&index_full_name, "."); ++ ++ TABLE_LIST tmp_table; ++ bzero((char*) &tmp_table,sizeof(tmp_table)); ++ tmp_table.table_name= table_name; ++ tmp_table.db= table_schema; ++ tmp_table.grant.privilege= 0; ++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db, ++ &tmp_table.grant.privilege, 0, 0, ++ is_schema_db(table_schema)) || ++ grant_option && check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1)) ++ continue; ++ ++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info); ++ table->field[1]->store(table_name, strlen(table_name), system_charset_info); ++ table->field[2]->store(index_full_name, strlen(index_full_name), system_charset_info); ++ table->field[3]->store((longlong)index_stats->rows_read, TRUE); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_global_index_stats)); ++ DBUG_RETURN(1); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ DBUG_RETURN(0); ++} + + /* collect status for all running threads */ + +@@ -4468,6 +4763,77 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++ST_FIELD_INFO user_stats_fields_info[]= ++{ ++ {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, ++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections"}, ++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Concurrent_connections"}, ++ {"CONNECTED_TIME", 
MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time"}, ++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Busy_time"}, ++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Cpu_time"}, ++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_received"}, ++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_sent"}, ++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Binlog_bytes_written"}, ++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_fetched"}, ++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_updated"}, ++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Table_rows_read"}, ++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Select_commands"}, ++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Update_commands"}, ++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Other_commands"}, ++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Commit_transactions"}, ++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rollback_transactions"}, ++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Denied_connections"}, ++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Lost_connections"}, ++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Access_denied"}, ++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Empty_queries"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ST_FIELD_INFO client_stats_fields_info[]= ++{ ++ {"CLIENT", 16, MYSQL_TYPE_STRING, 0, 0, "Client"}, ++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections"}, ++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, 
"Concurrent_connections"}, ++ {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time"}, ++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Busy_time"}, ++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Cpu_time"}, ++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_received"}, ++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_sent"}, ++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Binlog_bytes_written"}, ++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_fetched"}, ++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_updated"}, ++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Table_rows_read"}, ++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Select_commands"}, ++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Update_commands"}, ++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Other_commands"}, ++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Commit_transactions"}, ++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rollback_transactions"}, ++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Denied_connections"}, ++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Lost_connections"}, ++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Access_denied"}, ++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Empty_queries"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ ++ST_FIELD_INFO table_stats_fields_info[]= ++{ ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, 
MYSQL_TYPE_LONG, 0, 0, "Rows_read"}, ++ {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_changed"}, ++ {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_changed_x_#indexes"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ST_FIELD_INFO index_stats_fields_info[]= ++{ ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name"}, ++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_read"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; + + /* + Description of ST_FIELD_INFO in table.h +@@ -4477,6 +4843,8 @@ + { + {"CHARACTER_SETS", charsets_fields_info, create_schema_table, + fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0}, ++ {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, ++ fill_schema_client_stats, make_old_format, 0, -1, -1, 0}, + {"COLLATIONS", collation_fields_info, create_schema_table, + fill_schema_collation, make_old_format, 0, -1, -1, 0}, + {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info, +@@ -4485,6 +4853,8 @@ + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + fill_schema_column_privileges, 0, 0, -1, -1, 0}, ++ {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, ++ fill_schema_index_stats, make_old_format, 0, -1, -1, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, + {"OPEN_TABLES", open_tables_fields_info, create_schema_table, +@@ -4510,10 +4880,14 @@ + get_all_tables, make_table_names_old_format, 0, 1, 2, 1}, + {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table, + fill_schema_table_privileges, 0, 0, -1, -1, 0}, ++ {"TABLE_STATISTICS", 
table_stats_fields_info, create_schema_table, ++ fill_schema_table_stats, make_old_format, 0, -1, -1, 0}, + {"TRIGGERS", triggers_fields_info, create_schema_table, + get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0}, + {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, + fill_schema_user_privileges, 0, 0, -1, -1, 0}, ++ {"USER_STATISTICS", user_stats_fields_info, create_schema_table, ++ fill_schema_user_stats, make_old_format, 0, -1, -1, 0}, + {"VARIABLES", variables_fields_info, create_schema_table, fill_variables, + make_old_format, 0, -1, -1, 1}, + {"VIEWS", view_fields_info, create_schema_table, +diff -r 1270c564d514 sql/sql_update.cc +--- a/sql/sql_update.cc Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_update.cc Mon Dec 22 00:31:13 2008 -0800 +@@ -601,7 +601,8 @@ + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + send_ok(thd, (ulong) thd->row_count_func, + thd->insert_id_used ? thd->last_insert_id : 0L,buff); +- DBUG_PRINT("info",("%ld records updated", (long) updated)); ++ thd->updated_row_count += thd->row_count_func; ++ DBUG_PRINT("info",("%d records updated",updated)); + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */ + thd->abort_on_warning= 0; +@@ -1832,5 +1833,6 @@ + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + ::send_ok(thd, (ulong) thd->row_count_func, + thd->insert_id_used ? 
thd->last_insert_id : 0L,buff); ++ thd->updated_row_count += thd->row_count_func; + return FALSE; + } +diff -r 1270c564d514 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/sql_yacc.yy Mon Dec 22 00:31:13 2008 -0800 +@@ -523,6 +523,7 @@ + %token CHECK_SYM + %token CIPHER_SYM + %token CLIENT_SYM ++%token CLIENT_STATS_SYM + %token CLOSE_SYM + %token COALESCE + %token CODE_SYM +@@ -680,6 +681,7 @@ + %token IMPORT + %token INDEXES + %token INDEX_SYM ++%token INDEX_STATS_SYM + %token INFILE + %token INNER_SYM + %token INNOBASE_SYM +@@ -909,6 +911,7 @@ + %token SIGNED_SYM + %token SIMPLE_SYM + %token SLAVE ++%token SLOW_SYM + %token SMALLINT + %token SNAPSHOT_SYM + %token SOUNDS_SYM +@@ -949,6 +952,7 @@ + %token TABLES + %token TABLESPACE + %token TABLE_SYM ++%token TABLE_STATS_SYM + %token TEMPORARY + %token TEMPTABLE_SYM + %token TERMINATED +@@ -991,6 +995,7 @@ + %token UPGRADE_SYM + %token USAGE + %token USER ++%token USER_STATS_SYM + %token USE_FRM + %token USE_SYM + %token USING +@@ -8244,6 +8249,38 @@ + { + Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; + } ++ | CLIENT_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ Lex->sql_command = SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_CLIENT_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS)) ++ MYSQL_YYABORT; ++ } ++ | USER_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command = SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_USER_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS)) ++ MYSQL_YYABORT; ++ } ++ | TABLE_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command= SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_TABLE_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS)) ++ MYSQL_YYABORT; ++ } ++ | INDEX_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command= SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_INDEX_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, 
SCH_INDEX_STATS)) ++ MYSQL_YYABORT; ++ } + | CREATE PROCEDURE sp_name + { + LEX *lex= Lex; +@@ -8448,9 +8485,14 @@ + | LOGS_SYM { Lex->type|= REFRESH_LOG; } + | STATUS_SYM { Lex->type|= REFRESH_STATUS; } + | SLAVE { Lex->type|= REFRESH_SLAVE; } ++ | SLOW_SYM QUERY_SYM LOGS_SYM { Lex->type |= REFRESH_SLOW_QUERY_LOG; } + | MASTER_SYM { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } +- | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; }; ++ | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } ++ | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; } ++ | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; } ++ | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; } ++ | INDEX_STATS_SYM { Lex->type|= REFRESH_INDEX_STATS; }; + + opt_table_list: + /* empty */ {;} +@@ -9439,6 +9481,7 @@ + | CHAIN_SYM {} + | CHANGED {} + | CIPHER_SYM {} ++ | CLIENT_STATS_SYM {} + | CLIENT_SYM {} + | CODE_SYM {} + | COLLATION_SYM {} +@@ -9491,6 +9534,7 @@ + | HOSTS_SYM {} + | HOUR_SYM {} + | IDENTIFIED_SYM {} ++ | INDEX_STATS_SYM {} + | INVOKER_SYM {} + | IMPORT {} + | INDEXES {} +@@ -9600,6 +9644,7 @@ + | SIMPLE_SYM {} + | SHARE_SYM {} + | SHUTDOWN {} ++ | SLOW_SYM {} + | SNAPSHOT_SYM {} + | SOUNDS_SYM {} + | SOURCE_SYM {} +@@ -9616,6 +9661,7 @@ + | SUSPEND_SYM {} + | SWAPS_SYM {} + | SWITCHES_SYM {} ++ | TABLE_STATS_SYM {} + | TABLES {} + | TABLESPACE {} + | TEMPORARY {} +@@ -9636,6 +9682,7 @@ + | UNKNOWN_SYM {} + | UNTIL_SYM {} + | USER {} ++ | USER_STATS_SYM {} + | USE_FRM {} + | VARIABLES {} + | VIEW_SYM {} +diff -r 1270c564d514 sql/structs.h +--- a/sql/structs.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/structs.h Mon Dec 22 00:31:13 2008 -0800 +@@ -273,6 +273,98 @@ + time_t intime; + } USER_CONN; + ++typedef struct st_user_stats { ++ char user[USERNAME_LENGTH + 1]; ++ // Account name the user is mapped to when this is a user from mapped_user. ++ // Otherwise, the same value as user. 
++ char priv_user[USERNAME_LENGTH + 1]; ++ uint total_connections; ++ uint concurrent_connections; ++ time_t connected_time; // in seconds ++ double busy_time; // in seconds ++ double cpu_time; // in seconds ++ ulonglong bytes_received; ++ ulonglong bytes_sent; ++ ulonglong binlog_bytes_written; ++ ha_rows rows_fetched, rows_updated, rows_read; ++ ulonglong select_commands, update_commands, other_commands; ++ ulonglong commit_trans, rollback_trans; ++ ulonglong denied_connections, lost_connections; ++ ulonglong access_denied_errors; ++ ulonglong empty_queries; ++} USER_STATS; ++ ++/* Lookup function for hash tables with USER_STATS entries */ ++extern byte *get_key_user_stats(USER_STATS *user_stats, uint *length, ++ my_bool not_used __attribute__((unused))); ++ ++/* Free all memory for a hash table with USER_STATS entries */ ++extern void free_user_stats(USER_STATS* user_stats); ++ ++/* Intialize an instance of USER_STATS */ ++extern void ++init_user_stats(USER_STATS *user_stats, ++ const char *user, ++ const char *priv_user, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries); ++ ++/* Increment values of an instance of USER_STATS */ ++extern void ++add_user_stats(USER_STATS *user_stats, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, 
++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries); ++ ++typedef struct st_table_stats { ++ char table[NAME_LEN * 2 + 2]; // [db] + '.' + [table] + '\0' ++ ulonglong rows_read, rows_changed; ++ ulonglong rows_changed_x_indexes; ++ /* Stores enum db_type, but forward declarations cannot be done */ ++ int engine_type; ++} TABLE_STATS; ++ ++typedef struct st_index_stats { ++ char index[NAME_LEN * 3 + 3]; // [db] + '.' + [table] + '.' + [index] + '\0' ++ ulonglong rows_read; ++} INDEX_STATS; ++ ++ + /* Bits in form->update */ + #define REG_MAKE_DUPP 1 /* Make a copy of record when read */ + #define REG_NEW_RECORD 2 /* Write a new record if not found */ +diff -r 1270c564d514 sql/table.h +--- a/sql/table.h Mon Dec 22 00:26:39 2008 -0800 ++++ b/sql/table.h Mon Dec 22 00:31:13 2008 -0800 +@@ -371,10 +371,12 @@ + enum enum_schema_tables + { + SCH_CHARSETS= 0, ++ SCH_CLIENT_STATS, + SCH_COLLATIONS, + SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, ++ SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, + SCH_PROFILES, +@@ -387,8 +389,10 @@ + SCH_TABLE_CONSTRAINTS, + SCH_TABLE_NAMES, + SCH_TABLE_PRIVILEGES, ++ SCH_TABLE_STATS, + SCH_TRIGGERS, + SCH_USER_PRIVILEGES, ++ SCH_USER_STATS, + SCH_VARIABLES, + SCH_VIEWS + }; +diff -r 1270c564d514 strings/Makefile.in +--- a/strings/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/strings/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -361,6 +361,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 support-files/MacOSX/Makefile.in +--- a/support-files/MacOSX/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ 
b/support-files/MacOSX/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -171,6 +171,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 support-files/Makefile.in +--- a/support-files/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/support-files/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -191,6 +191,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 support-files/RHEL4-SElinux/Makefile.in +--- a/support-files/RHEL4-SElinux/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/support-files/RHEL4-SElinux/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -169,6 +169,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 tests/Makefile.in +--- a/tests/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/tests/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -215,6 +215,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 tools/Makefile.in +--- a/tools/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/tools/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -189,6 +189,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 vio/Makefile.in +--- a/vio/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/vio/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -198,6 +198,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + 
LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 win/Makefile.in +--- a/win/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/win/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 1270c564d514 zlib/Makefile.in +--- a/zlib/Makefile.in Mon Dec 22 00:26:39 2008 -0800 ++++ b/zlib/Makefile.in Mon Dec 22 00:31:13 2008 -0800 +@@ -206,6 +206,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = $(NON_THREADED_LIBS) + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ diff --git a/percona/5.0.77-b13/innodb_check_fragmentation.patch b/percona/5.0.77-b13/innodb_check_fragmentation.patch new file mode 100644 index 0000000..4b16731 --- /dev/null +++ b/percona/5.0.77-b13/innodb_check_fragmentation.patch @@ -0,0 +1,275 @@ +diff -r 936d427a9a15 innobase/btr/btr0cur.c +--- a/innobase/btr/btr0cur.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0cur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -516,6 +516,14 @@ + == index->table->comp); + } + ++ if (level == 0) { ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ } ++ + break; + } + +@@ -663,6 +671,12 @@ + btr_cur_add_path_info(cursor, height, + root_height); + } ++ ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); + + break; + } +diff -r 936d427a9a15 innobase/btr/btr0pcur.c +--- a/innobase/btr/btr0pcur.c Mon Dec 22 00:33:03 
2008 -0800 ++++ b/innobase/btr/btr0pcur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -381,6 +381,7 @@ + last record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint next_page_no; + ulint space; + page_t* page; +@@ -393,11 +394,22 @@ + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + next_page_no = btr_page_get_next(page, mtr); + space = buf_frame_get_space_id(page); + + ut_ad(next_page_no != FIL_NULL); ++ ++ if (next_page_no - page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ page_header_get_field(page, PAGE_GARBAGE)); + + next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); +@@ -427,6 +439,7 @@ + record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint prev_page_no; + ulint space; + page_t* page; +@@ -462,9 +475,20 @@ + btr_pcur_restore_position(latch_mode2, cursor, mtr); + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + prev_page_no = btr_page_get_prev(page, mtr); + space = buf_frame_get_space_id(page); ++ ++ if (page_no - prev_page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ page_header_get_field(page, PAGE_GARBAGE)); + + if (btr_pcur_is_before_first_on_page(cursor, mtr) + && (prev_page_no != FIL_NULL)) { +diff -r 936d427a9a15 innobase/btr/btr0sea.c +--- a/innobase/btr/btr0sea.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0sea.c Mon Dec 22 00:33:11 2008 -0800 +@@ -861,6 
+861,12 @@ + + buf_pool->n_page_gets++; + ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ + return(TRUE); + + /*-------------------------------------------*/ +diff -r 936d427a9a15 innobase/include/btr0cur.h +--- a/innobase/include/btr0cur.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/include/btr0cur.h Mon Dec 22 00:33:11 2008 -0800 +@@ -697,6 +697,17 @@ + extern ulint btr_cur_n_non_sea_old; + extern ulint btr_cur_n_sea_old; + ++/*--------------------------------------*/ ++/* prototypes for new functions added to ha_innodb.cc */ ++void innobase_mysql_thd_init_innodb_scan_cont(); ++void innobase_mysql_thd_increment_innodb_scan_cont(ulong length); ++void innobase_mysql_thd_init_innodb_scan_jump(); ++void innobase_mysql_thd_increment_innodb_scan_jump(ulong length); ++void innobase_mysql_thd_init_innodb_scan_data(); ++void innobase_mysql_thd_increment_innodb_scan_data(ulong length); ++void innobase_mysql_thd_init_innodb_scan_garbage(); ++void innobase_mysql_thd_increment_innodb_scan_garbage(ulong length); ++ + #ifndef UNIV_NONINL + #include "btr0cur.ic" + #endif +diff -r 936d427a9a15 patch_info/innodb_check_fragmentation.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_check_fragmentation.info Mon Dec 22 00:33:11 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_check_fragmentation.patch ++Name=Session status to check fragmentation of the last InnoDB scan ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=The names are Innodb_scan_* +diff -r 936d427a9a15 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -760,6 +760,102 @@ + } + + /************************************************************************* ++Initializes Innodb_scan_pages_contiguous. 
*/ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_cont() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_pages_contiguous. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_cont(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_pages_jumpy. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_jump() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_pages_jumpy. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_jump(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_data_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_data() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_data_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_data(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data+= length; ++ } ++} ++ ++/************************************************************************* ++Initializes Innodb_scan_garbages_in_pages. 
*/ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_garbage() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage = 0; ++ } ++} ++ ++/************************************************************************* ++Increments Innodb_scan_garbages_in_pages. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_garbage(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage+= length; ++ } ++} ++ ++/************************************************************************* + Gets the InnoDB transaction handle for a MySQL handler object, creates + an InnoDB transaction struct if the corresponding MySQL thread struct still + lacks one. */ +diff -r 936d427a9a15 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -6673,6 +6673,10 @@ + {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, + #ifdef HAVE_INNOBASE_DB + {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, ++ {"Innodb_scan_pages_contiguous",(char*) offsetof(STATUS_VAR, innodb_scan_cont), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_pages_jumpy", (char*) offsetof(STATUS_VAR, innodb_scan_jump), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_data_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_data), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_garbages_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_garbage), SHOW_LONGLONG_STATUS}, + #endif /*HAVE_INNOBASE_DB*/ + {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, + {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, +diff -r 936d427a9a15 sql/sql_class.h +--- a/sql/sql_class.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/sql_class.h Mon Dec 22 00:33:11 2008 -0800 +@@ -729,6 +729,10 @@ + sense to add to the /global/ status variable counter. 
+ */ + double last_query_cost; ++ ulonglong innodb_scan_cont; ++ ulonglong innodb_scan_jump; ++ ulonglong innodb_scan_data; ++ ulonglong innodb_scan_garbage; + } STATUS_VAR; + + /* diff --git a/percona/5.0.77-b13/innodb_dict_size_limit.patch b/percona/5.0.77-b13/innodb_dict_size_limit.patch new file mode 100644 index 0000000..8ef4e36 --- /dev/null +++ b/percona/5.0.77-b13/innodb_dict_size_limit.patch @@ -0,0 +1,477 @@ +diff -ru mysql-5.0.75_base/innobase/dict/dict0boot.c mysql-5.0.75/innobase/dict/dict0boot.c +--- mysql-5.0.75_base/innobase/dict/dict0boot.c 2008-12-19 02:19:35.000000000 +0900 ++++ mysql-5.0.75/innobase/dict/dict0boot.c 2009-01-23 19:28:25.000000000 +0900 +@@ -247,6 +247,7 @@ + system tables */ + /*-------------------------*/ + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, FALSE); ++ table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0); + dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); +@@ -283,6 +284,7 @@ + ut_a(success); + /*-------------------------*/ + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, FALSE); ++ table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY,0,0,0); + dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); +@@ -309,6 +311,7 @@ + ut_a(success); + /*-------------------------*/ + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, FALSE); ++ table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0,0,0); + dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); +@@ -345,6 +348,7 @@ + ut_a(success); + /*-------------------------*/ + table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, FALSE); ++ table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0,0,0); + dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); +diff -ru 
mysql-5.0.75_base/innobase/dict/dict0crea.c mysql-5.0.75/innobase/dict/dict0crea.c +--- mysql-5.0.75_base/innobase/dict/dict0crea.c 2008-12-19 02:19:35.000000000 +0900 ++++ mysql-5.0.75/innobase/dict/dict0crea.c 2009-01-23 19:41:38.000000000 +0900 +@@ -1178,6 +1178,9 @@ + /* Foreign constraint system tables have already been + created, and they are ok */ + ++ table1->n_mysql_handles_opened = 1; /* for pin */ ++ table2->n_mysql_handles_opened = 1; /* for pin */ ++ + mutex_exit(&(dict_sys->mutex)); + + return(DB_SUCCESS); +@@ -1266,6 +1269,11 @@ + + trx->op_info = ""; + ++ table1 = dict_table_get_low("SYS_FOREIGN"); ++ table2 = dict_table_get_low("SYS_FOREIGN_COLS"); ++ table1->n_mysql_handles_opened = 1; /* for pin */ ++ table2->n_mysql_handles_opened = 1; /* for pin */ ++ + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); +diff -ru mysql-5.0.75_base/innobase/dict/dict0dict.c mysql-5.0.75/innobase/dict/dict0dict.c +--- mysql-5.0.75_base/innobase/dict/dict0dict.c 2008-12-19 02:19:35.000000000 +0900 ++++ mysql-5.0.75/innobase/dict/dict0dict.c 2009-01-26 16:03:29.000000000 +0900 +@@ -638,6 +638,8 @@ + mutex_enter(&(dict_sys->mutex)); + + table = dict_table_get_on_id_low(table_id, trx); ++ ++ dict_table_LRU_trim(table); + + mutex_exit(&(dict_sys->mutex)); + +@@ -752,6 +754,8 @@ + + table = dict_table_get_low(table_name); + ++ dict_table_LRU_trim(table); ++ + mutex_exit(&(dict_sys->mutex)); + + if (table != NULL) { +@@ -787,6 +791,8 @@ + table->n_mysql_handles_opened++; + } + ++ dict_table_LRU_trim(table); ++ + mutex_exit(&(dict_sys->mutex)); + + if (table != NULL) { +@@ -1267,20 +1273,64 @@ + too much space. Currently not used! 
*/ + + void +-dict_table_LRU_trim(void) +-/*=====================*/ ++dict_table_LRU_trim( ++/*================*/ ++ dict_table_t* self) + { + dict_table_t* table; + dict_table_t* prev_table; ++ dict_foreign_t* foreign; ++ ulint n_removed; ++ ulint n_have_parent; ++ ulint cached_foreign_tables; + +- ut_error; ++ //ut_error; + + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&(dict_sys->mutex))); + #endif /* UNIV_SYNC_DEBUG */ + ++retry: ++ n_removed = n_have_parent = 0; + table = UT_LIST_GET_LAST(dict_sys->table_LRU); + ++ while ( srv_dict_size_limit && table ++ && ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t) ++ + dict_sys->size) > srv_dict_size_limit ) { ++ prev_table = UT_LIST_GET_PREV(table_LRU, table); ++ ++ if (table == self || table->n_mysql_handles_opened) ++ goto next_loop; ++ ++ cached_foreign_tables = 0; ++ foreign = UT_LIST_GET_FIRST(table->foreign_list); ++ while (foreign != NULL) { ++ if (foreign->referenced_table) ++ cached_foreign_tables++; ++ foreign = UT_LIST_GET_NEXT(foreign_list, foreign); ++ } ++ ++ /* TODO: use table->mem_fix also, if it becomes exact. 
*/ ++ ++ if (cached_foreign_tables == 0) { ++ dict_table_remove_from_cache(table); ++ n_removed++; ++ } else { ++ n_have_parent++; ++ } ++next_loop: ++ table = prev_table; ++ } ++ ++ if ( srv_dict_size_limit && n_have_parent && n_removed ++ && ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t) ++ + dict_sys->size) > srv_dict_size_limit ) ++ goto retry; ++/* + while (table && (dict_sys->size > + buf_pool_get_max_size() / DICT_POOL_PER_VARYING)) { + +@@ -1292,6 +1341,7 @@ + + table = prev_table; + } ++*/ + } + + /************************************************************************** +diff -ru mysql-5.0.75_base/innobase/ibuf/ibuf0ibuf.c mysql-5.0.75/innobase/ibuf/ibuf0ibuf.c +--- mysql-5.0.75_base/innobase/ibuf/ibuf0ibuf.c 2009-01-23 11:44:18.000000000 +0900 ++++ mysql-5.0.75/innobase/ibuf/ibuf0ibuf.c 2009-01-23 19:22:54.000000000 +0900 +@@ -535,6 +535,7 @@ + sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space); + /* use old-style record format for the insert buffer */ + table = dict_mem_table_create(buf, space, 2, FALSE); ++ table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0); + dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0); +diff -ru mysql-5.0.75_base/innobase/include/dict0dict.h mysql-5.0.75/innobase/include/dict0dict.h +--- mysql-5.0.75_base/innobase/include/dict0dict.h 2008-12-19 02:19:35.000000000 +0900 ++++ mysql-5.0.75/innobase/include/dict0dict.h 2009-01-23 21:46:22.000000000 +0900 +@@ -938,6 +938,11 @@ + const char* ptr, /* in: scan from */ + const char* string);/* in: look for this */ + ++void ++dict_table_LRU_trim( ++/*================*/ ++ dict_table_t* self); ++ + /* Buffers for storing detailed information about the latest foreign key + and unique key errors */ + extern FILE* dict_foreign_err_file; +diff -ru mysql-5.0.75_base/innobase/include/dict0dict.ic mysql-5.0.75/innobase/include/dict0dict.ic 
+--- mysql-5.0.75_base/innobase/include/dict0dict.ic 2008-12-19 02:19:35.000000000 +0900 ++++ mysql-5.0.75/innobase/include/dict0dict.ic 2009-01-23 18:35:55.000000000 +0900 +@@ -533,6 +533,13 @@ + + HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table, + ut_strcmp(table->name, table_name) == 0); ++ ++ /* make young in table_LRU */ ++ if (table) { ++ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); ++ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); ++ } ++ + return(table); + } + +@@ -592,6 +599,10 @@ + if (table != NULL) { + table->mem_fix++; + ++ /* make young in table_LRU */ ++ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); ++ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); ++ + /* lock_push(trx, table, LOCK_DICT_MEM_FIX) */ + } + +diff -ru mysql-5.0.75_base/innobase/include/srv0srv.h mysql-5.0.75/innobase/include/srv0srv.h +--- mysql-5.0.75_base/innobase/include/srv0srv.h 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/innobase/include/srv0srv.h 2009-01-27 10:47:26.000000000 +0900 +@@ -146,6 +146,8 @@ + extern uint srv_read_ahead; + extern ulint srv_adaptive_checkpoint; + ++extern ulint srv_dict_size_limit; ++ + extern volatile ibool srv_io_pattern; + extern ulong srv_io_pattern_trace; + extern ulong srv_io_pattern_trace_running; +@@ -545,6 +547,7 @@ + ulint innodb_data_writes; + ulint innodb_data_written; + ulint innodb_data_reads; ++ ulint innodb_dict_tables; + ulint innodb_buffer_pool_pages_total; + ulint innodb_buffer_pool_pages_data; + ulint innodb_buffer_pool_pages_dirty; +diff -ru mysql-5.0.75_base/innobase/srv/srv0srv.c mysql-5.0.75/innobase/srv/srv0srv.c +--- mysql-5.0.75_base/innobase/srv/srv0srv.c 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/innobase/srv/srv0srv.c 2009-01-27 10:52:19.000000000 +0900 +@@ -345,6 +345,8 @@ + uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ + ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ + ++ulint srv_dict_size_limit = 0; ++ + volatile ibool 
srv_io_pattern = FALSE; + ulint srv_io_pattern_trace = 0; + ulint srv_io_pattern_trace_running = 0; +@@ -1936,6 +1938,7 @@ + export_vars.innodb_data_reads= os_n_file_reads; + export_vars.innodb_data_writes= os_n_file_writes; + export_vars.innodb_data_written= srv_data_written; ++ export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0); + export_vars.innodb_buffer_pool_read_requests= buf_pool->n_page_gets; + export_vars.innodb_buffer_pool_write_requests= srv_buf_pool_write_requests; + export_vars.innodb_buffer_pool_wait_free= srv_buf_pool_wait_free; +diff -ru mysql-5.0.75_base/sql/ha_innodb.cc mysql-5.0.75/sql/ha_innodb.cc +--- mysql-5.0.75_base/sql/ha_innodb.cc 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/sql/ha_innodb.cc 2009-01-27 10:54:08.000000000 +0900 +@@ -288,6 +288,8 @@ + (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, + {"dblwr_writes", + (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, ++ {"dict_tables", ++ (char*) &export_vars.innodb_dict_tables, SHOW_LONG}, + {"log_waits", + (char*) &export_vars.innodb_log_waits, SHOW_LONG}, + {"log_write_requests", +diff -ru mysql-5.0.75_base/sql/ha_innodb.h mysql-5.0.75/sql/ha_innodb.h +--- mysql-5.0.75_base/sql/ha_innodb.h 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/sql/ha_innodb.h 2009-01-26 15:49:37.000000000 +0900 +@@ -242,6 +242,7 @@ + extern ulong srv_flush_neighbor_pages; + extern uint srv_read_ahead; + extern ulong srv_adaptive_checkpoint; ++extern ulong srv_dict_size_limit; + extern ulong srv_show_locks_held; + extern ulong srv_show_verbose_locks; + extern ulong srv_io_pattern_trace; +diff -ru mysql-5.0.75_base/sql/mysqld.cc mysql-5.0.75/sql/mysqld.cc +--- mysql-5.0.75_base/sql/mysqld.cc 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/sql/mysqld.cc 2009-01-26 15:29:45.000000000 +0900 +@@ -5053,6 +5053,7 @@ + OPT_INNODB_ADAPTIVE_CHECKPOINT, + OPT_INNODB_READ_IO_THREADS, + OPT_INNODB_WRITE_IO_THREADS, ++ OPT_INNODB_DICT_SIZE_LIMIT, + 
OPT_INNODB_ADAPTIVE_HASH_INDEX, + OPT_RPL_MIRROR_BINLOG, + OPT_SYNC_MIRROR_BINLOG, +@@ -5406,6 +5407,10 @@ + "Number of background write I/O threads in InnoDB.", + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_dict_size_limit", OPT_INNODB_DICT_SIZE_LIMIT, ++ "Limit the allocated memory for dictionary cache. (0: unlimited)", ++ (gptr*) &srv_dict_size_limit, (gptr*) &srv_dict_size_limit, 0, ++ GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 0 ,0}, + {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE, + "Create/Drop the internal hash table for IO pattern tracing.", + (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace, +diff -ru mysql-5.0.75_base/sql/set_var.cc mysql-5.0.75/sql/set_var.cc +--- mysql-5.0.75_base/sql/set_var.cc 2009-01-23 11:44:19.000000000 +0900 ++++ mysql-5.0.75/sql/set_var.cc 2009-01-26 15:46:45.000000000 +0900 +@@ -522,6 +522,8 @@ + &innodb_read_ahead_typelib, fix_innodb_read_ahead); + sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", + &srv_adaptive_checkpoint); ++sys_var_long_ptr sys_innodb_dict_size_limit("innodb_dict_size_limit", ++ &srv_dict_size_limit); + sys_var_long_ptr sys_innodb_show_locks_held( + "innodb_show_locks_held", + &srv_show_locks_held); +@@ -905,6 +907,7 @@ + &sys_innodb_flush_neighbor_pages, + &sys_innodb_read_ahead, + &sys_innodb_adaptive_checkpoint, ++ &sys_innodb_dict_size_limit, + &sys_innodb_show_locks_held, + &sys_innodb_show_verbose_locks, + &sys_innodb_io_pattern_trace, +@@ -1056,6 +1059,7 @@ + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, + {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, ++ {sys_innodb_dict_size_limit.name, (char*) &sys_innodb_dict_size_limit, SHOW_SYS}, + {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, 
SHOW_SYS}, + {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS}, + {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS}, +diff -ruN mysql-5.0.75_base/mysql-test/r/innodb_dict_size_limit.result mysql-5.0.75/mysql-test/r/innodb_dict_size_limit.result +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.75/mysql-test/r/innodb_dict_size_limit.result 2009-01-27 11:43:46.000000000 +0900 +@@ -0,0 +1,60 @@ ++DROP TABLE IF EXISTS `test_5`; ++DROP TABLE IF EXISTS `test_4`; ++DROP TABLE IF EXISTS `test_3`; ++DROP TABLE IF EXISTS `test_2`; ++DROP TABLE IF EXISTS `test_1`; ++SET storage_engine=InnoDB; ++SET GLOBAL innodb_dict_size_limit=1; ++FLUSH TABLES; ++CREATE TABLE `test_1` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_2` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_3` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_4` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_5` (`a` int, `b` int, PRIMARY KEY (`a`)); ++ALTER TABLE `test_5` ADD CONSTRAINT FOREIGN KEY(`b`) REFERENCES `test_4`(`a`); ++ALTER TABLE `test_4` ADD CONSTRAINT FOREIGN KEY(`b`) REFERENCES `test_3`(`a`); ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 9 ++FLUSH TABLES; ++SELECT * FROM `test_1`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 8 ++SELECT * FROM `test_3`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 11 ++FLUSH TABLES; ++SELECT * FROM `test_2`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 8 ++SELECT * FROM `test_1`; ++a b ++FLUSH TABLES; ++SELECT * FROM `test_4`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 9 ++SELECT * FROM `test_3`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name 
Value ++Innodb_dict_tables 10 ++SET GLOBAL innodb_dict_size_limit=0; ++FLUSH TABLES; ++SELECT * FROM `test_2`; ++a b ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++Variable_name Value ++Innodb_dict_tables 11 ++DROP TABLE `test_5`; ++DROP TABLE `test_4`; ++DROP TABLE `test_3`; ++DROP TABLE `test_2`; ++DROP TABLE `test_1`; +diff -ruN mysql-5.0.75_base/mysql-test/t/innodb_dict_size_limit.test mysql-5.0.75/mysql-test/t/innodb_dict_size_limit.test +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.75/mysql-test/t/innodb_dict_size_limit.test 2009-01-27 11:43:36.000000000 +0900 +@@ -0,0 +1,63 @@ ++# ++# Test for new variable innodb_dict_size_limit; ++# ++-- source include/have_innodb.inc ++ ++--disable_warnings ++DROP TABLE IF EXISTS `test_5`; ++DROP TABLE IF EXISTS `test_4`; ++DROP TABLE IF EXISTS `test_3`; ++DROP TABLE IF EXISTS `test_2`; ++DROP TABLE IF EXISTS `test_1`; ++--enable_warnings ++ ++SET storage_engine=InnoDB; ++SET GLOBAL innodb_dict_size_limit=1; ++ ++FLUSH TABLES; ++ ++CREATE TABLE `test_1` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_2` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_3` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_4` (`a` int, `b` int, PRIMARY KEY (`a`)); ++CREATE TABLE `test_5` (`a` int, `b` int, PRIMARY KEY (`a`)); ++ ++ALTER TABLE `test_5` ADD CONSTRAINT FOREIGN KEY(`b`) REFERENCES `test_4`(`a`); ++ALTER TABLE `test_4` ADD CONSTRAINT FOREIGN KEY(`b`) REFERENCES `test_3`(`a`); ++ ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++FLUSH TABLES; ++SELECT * FROM `test_1`; ++ ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++SELECT * FROM `test_3`; ++ ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++FLUSH TABLES; ++SELECT * FROM `test_2`; ++ ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++SELECT * FROM `test_1`; ++FLUSH TABLES; ++SELECT * FROM `test_4`; ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++SELECT * FROM `test_3`; ++SHOW GLOBAL STATUS LIKE 
'Innodb_dict_tables'; ++ ++SET GLOBAL innodb_dict_size_limit=0; ++FLUSH TABLES; ++SELECT * FROM `test_2`; ++ ++SHOW GLOBAL STATUS LIKE 'Innodb_dict_tables'; ++ ++DROP TABLE `test_5`; ++DROP TABLE `test_4`; ++DROP TABLE `test_3`; ++DROP TABLE `test_2`; ++DROP TABLE `test_1`; ++ +diff -ruN mysql-5.0.75_base/patch_info/innodb_dict_size_limit.info mysql-5.0.75/patch_info/innodb_dict_size_limit.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.75/patch_info/innodb_dict_size_limit.info 2009-01-26 15:46:45.000000000 +0900 +@@ -0,0 +1,9 @@ ++File=innodb_dict_size_limit.patch ++Name=Limit dictionary cache size ++Version=1.0 ++Author=Percona ++License=GPL ++Comment=Variable innodb_dict_size_limit in bytes ++ChangeLog= ++2009-01-26 ++YK: Initial release diff --git a/percona/5.0.77-b13/innodb_fsync_source.patch b/percona/5.0.77-b13/innodb_fsync_source.patch new file mode 100644 index 0000000..637a7d6 --- /dev/null +++ b/percona/5.0.77-b13/innodb_fsync_source.patch @@ -0,0 +1,594 @@ +diff -r 61031ebb48ce innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/buf/buf0flu.c Mon Nov 03 05:07:56 2008 -0800 +@@ -341,7 +341,7 @@ + + /* Now flush the doublewrite buffer data to disk */ + +- fil_flush(TRX_SYS_SPACE); ++ fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We know that the writes have been flushed to disk now + and in recovery we will find them in the doublewrite buffer +@@ -381,7 +381,7 @@ + + /* Now we flush the data to disk (for example, with fsync) */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We can now reuse the doublewrite memory buffer: */ + +@@ -501,7 +501,8 @@ + } + #else + /* Force the log to the disk before writing the modified block */ +- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_DIRTY_BUFFER); 
+ #endif + buf_flush_init_for_writing(block->frame, block->newest_modification, + block->space, block->offset); +diff -r 61031ebb48ce innobase/fil/fil0fil.c +--- a/innobase/fil/fil0fil.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/fil/fil0fil.c Mon Nov 03 05:07:56 2008 -0800 +@@ -245,6 +245,7 @@ + request */ + UT_LIST_BASE_NODE_T(fil_space_t) space_list; + /* list of all file spaces */ ++ ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */ + }; + + /* The tablespace memory cache. This variable is NULL before the module is +@@ -849,7 +850,7 @@ + /* Flush tablespaces so that we can close modified files in the LRU + list */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + count++; + +@@ -1309,7 +1310,10 @@ + + UT_LIST_INIT(system->unflushed_spaces); + UT_LIST_INIT(system->space_list); +- ++ { ++ int x; ++ for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0; ++ } + return(system); + } + +@@ -1437,6 +1441,23 @@ + } + + mutex_exit(&(system->mutex)); ++} ++ ++/******************************************************************** ++Prints internal counters */ ++ ++void ++fil_print(FILE *file) ++{ ++ fprintf(file, ++ "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, " ++ "%lu log aio, %lu log sync, %lu archive\n", ++ fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER], ++ fil_system->flush_types[FLUSH_FROM_OTHER], ++ fil_system->flush_types[FLUSH_FROM_CHECKPOINT], ++ fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE], ++ fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO], ++ fil_system->flush_types[FLUSH_FROM_ARCHIVE]); + } + + /******************************************************************** +@@ -2256,7 +2277,7 @@ + + os_thread_sleep(20000); + +- fil_flush(id); ++ fil_flush(id, FLUSH_FROM_OTHER); + + goto retry; + +@@ -3574,7 +3595,7 @@ + size_after_extend, *actual_size); */ + mutex_exit(&(system->mutex)); + +- fil_flush(space_id); ++ fil_flush(space_id, 
FLUSH_FROM_OTHER); + + return(success); + } +@@ -4166,8 +4187,9 @@ + void + fil_flush( + /*======*/ +- ulint space_id) /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4176,7 +4198,7 @@ + ib_longlong old_mod_counter; + + mutex_enter(&(system->mutex)); +- ++ system->flush_types[flush_type]++; + HASH_SEARCH(hash, system->spaces, space_id, space, + space->id == space_id); + if (!space || space->is_being_deleted) { +@@ -4281,7 +4303,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4322,7 +4345,7 @@ + a non-existing space id. */ + for (i = 0; i < n_space_ids; i++) { + +- fil_flush(space_ids[i]); ++ fil_flush(space_ids[i], flush_type); + } + + mem_free(space_ids); +diff -r 61031ebb48ce innobase/include/fil0fil.h +--- a/innobase/include/fil0fil.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/fil0fil.h Mon Nov 03 05:07:56 2008 -0800 +@@ -197,6 +197,13 @@ + fil_init( + /*=====*/ + ulint max_n_open); /* in: max number of open files */ ++/******************************************************************** ++ * Prints internal counters. */ ++ ++void ++fil_print( ++ /*=====*/ ++ FILE* file); /* in: output stream */ + /*********************************************************************** + Opens all log files and system tablespace data files. They stay open until the + database server shutdown. 
This should be called at a server startup after the +@@ -621,14 +628,26 @@ + ulint segment); /* in: the number of the segment in the aio + array to wait for */ + /************************************************************************** ++Identifies the caller of fil_flush. */ ++typedef enum { ++ FLUSH_FROM_DIRTY_BUFFER, ++ FLUSH_FROM_OTHER, ++ FLUSH_FROM_CHECKPOINT, ++ FLUSH_FROM_LOG_IO_COMPLETE, ++ FLUSH_FROM_LOG_WRITE_UP_TO, ++ FLUSH_FROM_ARCHIVE, ++ FLUSH_FROM_NUMBER ++} flush_from_type; ++/************************************************************************** + Flushes to disk possible writes cached by the OS. If the space does not exist + or is being dropped, does not do anything. */ + + void + fil_flush( + /*======*/ +- ulint space_id); /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /************************************************************************** + Flushes to disk writes in file spaces of the given type possibly cached by + the OS. */ +@@ -636,7 +655,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /********************************************************************** + Checks the consistency of the tablespace cache. */ + +diff -r 61031ebb48ce innobase/include/log0log.h +--- a/innobase/include/log0log.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/log0log.h Mon Nov 03 05:07:56 2008 -0800 +@@ -146,6 +146,22 @@ + log_io_complete( + /*============*/ + log_group_t* group); /* in: log group */ ++ ++/********************************************************** ++Describes the caller of log_write_up_to. 
*/ ++ ++typedef enum { ++ LOG_WRITE_FROM_DIRTY_BUFFER, ++ LOG_WRITE_FROM_BACKGROUND_SYNC, ++ LOG_WRITE_FROM_BACKGROUND_ASYNC, ++ LOG_WRITE_FROM_INTERNAL, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC, ++ LOG_WRITE_FROM_CHECKPOINT_ASYNC, ++ LOG_WRITE_FROM_LOG_ARCHIVE, ++ LOG_WRITE_FROM_COMMIT_SYNC, ++ LOG_WRITE_FROM_COMMIT_ASYNC, ++ LOG_WRITE_FROM_NUMBER ++} log_sync_type; + /********************************************************** + This function is called, e.g., when a transaction wants to commit. It checks + that the log has been written to the log file up to the last log entry written +@@ -159,14 +175,21 @@ + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk); +- /* in: TRUE if we want the written log also to be +- flushed to disk */ ++ ibool flush_to_disk, ++ /* in: TRUE if we want the written log also to be flushed to disk */ ++ log_sync_type caller);/* in: identifies the caller */ + /******************************************************************** + Does a syncronous flush of the log buffer to disk. */ + + void + log_buffer_flush_to_disk(void); ++/*==========================*/ ++/******************************************************************** ++Flushes the log buffer. Forces it to disk depending on the value of ++the configuration parameter innodb_flush_log_at_trx_commit. 
*/ ++ ++void ++log_buffer_flush_maybe_sync(void); + /*==========================*/ + /******************************************************************** + Advances the smallest lsn for which there are unflushed dirty blocks in the +@@ -744,6 +767,12 @@ + AND flushed to disk */ + ulint n_pending_writes;/* number of currently pending flushes + or writes */ ++ ulint log_sync_callers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to */ ++ ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to when log file is sync'd */ ++ ulint n_syncs; /* number of fsyncs done for log file */ ++ ulint n_checkpoints; /* number of calls to log_checkpoint */ + /* NOTE on the 'flush' in names of the fields below: starting from + 4.0.14, we separate the write of the log file and the actual fsync() + or other method to flush it to disk. The names below shhould really +diff -r 61031ebb48ce innobase/log/log0log.c +--- a/innobase/log/log0log.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/log/log0log.c Mon Nov 03 05:07:56 2008 -0800 +@@ -782,6 +782,15 @@ + log_sys->written_to_all_lsn = log_sys->lsn; + + log_sys->n_pending_writes = 0; ++ { ++ int x; ++ for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) { ++ log_sys->log_sync_callers[x] = 0; ++ log_sys->log_sync_syncers[x] = 0; ++ } ++ } ++ log_sys->n_syncs = 0; ++ log_sys->n_checkpoints = 0; + + log_sys->no_flush_event = os_event_create(NULL); + +@@ -1066,7 +1075,7 @@ + if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + #ifdef UNIV_DEBUG +@@ -1088,7 +1097,7 @@ + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC + && srv_flush_log_at_trx_commit != 2) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + mutex_enter(&(log_sys->mutex)); +@@ -1303,9 +1312,10 @@ + be written, ut_dulint_max if not specified */ + 
ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk) ++ ibool flush_to_disk, + /* in: TRUE if we want the written log also to be + flushed to disk */ ++ log_sync_type caller) /* in: identifies caller */ + { + log_group_t* group; + ulint start_offset; +@@ -1315,6 +1325,7 @@ + ulint loop_count; + ulint unlock; + ++ log_sys->log_sync_callers[caller]++; + if (recv_no_ibuf_operations) { + /* Recovery is running and no operations on the log files are + allowed yet (the variable name .._no_ibuf_.. is misleading) */ +@@ -1465,13 +1476,17 @@ + so we have also flushed to disk what we have written */ + + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + + } else if (flush_to_disk) { + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO); + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + } + + mutex_enter(&(log_sys->mutex)); +@@ -1520,7 +1535,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_BACKGROUND_SYNC); + } + + /******************************************************************** +@@ -1551,7 +1567,7 @@ + mutex_exit(&(log->mutex)); + + if (do_flush) { +- log_write_up_to(lsn, LOG_NO_WAIT, FALSE); ++ log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL); + } + } + +@@ -1921,11 +1937,11 @@ + } + + if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT); + } + + mutex_enter(&(log_sys->mutex)); +- ++ log_sys->n_checkpoints++; + oldest_lsn = log_buf_pool_get_oldest_modification(); + + mutex_exit(&(log_sys->mutex)); +@@ -1938,7 +1954,8 @@ + write-ahead-logging algorithm ensures that the 
log has been flushed + up to oldest_lsn. */ + +- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC); + + mutex_enter(&(log_sys->mutex)); + +@@ -2566,7 +2583,7 @@ + + mutex_exit(&(log_sys->mutex)); + +- fil_flush(group->archive_space_id); ++ fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE); + + mutex_enter(&(log_sys->mutex)); + +@@ -2647,7 +2664,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_LOG_ARCHIVE); + + calc_new_limit = FALSE; + +@@ -3184,8 +3202,8 @@ + } + mutex_exit(&kernel_mutex); + +- fil_flush_file_spaces(FIL_TABLESPACE); +- fil_flush_file_spaces(FIL_LOG); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); ++ fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER); + + /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer + pool: therefore it is essential that the buffer pool has been +@@ -3218,7 +3236,7 @@ + + fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + fil_close_all_files(); + +@@ -3331,15 +3349,45 @@ + time_elapsed = 0.001 + difftime(current_time, + log_sys->last_printout_time); + fprintf(file, +- "%lu pending log writes, %lu pending chkp writes\n" +- "%lu log i/o's done, %.2f log i/o's/second\n", +- (ulong) log_sys->n_pending_writes, +- (ulong) log_sys->n_pending_checkpoint_writes, +- (ulong) log_sys->n_log_ios, +- ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); ++ "%lu pending log writes, %lu pending chkp writes\n" ++ "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n", ++ (ulong) log_sys->n_pending_writes, ++ (ulong) log_sys->n_pending_checkpoint_writes, ++ (ulong) log_sys->n_log_ios, ++ (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed, 
++ log_sys->n_syncs, ++ log_sys->n_checkpoints); + + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = current_time; ++ ++ fprintf(file, ++ "log sync callers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC], ++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]); ++ ++ fprintf(file, ++ "log sync syncers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC], ++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]); + + mutex_exit(&(log_sys->mutex)); + } +diff -r 61031ebb48ce innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Nov 03 05:07:56 2008 -0800 +@@ -1638,6 +1638,12 @@ + (ulong)time_elapsed); + + fputs("----------\n" ++ "BACKGROUND THREAD\n" ++ "----------\n", file); ++ fil_print(file); ++ ++ ++ 
fputs("----------\n" + "SEMAPHORES\n" + "----------\n", file); + sync_print(file); +diff -r 61031ebb48ce innobase/trx/trx0sys.c +--- a/innobase/trx/trx0sys.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0sys.c Mon Nov 03 05:07:56 2008 -0800 +@@ -511,7 +511,7 @@ + page += UNIV_PAGE_SIZE; + } + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + leave_func: + ut_free(unaligned_read_buf); +diff -r 61031ebb48ce innobase/trx/trx0trx.c +--- a/innobase/trx/trx0trx.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0trx.c Mon Nov 03 05:07:56 2008 -0800 +@@ -916,19 +916,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1659,18 +1661,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush them to + disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, 
FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1906,19 +1911,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +diff -r 61031ebb48ce patch_info/innodb_fsync_source.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_fsync_source.info Mon Nov 03 05:07:56 2008 -0800 +@@ -0,0 +1,9 @@ ++File=innodb_fsync_source.patch ++Name=Information of fsync callers in InnoDB ++Version=1.0 ++Author=Google ++License=GPL ++Comment= ++ChangeLog= ++2008-11-01 ++VT: Initial porting diff --git a/percona/5.0.77-b13/innodb_io_patches.patch b/percona/5.0.77-b13/innodb_io_patches.patch new file mode 100644 index 0000000..0b3ccef --- /dev/null +++ b/percona/5.0.77-b13/innodb_io_patches.patch @@ -0,0 +1,672 @@ +diff -ruN a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c 2008-12-19 02:19:35.000000000 +0900 ++++ b/innobase/buf/buf0flu.c 2009-01-09 15:51:10.000000000 +0900 +@@ -898,10 +898,17 @@ + + old_page_count = page_count; + ++ if (srv_flush_neighbor_pages) { + /* Try to flush also all the neighbors */ + page_count += + buf_flush_try_neighbors(space, offset, + flush_type); ++ } else { ++ /* Try to flush the page only */ ++ page_count += ++ buf_flush_try_page(space, offset, ++ flush_type); ++ } + 
/* fprintf(stderr, + "Flush type %lu, page no %lu, neighb %lu\n", + flush_type, offset, +diff -ruN a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/buf/buf0rea.c 2009-01-09 15:40:46.000000000 +0900 +@@ -189,6 +189,10 @@ + ulint err; + ulint i; + ++ if (!(srv_read_ahead & 1)) { ++ return(0); ++ } ++ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); +@@ -396,6 +400,10 @@ + ulint err; + ulint i; + ++ if (!(srv_read_ahead & 2)) { ++ return(0); ++ } ++ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); +diff -ruN a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c +--- a/innobase/ibuf/ibuf0ibuf.c 2008-12-19 02:19:35.000000000 +0900 ++++ b/innobase/ibuf/ibuf0ibuf.c 2009-01-09 15:53:18.000000000 +0900 +@@ -370,8 +370,9 @@ + grow in size, as the references on the upper levels of the tree can + change */ + +- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE +- / IBUF_POOL_SIZE_PER_MAX_SIZE; ++ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE ++ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE); ++ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE; + ibuf->meter = IBUF_THRESHOLD + 1; + + UT_LIST_INIT(ibuf->data_list); +@@ -2258,11 +2259,13 @@ + + mutex_enter(&ibuf_mutex); + ++ if (!srv_ibuf_active_contract) { + if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { + mutex_exit(&ibuf_mutex); + + return; + } ++ } + + sync = FALSE; + +diff -ruN a/innobase/include/os0file.h b/innobase/include/os0file.h +--- a/innobase/include/os0file.h 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/include/os0file.h 2009-01-09 15:40:46.000000000 +0900 +@@ -551,8 +551,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint 
n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */ ++ ulint n_write_threads, /**/ + ulint n_slots_sync); /* in: number of slots in the sync aio array */ + /*********************************************************************** + Requests an asynchronous i/o operation. */ +diff -ruN a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h 2009-01-09 15:40:22.000000000 +0900 ++++ b/innobase/include/srv0srv.h 2009-01-09 15:54:33.000000000 +0900 +@@ -89,6 +89,8 @@ + extern ulint srv_lock_table_size; + + extern ulint srv_n_file_io_threads; ++extern ulint srv_n_read_io_threads; ++extern ulint srv_n_write_io_threads; + + #ifdef UNIV_LOG_ARCHIVE + extern ibool srv_log_archive_on; +@@ -133,6 +135,14 @@ + extern ulong srv_max_purge_lag; + extern ibool srv_use_awe; + extern ibool srv_use_adaptive_hash_indexes; ++ ++extern ulint srv_io_capacity; ++extern long long srv_ibuf_max_size; ++extern ulint srv_ibuf_active_contract; ++extern ulint srv_ibuf_accel_rate; ++extern ulint srv_flush_neighbor_pages; ++extern uint srv_read_ahead; ++extern ulint srv_adaptive_checkpoint; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -ruN a/innobase/log/log0log.c b/innobase/log/log0log.c +--- a/innobase/log/log0log.c 2008-12-19 02:19:36.000000000 +0900 ++++ b/innobase/log/log0log.c 2009-01-09 15:40:46.000000000 +0900 +@@ -3326,6 +3326,15 @@ + (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn), + (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + ++ fprintf(file, ++ "Max checkpoint age %lu\n" ++ "Modified age %lu\n" ++ "Checkpoint age %lu\n", ++ (ulong) log_sys->max_checkpoint_age, ++ (ulong) ut_dulint_minus(log_sys->lsn, ++ log_buf_pool_get_oldest_modification()), ++ 
(ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn)); ++ + current_time = time(NULL); + + time_elapsed = 0.001 + difftime(current_time, +diff -ruN a/innobase/os/os0file.c b/innobase/os/os0file.c +--- a/innobase/os/os0file.c 2009-01-09 15:40:23.000000000 +0900 ++++ b/innobase/os/os0file.c 2009-01-09 15:40:46.000000000 +0900 +@@ -2877,8 +2877,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/ ++ ulint n_write_threads, /**/ + ulint n_slots_sync) /* in: number of slots in the sync aio array */ + { + ulint n_read_segs; +@@ -2888,6 +2890,8 @@ + #ifdef POSIX_ASYNC_IO + sigset_t sigset; + #endif ++ ulint n_segments = 2 + n_read_threads + n_write_threads; ++ + ut_ad(n % n_segments == 0); + ut_ad(n_segments >= 4); + +@@ -2898,8 +2902,8 @@ + } + + n_per_seg = n / n_segments; +- n_write_segs = (n_segments - 2) / 2; +- n_read_segs = n_segments - 2 - n_write_segs; ++ n_write_segs = n_write_threads; ++ n_read_segs = n_read_threads; + + /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */ + +@@ -3180,6 +3184,13 @@ + struct aiocb* control; + #endif + ulint i; ++ ulint prim_segment; ++ ulint n; ++ ++ n = array->n_slots / array->n_segments; ++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */ ++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments); ++ + loop: + os_mutex_enter(array->mutex); + +@@ -3198,6 +3209,16 @@ + goto loop; + } + ++ for (i = prim_segment * n; i < array->n_slots; i++) { ++ slot = os_aio_array_get_nth_slot(array, i); ++ ++ if (slot->reserved == FALSE) { ++ break; ++ } ++ } ++ ++ if (slot->reserved == TRUE){ ++ /* Not found after the 
intended segment. So we should search before. */ + for (i = 0;; i++) { + slot = os_aio_array_get_nth_slot(array, i); + +@@ -3205,6 +3226,7 @@ + break; + } + } ++ } + + array->n_reserved++; + +diff -ruN a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c 2009-01-09 15:40:23.000000000 +0900 ++++ b/innobase/srv/srv0srv.c 2009-01-09 15:58:36.000000000 +0900 +@@ -167,6 +167,8 @@ + ulint srv_lock_table_size = ULINT_MAX; + + ulint srv_n_file_io_threads = ULINT_MAX; ++ulint srv_n_read_io_threads = 1; ++ulint srv_n_write_io_threads = 1; + + #ifdef UNIV_LOG_ARCHIVE + ibool srv_log_archive_on = FALSE; +@@ -324,6 +326,22 @@ + ibool srv_use_awe = FALSE; + ibool srv_use_adaptive_hash_indexes = TRUE; + ++ulint srv_io_capacity = 100; ++ ++/* Returns the number of IO operations that is X percent of the capacity. ++PCT_IO(5) -> returns the number of IO operations that is 5% of the max ++where max is srv_io_capacity. */ ++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0))) ++ ++long long srv_ibuf_max_size = 0; ++ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */ ++ulint srv_ibuf_accel_rate = 100; ++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) ++ ++ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ ++ ++uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ ++ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 20; + ulong srv_n_free_tickets_to_enter = 500; +@@ -2214,6 +2232,8 @@ + ibool skip_sleep = FALSE; + ulint i; + ++ dulint oldest_lsn; ++ + #ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Master thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +@@ -2302,9 +2322,9 @@ + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { ++ if 
(n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) { + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5)); + + srv_main_thread_op_info = "flushing log"; + +@@ -2317,7 +2337,7 @@ + /* Try to keep the number of modified pages in the + buffer pool under the limit wished by the user */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + + /* If we had to do the flush, it may have taken +@@ -2326,6 +2346,49 @@ + iteration of this loop. */ + + skip_sleep = TRUE; ++ } else if (srv_adaptive_checkpoint) { ++ ++ /* Try to keep modified age not to exceed ++ max_checkpoint_age * 7/8 line */ ++ ++ mutex_enter(&(log_sys->mutex)); ++ ++ oldest_lsn = buf_pool_get_oldest_modification(); ++ if (ut_dulint_is_zero(oldest_lsn)) { ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ } else { ++ if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) { ++ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */ ++ /* We should not flush from here. 
*/ ++ mutex_exit(&(log_sys->mutex)); ++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) { ++ ++ /* 2nd defence line (max_checkpoint_age * 3/4) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age)/2 ) { ++ ++ /* 1st defence line (max_checkpoint_age * 1/2) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else { ++ mutex_exit(&(log_sys->mutex)); ++ } ++ } ++ + } + + if (srv_activity_count == old_activity_count) { +@@ -2352,10 +2415,10 @@ + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { ++ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { + + srv_main_thread_op_info = "flushing buffer pool pages"; +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2365,7 +2428,7 @@ + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5)); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2407,14 +2470,14 @@ + (> 70 %), we assume we can afford reserving the disk(s) for + the time it requires to flush 100 pages */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + } else { + /* Otherwise, we only flush a small number of pages so that + we do not 
unnecessarily use much disk i/o capacity from + other work */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), + ut_dulint_max); + } + +@@ -2503,7 +2566,7 @@ + if (srv_fast_shutdown && srv_shutdown_state > 0) { + n_bytes_merged = 0; + } else { +- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); ++ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(100)); + } + + srv_main_thread_op_info = "reserving kernel mutex"; +@@ -2520,7 +2583,7 @@ + + if (srv_fast_shutdown < 2) { + n_pages_flushed = +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + } else { + /* In the fastest shutdown we do not flush the buffer pool + to data files: we set n_pages_flushed to 0 artificially. */ +diff -ruN a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c +--- a/innobase/srv/srv0start.c 2008-12-19 02:19:37.000000000 +0900 ++++ b/innobase/srv/srv0start.c 2009-01-09 15:40:46.000000000 +0900 +@@ -1205,24 +1205,28 @@ + return(DB_ERROR); + } + ++ /* over write innodb_file_io_threads */ ++ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads; ++ + /* Restrict the maximum number of file i/o threads */ + if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { + + srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; ++ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2; + } + + if (!os_aio_use_native_aio) { + /* In simulated aio we currently have use only for 4 threads */ +- srv_n_file_io_threads = 4; ++ /*srv_n_file_io_threads = 4;*/ + + os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, +- SRV_MAX_N_PENDING_SYNC_IOS); ++ srv_n_read_io_threads, srv_n_write_io_threads, ++ SRV_MAX_N_PENDING_SYNC_IOS * 8); + } else { + os_aio_init(SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, ++ srv_n_read_io_threads, 
srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS); + } + +diff -ruN a/patch_info/innodb_io_patches.info b/patch_info/innodb_io_patches.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ b/patch_info/innodb_io_patches.info 2009-01-09 15:59:05.000000000 +0900 +@@ -0,0 +1,11 @@ ++File=innodb_io_patches.patch ++Name=Cluster of past InnoDB IO patches ++Version=1.1 ++Author=Percona ++License=GPL ++Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush) ++ChangeLog= ++2008-11-06 ++YK: Initial release ++2009-01-09 ++YK: Some parameters are added +diff -ruN a/sql/ha_innodb.cc b/sql/ha_innodb.cc +--- a/sql/ha_innodb.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/ha_innodb.cc 2009-01-09 15:40:46.000000000 +0900 +@@ -149,6 +149,7 @@ + innobase_lock_wait_timeout, innobase_force_recovery, + innobase_open_files; + ++long innobase_read_io_threads, innobase_write_io_threads; + longlong innobase_buffer_pool_size, innobase_log_file_size; + + /* The default values for the following char* start-up parameters +@@ -1403,6 +1404,8 @@ + srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; + + srv_n_file_io_threads = (ulint) innobase_file_io_threads; ++ srv_n_read_io_threads = (ulint) innobase_read_io_threads; ++ srv_n_write_io_threads = (ulint) innobase_write_io_threads; + + srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; + srv_force_recovery = (ulint) innobase_force_recovery; +diff -ruN a/sql/ha_innodb.h b/sql/ha_innodb.h +--- a/sql/ha_innodb.h 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/ha_innodb.h 2009-01-09 15:59:41.000000000 +0900 +@@ -204,6 +204,7 @@ + extern long innobase_additional_mem_pool_size; + extern long innobase_buffer_pool_awe_mem_mb; + extern long innobase_file_io_threads, innobase_lock_wait_timeout; ++extern long innobase_read_io_threads, innobase_write_io_threads; + extern long innobase_force_recovery; + extern long innobase_open_files; + extern char *innobase_data_home_dir, 
*innobase_data_file_path; +@@ -234,6 +235,13 @@ + extern ulong srv_thread_concurrency; + extern ulong srv_commit_concurrency; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_io_capacity; ++extern long long srv_ibuf_max_size; ++extern ulong srv_ibuf_active_contract; ++extern ulong srv_ibuf_accel_rate; ++extern ulong srv_flush_neighbor_pages; ++extern uint srv_read_ahead; ++extern ulong srv_adaptive_checkpoint; + } + + bool innobase_init(void); +diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc +--- a/sql/mysqld.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/mysqld.cc 2009-01-09 16:01:25.000000000 +0900 +@@ -5036,6 +5036,15 @@ + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, + OPT_KEEP_FILES_ON_CREATE, ++ OPT_INNODB_IO_CAPACITY, ++ OPT_INNODB_IBUF_MAX_SIZE, ++ OPT_INNODB_IBUF_ACTIVE_CONTRACT, ++ OPT_INNODB_IBUF_ACCEL_RATE, ++ OPT_INNODB_FLUSH_NEIGHBOR_PAGES, ++ OPT_INNODB_READ_AHEAD, ++ OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ OPT_INNODB_READ_IO_THREADS, ++ OPT_INNODB_WRITE_IO_THREADS, + OPT_INNODB_ADAPTIVE_HASH_INDEX, + OPT_FEDERATED + }; +@@ -5344,6 +5353,41 @@ + (gptr*) &global_system_variables.innodb_table_locks, + (gptr*) &global_system_variables.innodb_table_locks, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, ++ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY, ++ "Number of IO operations per second the server can do. Tunes background IO rate.", ++ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity, ++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, ++ {"innodb_ibuf_max_size", OPT_INNODB_IBUF_MAX_SIZE, ++ "The maximum size of the insert buffer. (in bytes)", ++ (gptr*) &srv_ibuf_max_size, (gptr*) &srv_ibuf_max_size, 0, ++ GET_LL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, ++ {"innodb_ibuf_active_contract", OPT_INNODB_IBUF_ACTIVE_CONTRACT, ++ "Enable/Disable active_contract of insert buffer. 
0:disable 1:enable", ++ (gptr*) &srv_ibuf_active_contract, (gptr*) &srv_ibuf_active_contract, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_ibuf_accel_rate", OPT_INNODB_IBUF_ACCEL_RATE, ++ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", ++ (gptr*) &srv_ibuf_accel_rate, (gptr*) &srv_ibuf_accel_rate, ++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, ++ {"innodb_flush_neighbor_pages", OPT_INNODB_FLUSH_NEIGHBOR_PAGES, ++ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", ++ (gptr*) &srv_flush_neighbor_pages, (gptr*) &srv_flush_neighbor_pages, ++ 0, GET_ULONG, REQUIRED_ARG, 1, 0, 1, 0, 0, 0}, ++ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD, ++ "Control read ahead activity. (none, random, linear, [both])", ++ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, ++ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ "Enable/Diasable flushing along modified age. 0:disable 1:enable", ++ (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS, ++ "Number of background read I/O threads in InnoDB.", ++ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS, ++ "Number of background write I/O threads in InnoDB.", ++ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +@@ -7637,6 +7636,22 @@ + case OPT_INNODB_LOG_ARCHIVE: + innobase_log_archive= argument ? 
test(atoi(argument)) : 1; + break; ++ case OPT_INNODB_READ_AHEAD: ++ if (argument == disabled_my_option) ++ srv_read_ahead = 0; ++ else if (! argument) ++ srv_read_ahead = 3; ++ else ++ { ++ int type; ++ if ((type=find_type(argument, &innodb_read_ahead_typelib, 2)) <= 0) ++ { ++ fprintf(stderr,"Unknown innodb_read_ahead type: %s\n",argument); ++ exit(1); ++ } ++ srv_read_ahead = (uint) ((type - 1) & 3); ++ } ++ break; + #endif /* HAVE_INNOBASE_DB */ + case OPT_MYISAM_RECOVER: + { +diff -ruN a/sql/set_var.cc b/sql/set_var.cc +--- a/sql/set_var.cc 2009-01-09 15:40:23.000000000 +0900 ++++ b/sql/set_var.cc 2009-01-09 16:05:22.000000000 +0900 +@@ -484,6 +484,37 @@ + sys_var_long_ptr sys_innodb_flush_log_at_trx_commit( + "innodb_flush_log_at_trx_commit", + &srv_flush_log_at_trx_commit); ++sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity", ++ &srv_io_capacity); ++sys_var_long_ptr sys_innodb_ibuf_active_contract("innodb_ibuf_active_contract", ++ &srv_ibuf_active_contract); ++sys_var_long_ptr sys_innodb_ibuf_accel_rate("innodb_ibuf_accel_rate", ++ &srv_ibuf_accel_rate); ++sys_var_long_ptr sys_innodb_flush_neighbor_pages("innodb_flush_neighbor_pages", ++ &srv_flush_neighbor_pages); ++ ++const char *innodb_read_ahead_names[]= ++{ ++ "none", /* 0 */ ++ "random", ++ "linear", ++ "both", /* 3 */ ++ /* For compatibility of the older patch */ ++ "0", /* 4 ("none" + 4) */ ++ "1", ++ "2", ++ "3", /* 7 ("both" + 4) */ ++ NullS ++}; ++TYPELIB innodb_read_ahead_typelib= ++{ ++ array_elements(innodb_read_ahead_names) - 1, "innodb_read_ahead_typelib", ++ innodb_read_ahead_names, NULL ++}; ++sys_var_enum sys_innodb_read_ahead("innodb_read_ahead", &srv_read_ahead, ++ &innodb_read_ahead_typelib, fix_innodb_read_ahead); ++sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", ++ &srv_adaptive_checkpoint); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr 
sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -847,6 +859,12 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_io_capacity, ++ &sys_innodb_ibuf_active_contract, ++ &sys_innodb_ibuf_accel_rate, ++ &sys_innodb_flush_neighbor_pages, ++ &sys_innodb_read_ahead, ++ &sys_innodb_adaptive_checkpoint, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -982,6 +1000,15 @@ + {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, + {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, + {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, ++ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS}, ++ {"innodb_ibuf_max_size", (char*) &srv_ibuf_max_size, SHOW_LONGLONG}, ++ {sys_innodb_ibuf_active_contract.name, (char*) &sys_innodb_ibuf_active_contract, SHOW_SYS}, ++ {sys_innodb_ibuf_accel_rate.name, (char*) &sys_innodb_ibuf_accel_rate, SHOW_SYS}, ++ {sys_innodb_flush_neighbor_pages.name, (char*) &sys_innodb_flush_neighbor_pages, SHOW_SYS}, ++ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS}, ++ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, ++ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, ++ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, +@@ -1482,6 +1501,13 @@ + } + } + ++#ifdef HAVE_INNOBASE_DB ++extern void fix_innodb_read_ahead(THD *thd, enum_var_type type) ++{ ++ srv_read_ahead &= 3; ++} ++#endif /* HAVE_INNOBASE_DB */ ++ + static void fix_max_binlog_size(THD *thd, enum_var_type type) + { + DBUG_ENTER("fix_max_binlog_size"); +diff -ruN a/sql/set_var.h b/sql/set_var.h +--- a/sql/set_var.h 
2009-01-12 11:20:31.000000000 +0900 ++++ b/sql/set_var.h 2009-01-12 15:26:35.000000000 +0900 +@@ -31,6 +31,10 @@ + + extern TYPELIB bool_typelib, delay_key_write_typelib, sql_mode_typelib; + ++#ifdef HAVE_INNOBASE_DB ++extern TYPELIB innodb_read_ahead_typelib; ++#endif /* HAVE_INNOBASE_DB */ ++ + typedef int (*sys_check_func)(THD *, set_var *); + typedef bool (*sys_update_func)(THD *, set_var *); + typedef void (*sys_after_update_func)(THD *,enum_var_type); +@@ -1114,6 +1118,9 @@ + int sql_set_variables(THD *thd, List<set_var_base> *var_list); + bool not_all_support_one_shot(List<set_var_base> *var_list); + void fix_delay_key_write(THD *thd, enum_var_type type); ++#ifdef HAVE_INNOBASE_DB ++void fix_innodb_read_ahead(THD *thd, enum_var_type type); ++#endif /* HAVE_INNOBASE_DB */ + ulong fix_sql_mode(ulong sql_mode); + extern sys_var_const_str sys_charset_system; + extern sys_var_str sys_init_connect; diff --git a/percona/5.0.77-b13/innodb_io_pattern.patch b/percona/5.0.77-b13/innodb_io_pattern.patch new file mode 100644 index 0000000..26c9b69 --- /dev/null +++ b/percona/5.0.77-b13/innodb_io_pattern.patch @@ -0,0 +1,688 @@ +diff -r 5060df9888d7 include/mysql_com.h +--- a/include/mysql_com.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/include/mysql_com.h Tue Feb 17 22:33:58 2009 -0800 +@@ -121,6 +121,9 @@ + #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */ + #define REFRESH_DES_KEY_FILE 0x40000L + #define REFRESH_USER_RESOURCES 0x80000L ++ ++/* TRUNCATE INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++#define REFRESH_INNODB_IO_PATTERN 0x1000000L + + #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */ + #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */ +diff -r 5060df9888d7 innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Tue Feb 17 22:33:54 2009 -0800 ++++ b/innobase/buf/buf0buf.c Tue Feb 17 22:33:58 2009 -0800 +@@ -653,6 +653,9 @@ + } + + buf_pool->page_hash = hash_create(2 * max_size); ++ buf_pool->io_counter_hash = NULL; ++ 
buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; + + buf_pool->n_pend_reads = 0; + +@@ -1966,6 +1969,9 @@ + ulint io_type; + ulint read_page_no; + ++ buf_io_counter_t* io_counter; ++ ulint fold; ++ + ut_ad(block); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); +@@ -2067,6 +2073,26 @@ + buf_pool->n_pages_read++; + + rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; ++ io_counter->n_read = 0; ++ io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_read++; ++ } ++ } + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +@@ -2082,6 +2108,26 @@ + buf_flush_write_complete(block); + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; 
++ io_counter->n_read = 0; ++ io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_write++; ++ } ++ } + + buf_pool->n_pages_written++; + +@@ -2656,3 +2702,58 @@ + return buf_pool_get_nth_block(buf_pool, i); + + } ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void) ++/*========================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (srv_io_pattern_trace) { ++ if (buf_pool->io_counter_hash == NULL) { ++ /* estimating (buf_pool * 10) */ ++ buf_pool->io_counter_hash = hash_create(20 * buf_pool->max_size); ++ buf_pool->io_counter_heap = mem_heap_create(4096 * 1024); ++ buf_pool->io_counters = 0; ++ ++ srv_io_pattern = TRUE; ++ } ++ } else { ++ if (buf_pool->io_counter_hash != NULL) { ++ srv_io_pattern = FALSE; ++ ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_free(buf_pool->io_counter_heap); ++ buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; ++ ++ hash_table_free(buf_pool->io_counter_hash); ++ buf_pool->io_counter_hash = NULL; ++ } ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} ++ ++void ++buf_io_counter_clear(void) ++/*======================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (buf_pool->io_counter_hash != NULL) { ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_empty(buf_pool->io_counter_heap); ++ buf_pool->io_counters = 0; ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} +diff -r 5060df9888d7 
innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/innobase/include/buf0buf.h Tue Feb 17 22:33:58 2009 -0800 +@@ -709,6 +709,18 @@ + void buf_pool_dump(void); + buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void); ++/*=========================*/ ++ ++void ++buf_io_counter_clear(void); ++/*=======================*/ + + /* The buffer control block structure */ + +@@ -930,6 +942,9 @@ + ulint curr_size; /* current pool size in pages; + currently always the same as + max_size */ ++ hash_table_t* io_counter_hash; ++ mem_heap_t* io_counter_heap; ++ ulint io_counters; + hash_table_t* page_hash; /* hash table of the file pages */ + + ulint n_pend_reads; /* number of pending read operations */ +@@ -1015,6 +1030,15 @@ + locki table, are not in this list */ + }; + ++struct buf_io_counter_struct{ ++ ulint space; ++ ulint offset; ++ buf_io_counter_t* hash; ++ ulint index_id; ++ ulint n_read; ++ ulint n_write; ++}; ++ + /* States of a control block */ + #define BUF_BLOCK_NOT_USED 211 /* is in the free list */ + #define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns +diff -r 5060df9888d7 innobase/include/buf0types.h +--- a/innobase/include/buf0types.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/innobase/include/buf0types.h Tue Feb 17 22:33:58 2009 -0800 +@@ -12,6 +12,8 @@ + typedef struct buf_block_struct buf_block_t; + typedef struct buf_pool_struct buf_pool_t; + ++typedef struct buf_io_counter_struct buf_io_counter_t; ++ + /* The 'type' used of a buffer frame */ + typedef byte buf_frame_t; + +diff -r 5060df9888d7 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/innobase/include/srv0srv.h Tue Feb 17 22:33:58 2009 -0800 +@@ -145,6 
+145,11 @@ + extern ulint srv_flush_neighbor_pages; + extern uint srv_read_ahead; + extern ulint srv_adaptive_checkpoint; ++ ++extern volatile ibool srv_io_pattern; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -r 5060df9888d7 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Tue Feb 17 22:33:54 2009 -0800 ++++ b/innobase/srv/srv0srv.c Tue Feb 17 22:33:58 2009 -0800 +@@ -344,6 +344,11 @@ + + uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ + ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ ++ ++volatile ibool srv_io_pattern = FALSE; ++ulint srv_io_pattern_trace = 0; ++ulint srv_io_pattern_trace_running = 0; ++ulint srv_io_pattern_size_limit = ULINT_MAX - (1024 * 1024); + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 20; + ulong srv_n_free_tickets_to_enter = 500; +diff -r 5060df9888d7 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Tue Feb 17 22:33:54 2009 -0800 ++++ b/mysql-test/r/information_schema.result Tue Feb 17 22:33:58 2009 -0800 +@@ -59,6 +59,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + columns_priv + db + func +@@ -742,7 +743,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-108 ++109 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -812,12 +813,13 @@ + TABLE_PRIVILEGES TABLE_NAME select + TABLE_STATISTICS TABLE_NAME select + VIEWS TABLE_NAME select ++INNODB_IO_PATTERN TABLE_NAME select + delete from mysql.user where user='mysqltest_4'; + delete from mysql.db where user='mysqltest_4'; + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 23 
++information_schema 24 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1225,6 +1227,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT t.table_name, c1.column_name + FROM information_schema.tables t + INNER JOIN +@@ -1263,6 +1266,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT MAX(table_name) FROM information_schema.tables; + MAX(table_name) + VIEWS +@@ -1337,6 +1341,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 ++INNODB_IO_PATTERN information_schema.INNODB_IO_PATTERN 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 +diff -r 5060df9888d7 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Tue Feb 17 22:33:54 2009 -0800 ++++ b/mysql-test/r/information_schema_db.result Tue Feb 17 22:33:58 2009 -0800 +@@ -28,6 +28,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + show tables from INFORMATION_SCHEMA like 'T%'; + Tables_in_information_schema (T%) + TABLES +diff -r 5060df9888d7 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Tue Feb 17 22:33:54 2009 -0800 ++++ b/mysql-test/r/mysqlshow.result Tue Feb 17 22:33:58 2009 -0800 +@@ -102,6 +102,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Database: INFORMATION_SCHEMA + +---------------------------------------+ +@@ -130,6 +131,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Wildcard: inf_rmation_schema + +--------------------+ +diff -r 5060df9888d7 
patch_info/innodb_io_pattern.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_io_pattern.info Tue Feb 17 22:33:58 2009 -0800 +@@ -0,0 +1,8 @@ ++File=innodb_io_pattern.patch ++Name=Information schema table of InnoDB IO counts for each datafile pages ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=INFORMATION_SCHEMA.INNODB_IO_PATTERN ++2008-12-01 ++YK: fix for mysql-test +diff -r 5060df9888d7 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/ha_innodb.cc Tue Feb 17 22:33:58 2009 -0800 +@@ -1569,6 +1569,8 @@ + pthread_cond_init(&commit_cond, NULL); + innodb_inited= 1; + ++ buf_io_counter_control(); ++ + /* If this is a replication slave and we needed to do a crash recovery, + set the master binlog position to what InnoDB internally knew about + how far we got transactions durable inside InnoDB. There is a +@@ -6535,6 +6537,28 @@ + } + + /**************************************************************************** ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++innodb_io_pattern_control(void) ++/*===========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_control(); ++ } ++} ++ ++void ++innodb_io_pattern_clear(void) ++/*=========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_clear(); ++ } ++} ++ ++/**************************************************************************** + Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB + Monitor to the client. 
*/ + +diff -r 5060df9888d7 sql/ha_innodb.h +--- a/sql/ha_innodb.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/ha_innodb.h Tue Feb 17 22:33:58 2009 -0800 +@@ -244,6 +244,9 @@ + extern ulong srv_adaptive_checkpoint; + extern ulong srv_show_locks_held; + extern ulong srv_show_verbose_locks; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + } + + bool innobase_init(void); +@@ -270,6 +273,9 @@ + bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); ++ ++void innodb_io_pattern_control(void); ++void innodb_io_pattern_clear(void); + + void innobase_release_temporary_latches(THD *thd); + +diff -r 5060df9888d7 sql/lex.h +--- a/sql/lex.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/lex.h Tue Feb 17 22:33:58 2009 -0800 +@@ -244,6 +244,7 @@ + { "INNER", SYM(INNER_SYM)}, + { "INNOBASE", SYM(INNOBASE_SYM)}, + { "INNODB", SYM(INNOBASE_SYM)}, ++ { "INNODB_IO_PATTERN", SYM(INNODB_IO_PATTERN)}, + { "INOUT", SYM(INOUT_SYM)}, + { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, + { "INSERT", SYM(INSERT)}, +diff -r 5060df9888d7 sql/mysqld.cc +--- a/sql/mysqld.cc Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/mysqld.cc Tue Feb 17 22:33:58 2009 -0800 +@@ -5026,6 +5026,9 @@ + OPT_INNODB_SYNC_SPIN_LOOPS, + OPT_INNODB_CONCURRENCY_TICKETS, + OPT_INNODB_THREAD_SLEEP_DELAY, ++ OPT_INNODB_IO_PATTERN_TRACE, ++ OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ OPT_INNODB_IO_PATTERN_SIZE_LIMIT, + OPT_BDB_CACHE_SIZE, + OPT_BDB_LOG_BUFFER_SIZE, + OPT_BDB_MAX_LOCK, +@@ -5445,6 +5448,18 @@ + "Number of background write I/O threads in InnoDB.", + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE, ++ "Create/Drop the internal hash table for IO pattern tracing.", ++ (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace, ++ 0, GET_ULONG, 
REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_trace_running", OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ "Control IO pattern trace running or not.", ++ (gptr*) &srv_io_pattern_trace_running, (gptr*) &srv_io_pattern_trace_running, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_size_limit", OPT_INNODB_IO_PATTERN_SIZE_LIMIT, ++ "Set max number of counters per data pages. (0 = disable counting).", ++ (gptr*) &srv_io_pattern_size_limit, (gptr*) &srv_io_pattern_size_limit, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX - (1024 * 1024), 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +diff -r 5060df9888d7 sql/set_var.cc +--- a/sql/set_var.cc Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/set_var.cc Tue Feb 17 22:33:58 2009 -0800 +@@ -528,6 +528,12 @@ + sys_var_long_ptr sys_innodb_show_verbose_locks( + "innodb_show_verbose_locks", + &srv_show_verbose_locks); ++sys_var_innodb_io_pattern_trace sys_innodb_io_pattern_trace("innodb_io_pattern_trace", ++ &srv_io_pattern_trace); ++sys_var_long_ptr sys_innodb_io_pattern_trace_running("innodb_io_pattern_trace_running", ++ &srv_io_pattern_trace_running); ++sys_var_long_ptr sys_innodb_io_pattern_size_limit("innodb_io_pattern_size_limit", ++ &srv_io_pattern_size_limit); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -901,6 +907,9 @@ + &sys_innodb_adaptive_checkpoint, + &sys_innodb_show_locks_held, + &sys_innodb_show_verbose_locks, ++ &sys_innodb_io_pattern_trace, ++ &sys_innodb_io_pattern_trace_running, ++ &sys_innodb_io_pattern_size_limit, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -1047,6 +1056,9 @@ + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, + 
{"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, ++ {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, SHOW_SYS}, ++ {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS}, ++ {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, +@@ -3160,6 +3172,19 @@ + thd->variables.lc_time_names= global_system_variables.lc_time_names; + } + ++#ifdef HAVE_INNOBASE_DB ++bool sys_var_innodb_io_pattern_trace::update(THD *thd, set_var *var) ++{ ++ bool ret; ++ ++ ret = sys_var_long_ptr_global::update(thd, var); ++ ++ innodb_io_pattern_control(); ++ ++ return ret; ++} ++#endif /* HAVE_INNOBASE_DB */ ++ + /* + Functions to update thd->options bits + */ +diff -r 5060df9888d7 sql/set_var.h +--- a/sql/set_var.h Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/set_var.h Tue Feb 17 22:33:58 2009 -0800 +@@ -989,6 +989,17 @@ + virtual void set_default(THD *thd, enum_var_type type); + }; + ++#ifdef HAVE_INNOBASE_DB ++/* sys_var_innodb_io_pattern_trace */ ++class sys_var_innodb_io_pattern_trace :public sys_var_long_ptr ++{ ++public: ++ sys_var_innodb_io_pattern_trace(const char *name_arg, ulong *value_ptr_arg) ++ :sys_var_long_ptr(name_arg,value_ptr_arg) {} ++ bool update(THD *thd, set_var *var); ++}; ++#endif /* HAVE_INNOBASE_DB */ ++ + /**************************************************************************** + Classes for parsing of the SET command + ****************************************************************************/ +diff -r 5060df9888d7 sql/sql_parse.cc +--- a/sql/sql_parse.cc Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/sql_parse.cc Tue Feb 17 22:33:58 2009 -0800 +@@ -8009,6 +8009,13 @@ + } + 
pthread_mutex_unlock(&LOCK_global_user_client_stats); + } ++#ifdef HAVE_INNOBASE_DB ++ if (options & REFRESH_INNODB_IO_PATTERN) ++ { ++ tmp_write_to_binlog= 0; ++ innodb_io_pattern_clear(); ++ } ++#endif /* HAVE_INNOBASE_DB */ + *write_to_binlog= tmp_write_to_binlog; + return result; + } +diff -r 5060df9888d7 sql/sql_show.cc +--- a/sql/sql_show.cc Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/sql_show.cc Tue Feb 17 22:33:58 2009 -0800 +@@ -32,6 +32,17 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++ ++#ifdef HAVE_INNOBASE_DB ++#define INSIDE_HA_INNOBASE_CC ++extern "C" { ++#include "srv0srv.h" ++#include "buf0buf.h" ++#include "dict0dict.h" ++} ++/* We need to undef it in InnoDB */ ++#undef byte ++#endif /* HAVE_INNOBASE_DB */ + + #ifndef NO_EMBEDDED_ACCESS_CHECKS + static const char *grant_names[]={ +@@ -4088,6 +4099,67 @@ + DBUG_RETURN(res); + } + ++int innodb_io_pattern_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ TABLE *table= (TABLE *) tables->table; ++ ++ buf_io_counter_t* io_counter; ++ dict_index_t* index; ++ ++ DBUG_ENTER("innodb_io_pattern_fill_table"); ++ int returnable= 0; ++ ++ /* We cannot use inline functions of InnoDB here */ ++ ++ /* !!!!!ATTENTION!!!!!: This function is not protected by mutex for performance. */ ++ /* Don't use "DROP TABLE innodb_io_pattern" and INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++ /* at the same time as possible. 
*/ ++ ++ if (srv_io_pattern) { ++ for (ulint n=0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ io_counter = (buf_io_counter_t*)(buf_pool->io_counter_hash->array + n)->node; ++ while (io_counter) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ if (dict_sys != NULL) { ++ dulint id; ++ id.high = 0; ++ id.low = io_counter->index_id; ++ index = dict_index_find_on_id_low(id); ++ } else { ++ index = NULL; ++ } ++ ++ table->field[0]->store(io_counter->space); ++ table->field[1]->store(io_counter->offset); ++ table->field[2]->store(io_counter->index_id); ++ if (index != NULL) { ++ table->field[3]->store(index->table_name,strlen(index->table_name),system_charset_info); ++ table->field[4]->store(index->name,strlen(index->name),system_charset_info); ++ } else { ++ table->field[3]->store("",0,system_charset_info); ++ table->field[4]->store("",0,system_charset_info); ++ } ++ table->field[5]->store(io_counter->n_read); ++ table->field[6]->store(io_counter->n_write); ++ if (schema_table_store_record(thd, table)) ++ { ++ returnable= 1; ++ goto end_func; ++ } ++ io_counter = io_counter->hash; ++ } ++ } ++ } ++ ++ end_func: ++ DBUG_RETURN(returnable); ++} ++ + /* + Find schema_tables elment by name + +@@ -4894,6 +4966,19 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++#ifdef HAVE_INNOBASE_DB ++ST_FIELD_INFO innodb_io_pattern_field_info[]= ++{ ++ {"SPACE", 11, MYSQL_TYPE_LONG, 0, 0, "space_id"}, ++ {"OFFSET", 11, MYSQL_TYPE_LONG, 0, 0, "offset"}, ++ {"INDEX_ID", 11, MYSQL_TYPE_LONG, 0, 0, "index id"}, ++ {"TABLE_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "table name"}, ++ {"INDEX_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "index name"}, ++ {"N_READ", 11, MYSQL_TYPE_LONG, 0, 0, "read ios"}, ++ {"N_WRITE", 11, MYSQL_TYPE_LONG, 0, 0, "write ios"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++#endif + + ST_FIELD_INFO variables_fields_info[]= + { +@@ -5069,6 +5154,10 @@ + make_old_format, 0, -1, -1, 1}, + {"VIEWS", view_fields_info, 
create_schema_table, + get_all_tables, 0, get_schema_views_record, 1, 2, 0}, ++#ifdef HAVE_INNOBASE_DB ++ {"INNODB_IO_PATTERN", innodb_io_pattern_field_info, create_schema_table, ++ innodb_io_pattern_fill_table, 0, 0, -1, -1, 0}, ++#endif + {0, 0, 0, 0, 0, 0, 0, 0, 0} + }; + +diff -r 5060df9888d7 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Tue Feb 17 22:33:54 2009 -0800 ++++ b/sql/sql_yacc.yy Tue Feb 17 22:33:58 2009 -0800 +@@ -685,6 +685,7 @@ + %token INFILE + %token INNER_SYM + %token INNOBASE_SYM ++%token INNODB_IO_PATTERN + %token INOUT_SYM + %token INSENSITIVE_SYM + %token INSERT +@@ -8541,6 +8542,7 @@ + | MASTER_SYM { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } + | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } ++ | INNODB_IO_PATTERN { Lex->type|= REFRESH_INNODB_IO_PATTERN; } + | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; } + | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; } + | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; } +@@ -9594,6 +9596,7 @@ + | ISOLATION {} + | ISSUER_SYM {} + | INNOBASE_SYM {} ++ | INNODB_IO_PATTERN {} + | INSERT_METHOD {} + | IO_SYM {} + | IPC_SYM {} diff --git a/percona/5.0.77-b13/innodb_locks_held.patch b/percona/5.0.77-b13/innodb_locks_held.patch new file mode 100644 index 0000000..a4ae8ab --- /dev/null +++ b/percona/5.0.77-b13/innodb_locks_held.patch @@ -0,0 +1,219 @@ +diff -r 7d3d7786b927 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Tue Feb 17 22:33:33 2009 -0800 ++++ b/innobase/include/srv0srv.h Tue Feb 17 22:33:47 2009 -0800 +@@ -80,6 +80,8 @@ + extern ulint srv_log_file_size; + extern ulint srv_log_buffer_size; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + + extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 + character set */ +diff -r 7d3d7786b927 innobase/lock/lock0lock.c +--- a/innobase/lock/lock0lock.c Tue Feb 17 22:33:33 2009 -0800 ++++ 
b/innobase/lock/lock0lock.c Tue Feb 17 22:33:47 2009 -0800 +@@ -4181,6 +4181,7 @@ + #endif /* UNIV_SYNC_DEBUG */ + } + ++ if ( srv_show_verbose_locks ) { + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { +@@ -4198,6 +4199,7 @@ + putc('\n', file); + } + } ++ } /* srv_show_verbose_locks */ + + mtr_commit(&mtr); + if (UNIV_LIKELY_NULL(heap)) { +@@ -4369,7 +4371,7 @@ + } + } + +- if (!srv_print_innodb_lock_monitor) { ++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) { + nth_trx++; + goto loop; + } +@@ -4426,9 +4428,9 @@ + + nth_lock++; + +- if (nth_lock >= 10) { ++ if (nth_lock >= srv_show_locks_held) { + fputs( +- "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", ++ "TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", + file); + + nth_trx++; +diff -r 7d3d7786b927 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Tue Feb 17 22:33:33 2009 -0800 ++++ b/innobase/srv/srv0srv.c Tue Feb 17 22:33:47 2009 -0800 +@@ -116,6 +116,8 @@ + ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ + ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ + ulong srv_flush_log_at_trx_commit = 1; ++ulint srv_show_locks_held = 10; ++ulint srv_show_verbose_locks = 0; + + byte srv_latin1_ordering[256] /* The sort order table of the latin1 + character set. 
The following table is +@@ -1703,24 +1705,6 @@ + + mutex_exit(&dict_foreign_err_mutex); + +- lock_print_info_summary(file); +- if (trx_start) { +- long t = ftell(file); +- if (t < 0) { +- *trx_start = ULINT_UNDEFINED; +- } else { +- *trx_start = (ulint) t; +- } +- } +- lock_print_info_all_transactions(file); +- if (trx_end) { +- long t = ftell(file); +- if (t < 0) { +- *trx_end = ULINT_UNDEFINED; +- } else { +- *trx_end = (ulint) t; +- } +- } + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); +@@ -1813,6 +1797,25 @@ + srv_n_rows_updated_old = srv_n_rows_updated; + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; ++ ++ lock_print_info_summary(file); ++ if (trx_start) { ++ long t = ftell(file); ++ if (t < 0) { ++ *trx_start = ULINT_UNDEFINED; ++ } else { ++ *trx_start = (ulint) t; ++ } ++ } ++ lock_print_info_all_transactions(file); ++ if (trx_end) { ++ long t = ftell(file); ++ if (t < 0) { ++ *trx_end = ULINT_UNDEFINED; ++ } else { ++ *trx_end = (ulint) t; ++ } ++ } + + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" +diff -r 7d3d7786b927 libmysqld/set_var.cc +--- a/libmysqld/set_var.cc Tue Feb 17 22:33:33 2009 -0800 ++++ b/libmysqld/set_var.cc Tue Feb 17 22:33:47 2009 -0800 +@@ -821,6 +821,8 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -936,6 +938,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + 
{"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, +diff -r 7d3d7786b927 patch_info/innodb_locks_held.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_locks_held.info Tue Feb 17 22:33:47 2009 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_locks_held.patch ++Name=Add locks held, remove locked records in SHOW INNODB STATUS ++Version=1.0 ++Author=Baron Schwartz <baron@xaprb.com> ++License=GPL ++Comment=Bug #29126 fix +diff -r 7d3d7786b927 sql/ha_innodb.h +--- a/sql/ha_innodb.h Tue Feb 17 22:33:33 2009 -0800 ++++ b/sql/ha_innodb.h Tue Feb 17 22:33:47 2009 -0800 +@@ -242,6 +242,8 @@ + extern ulong srv_flush_neighbor_pages; + extern uint srv_read_ahead; + extern ulong srv_adaptive_checkpoint; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + } + + bool innobase_init(void); +diff -r 7d3d7786b927 sql/mysqld.cc +--- a/sql/mysqld.cc Tue Feb 17 22:33:33 2009 -0800 ++++ b/sql/mysqld.cc Tue Feb 17 22:33:47 2009 -0800 +@@ -5012,6 +5012,8 @@ + OPT_INNODB_MAX_PURGE_LAG, + OPT_INNODB_FILE_IO_THREADS, + OPT_INNODB_LOCK_WAIT_TIMEOUT, ++ OPT_INNODB_SHOW_LOCKS_HELD, ++ OPT_INNODB_SHOW_VERBOSE_LOCKS, + OPT_INNODB_THREAD_CONCURRENCY, + OPT_INNODB_COMMIT_CONCURRENCY, + OPT_INNODB_FORCE_RECOVERY, +@@ -5356,6 +5358,14 @@ + (gptr*) &srv_flush_log_at_trx_commit, + (gptr*) &srv_flush_log_at_trx_commit, + 0, GET_ULONG, OPT_ARG, 1, 0, 2, 0, 0, 0}, ++ {"innodb_show_locks_held", OPT_INNODB_SHOW_LOCKS_HELD, ++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_locks_held, (gptr*) &srv_show_locks_held, ++ 0, GET_LONG, OPT_ARG, 10, 0, 1000, 0, 1, 0}, ++ {"innodb_show_verbose_locks", OPT_INNODB_SHOW_VERBOSE_LOCKS, ++ "Whether to show records locked in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_verbose_locks, (gptr*) 
&srv_show_verbose_locks, ++ 0, GET_LONG, OPT_ARG, 0, 0, 1, 0, 1, 0}, + {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD, + "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method, + (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, +diff -r 7d3d7786b927 sql/set_var.cc +--- a/sql/set_var.cc Tue Feb 17 22:33:33 2009 -0800 ++++ b/sql/set_var.cc Tue Feb 17 22:33:47 2009 -0800 +@@ -522,6 +522,12 @@ + &innodb_read_ahead_typelib, fix_innodb_read_ahead); + sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", + &srv_adaptive_checkpoint); ++sys_var_long_ptr sys_innodb_show_locks_held( ++ "innodb_show_locks_held", ++ &srv_show_locks_held); ++sys_var_long_ptr sys_innodb_show_verbose_locks( ++ "innodb_show_verbose_locks", ++ &srv_show_verbose_locks); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -893,6 +899,8 @@ + &sys_innodb_flush_neighbor_pages, + &sys_innodb_read_ahead, + &sys_innodb_adaptive_checkpoint, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -1008,6 +1016,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, diff --git 
a/percona/5.0.77-b13/innodb_rw_lock.patch b/percona/5.0.77-b13/innodb_rw_lock.patch new file mode 100644 index 0000000..f7a3166 --- /dev/null +++ b/percona/5.0.77-b13/innodb_rw_lock.patch @@ -0,0 +1,1472 @@ +diff -r 962aec0d731c innobase/configure +--- a/innobase/configure Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure Thu Oct 09 08:30:28 2008 -0700 +@@ -20519,6 +20519,88 @@ + + fi + done ++ ++ ++# as http://lists.mysql.com/commits/40686 does ++{ echo "$as_me:$LINENO: checking whether the compiler provides atomic builtins" >&5 ++echo $ECHO_N "checking whether the compiler provides atomic builtins... $ECHO_C" >&6; } ++if test "${mysql_cv_atomic_builtins+set}" = set; then ++ echo $ECHO_N "(cached) $ECHO_C" >&6 ++else ++ if test "$cross_compiling" = yes; then ++ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&5 ++echo "$as_me: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&2;} ++ { (exit 1); exit 1; }; } ++else ++ cat >conftest.$ac_ext <<_ACEOF ++/* confdefs.h. */ ++_ACEOF ++cat confdefs.h >>conftest.$ac_ext ++cat >>conftest.$ac_ext <<_ACEOF ++/* end confdefs.h. */ ++ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++ ++_ACEOF ++rm -f conftest$ac_exeext ++if { (ac_try="$ac_link" ++case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_link") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 ++ (exit $ac_status); } && { ac_try='./conftest$ac_exeext' ++ { (case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_try") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; }; then ++ mysql_cv_atomic_builtins=yes ++else ++ echo "$as_me: program exited with status $ac_status" >&5 ++echo "$as_me: failed program was:" >&5 ++sed 's/^/| /' conftest.$ac_ext >&5 ++ ++( exit $ac_status ) ++mysql_cv_atomic_builtins=no ++fi ++rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext ++fi ++ ++ ++fi ++{ echo "$as_me:$LINENO: result: $mysql_cv_atomic_builtins" >&5 ++echo "${ECHO_T}$mysql_cv_atomic_builtins" >&6; } ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ ++cat >>confdefs.h <<\_ACEOF ++#define HAVE_ATOMIC_BUILTINS 1 ++_ACEOF ++ ++fi + + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. 
+diff -r 962aec0d731c innobase/configure.in +--- a/innobase/configure.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure.in Thu Oct 09 08:30:28 2008 -0700 +@@ -42,6 +42,31 @@ + AC_CHECK_FUNCS(sched_yield) + AC_CHECK_FUNCS(fdatasync) + AC_CHECK_FUNCS(localtime_r) ++ ++# as http://lists.mysql.com/commits/40686 does ++AC_CACHE_CHECK([whether the compiler provides atomic builtins], ++ [mysql_cv_atomic_builtins], [AC_TRY_RUN([ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++], [mysql_cv_atomic_builtins=yes], [mysql_cv_atomic_builtins=no])]) ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, ++ [Define to 1 if compiler provides atomic builtins.]) ++fi ++ + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. + #AC_C_INLINE Already checked in MySQL +diff -r 962aec0d731c innobase/ib_config.h +--- a/innobase/ib_config.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h Thu Oct 09 08:30:28 2008 -0700 +@@ -3,6 +3,9 @@ + + /* Define to 1 if you have the <aio.h> header file. */ + #define HAVE_AIO_H 1 ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#define HAVE_ATOMIC_BUILTINS 1 + + /* Define to 1 if you have the <dlfcn.h> header file. */ + #define HAVE_DLFCN_H 1 +diff -r 962aec0d731c innobase/ib_config.h.in +--- a/innobase/ib_config.h.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h.in Thu Oct 09 08:30:28 2008 -0700 +@@ -2,6 +2,9 @@ + + /* Define to 1 if you have the <aio.h> header file. */ + #undef HAVE_AIO_H ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#undef HAVE_ATOMIC_BUILTINS + + /* Define to 1 if you have the <dlfcn.h> header file. 
*/ + #undef HAVE_DLFCN_H +diff -r 962aec0d731c innobase/include/sync0rw.h +--- a/innobase/include/sync0rw.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.h Thu Oct 09 08:30:28 2008 -0700 +@@ -325,7 +325,17 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( + /*================*/ + rw_lock_t* lock); + UNIV_INLINE +@@ -408,6 +418,17 @@ + rw_lock_debug_t* info); /* in: debug struct */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++/* This value means NOT_LOCKED */ ++#define RW_LOCK_BIAS 0x00100000 ++#else ++#error HAVE_ATOMIC_BUILTINS is not defined. Do you use enough new GCC or compatibles? ++#error Or do you use exact options for CFLAGS? ++#error e.g. (for x86_32): "-m32 -march=i586 -mtune=i686" ++#error e.g. (for Sparc_64): "-m64 -mcpu=v9" ++#error Otherwise, this build may be slower than normal version. ++#endif ++ + /* NOTE! The structure appears here only for the compiler to know its size. + Do not use its fields directly! The structure used in the spin lock + implementation of a read-write lock. Several threads may have a shared lock +@@ -417,9 +432,9 @@ + field. Then no new readers are allowed in. */ + + struct rw_lock_struct { +- os_event_t event; /* Used by sync0arr.c for thread queueing */ +- +-#ifdef __WIN__ ++ /* Used by sync0arr.c for thread queueing */ ++ os_event_t s_event; /* Used for s_lock */ ++ os_event_t x_event; /* Used for x_lock */ + os_event_t wait_ex_event; /* This windows specific event is + used by the thread which has set the + lock state to RW_LOCK_WAIT_EX. The +@@ -427,31 +442,35 @@ + thread will be the next one to proceed + once the current the event gets + signalled. 
See LEMMA 2 in sync0sync.c */ ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ volatile lint lock_word; /* Used by using atomic builtin */ + #endif + +- ulint reader_count; /* Number of readers who have locked this ++ volatile ulint reader_count; /* Number of readers who have locked this + lock in the shared mode */ +- ulint writer; /* This field is set to RW_LOCK_EX if there ++ volatile ulint writer; /* This field is set to RW_LOCK_EX if there + is a writer owning the lock (in exclusive + mode), RW_LOCK_WAIT_EX if a writer is + queueing for the lock, and + RW_LOCK_NOT_LOCKED, otherwise. */ +- os_thread_id_t writer_thread; ++ volatile os_thread_id_t writer_thread; + /* Thread id of a possible writer thread */ +- ulint writer_count; /* Number of times the same thread has ++ volatile ulint writer_count; /* Number of times the same thread has + recursively locked the lock in the exclusive + mode */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t mutex; /* The mutex protecting rw_lock_struct */ ++#endif + ulint pass; /* Default value 0. This is set to some + value != 0 given by the caller of an x-lock + operation, if the x-lock is to be passed to + another thread to unlock (which happens in + asynchronous i/o). */ +- ulint waiters; /* This ulint is set to 1 if there are +- waiters (readers or writers) in the global +- wait array, waiting for this rw_lock. +- Otherwise, == 0. 
*/ +- ibool writer_is_wait_ex; ++ volatile ulint s_waiters; /* 1: there are waiters (s_lock) */ ++ volatile ulint x_waiters; /* 1: there are waiters (x_lock) */ ++ volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */ ++ volatile ibool writer_is_wait_ex; + /* This is TRUE if the writer field is + RW_LOCK_WAIT_EX; this field is located far + from the memory update hotspot fields which +diff -r 962aec0d731c innobase/include/sync0rw.ic +--- a/innobase/include/sync0rw.ic Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.ic Thu Oct 09 08:30:28 2008 -0700 +@@ -47,20 +47,52 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( + /*================*/ + rw_lock_t* lock) + { +- return(lock->waiters); ++ return(lock->s_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->x_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->wait_ex_waiters); + } + UNIV_INLINE + void +-rw_lock_set_waiters( +-/*================*/ ++rw_lock_set_s_waiters( + rw_lock_t* lock, + ulint flag) + { +- lock->waiters = flag; ++ lock->s_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_x_waiters( ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->x_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_wx_waiters( ++/*================*/ ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->wait_ex_waiters = flag; + } + UNIV_INLINE + ulint +@@ -68,7 +100,19 @@ + /*===============*/ + rw_lock_t* lock) + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (lock->writer == RW_LOCK_NOT_LOCKED) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ if (lock->writer_is_wait_ex) { ++ return(RW_LOCK_WAIT_EX); ++ } else { ++ return(RW_LOCK_EX); ++ } ++#else + return(lock->writer); ++#endif + } + UNIV_INLINE + void +@@ -96,6 +140,7 @@ + { + lock->reader_count = count; + } ++#ifndef HAVE_ATOMIC_BUILTINS + UNIV_INLINE + 
mutex_t* + rw_lock_get_mutex( +@@ -104,6 +149,7 @@ + { + return(&(lock->mutex)); + } ++#endif + + /********************************************************************** + Returns the value of writer_count for the lock. Does not reserve the lock +@@ -133,14 +179,26 @@ + const char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { +-#ifdef UNIV_SYNC_DEBUG ++#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS) + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ + /* Check if the writer field is free */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, +@@ -167,11 +225,15 @@ + const char* file_name, /* in: file name where requested */ + ulint line) /* in: line where lock requested */ + { +- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); ++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + ut_ad(rw_lock_get_reader_count(lock) == 0); + + /* Set the shared lock by incrementing the reader count */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + lock->reader_count++; ++#endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; +@@ -199,7 +261,11 @@ + + rw_lock_set_writer(lock, RW_LOCK_EX); + lock->writer_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->writer_count),1); ++#else + lock->writer_count++; ++#endif + lock->pass = 0; + + 
lock->last_x_file_name = file_name; +@@ -241,15 +307,21 @@ + ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { + /* Did not succeed, try spin wait */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + rw_lock_s_lock_spin(lock, pass, file_name, line); + +@@ -272,11 +344,23 @@ + { + ibool success = FALSE; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (lock->writer == RW_LOCK_NOT_LOCKED) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, +@@ -289,7 +373,9 @@ + success = TRUE; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(success); + } +@@ -309,6 +395,55 @@ + { + ibool success = FALSE; + os_thread_id_t curr_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if ((lock->lock_word == RW_LOCK_BIAS) ++ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ lock->writer_thread = curr_thread; ++ lock->pass = 0; ++ lock->writer_is_wait_ex = FALSE; ++ /* 
next function may work as memory barrier */ ++ relock: ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(TRUE); ++ } else { ++ /* x-unlock */ ++ __sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS); ++ } ++ } else { ++ /* fail (x-lock) */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ } ++ } ++ ++ if (lock->pass == 0 ++ && os_thread_eq(lock->writer_thread, curr_thread) ++ && rw_lock_get_writer(lock) == RW_LOCK_EX) { ++ goto relock; ++ } ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(FALSE); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) { +@@ -339,6 +474,7 @@ + ut_ad(rw_lock_validate(lock)); + + return(success); ++#endif + } + + /********************************************************************** +@@ -354,16 +490,33 @@ + #endif + ) + { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t* mutex = &(lock->mutex); +- ibool sg = FALSE; ++#endif ++ ibool x_sg = FALSE; ++ ibool wx_sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(mutex); ++#endif + + /* Reset the shared lock by decrementing the reader count */ + + ut_a(lock->reader_count > 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ ++ if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) { ++ last = TRUE; ++ } ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); +@@ -372,20 +525,36 @@ + /* If there may be waiters and this was the last s-lock, + signal the object */ + +- if (UNIV_UNLIKELY(lock->waiters) ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_UNLIKELY(last && 
lock->wait_ex_waiters)) { ++#else ++ if (UNIV_UNLIKELY(lock->wait_ex_waiters) + && lock->reader_count == 0) { +- sg = TRUE; ++#endif ++ wx_sg = TRUE; + +- rw_lock_set_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); ++ } ++#ifdef HAVE_ATOMIC_BUILTINS ++ else if (UNIV_UNLIKELY(last && lock->x_waiters)) { ++#else ++ else if (UNIV_UNLIKELY(lock->x_waiters) ++ && lock->reader_count == 0) { ++#endif ++ x_sg = TRUE; ++ ++ rw_lock_set_x_waiters(lock, 0); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(mutex); ++#endif + +- if (UNIV_UNLIKELY(sg)) { +-#ifdef __WIN__ ++ if (UNIV_UNLIKELY(wx_sg)) { + os_event_set(lock->wait_ex_event); +-#endif +- os_event_set(lock->event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } else if (UNIV_UNLIKELY(x_sg)) { ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -409,13 +578,22 @@ + + ut_ad(lock->reader_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_sub_and_fetch(&(lock->reader_count),1); ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + #ifdef UNIV_SYNC_PERF_STAT + rw_s_exit_count++; +@@ -435,41 +613,81 @@ + #endif + ) + { +- ibool sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif ++ ibool s_sg = FALSE; ++ ibool x_sg = FALSE; + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(&(lock->mutex)); ++#endif + + /* Reset the exclusive lock if this thread no longer has an x-mode + lock */ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++ last = TRUE; ++ } ++ ++ if (last) { ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ ++ /* FIXME: It 
is a value of bad manners for pthread. ++ But we shouldn't keep an ID of not-owner. */ ++ lock->writer_thread = -1; ++ ++ /* atomic operation may be safer about memory order. */ ++ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); ++ __sync_synchronize(); ++ } ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); + #endif + + /* If there may be waiters, signal the lock */ +- if (UNIV_UNLIKELY(lock->waiters) +- && lock->writer_count == 0) { +- +- sg = TRUE; +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (last) { ++#else ++ if (lock->writer_count == 0) { ++#endif ++ if(lock->s_waiters){ ++ s_sg = TRUE; ++ rw_lock_set_s_waiters(lock, 0); ++ } ++ if(lock->x_waiters){ ++ x_sg = TRUE; ++ rw_lock_set_x_waiters(lock, 0); ++ } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + +- if (UNIV_UNLIKELY(sg)) { ++ if (UNIV_UNLIKELY(s_sg)) { ++ os_event_set(lock->s_event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } ++ if (UNIV_UNLIKELY(x_sg)) { + #ifdef __WIN__ ++ /* I doubt the necessity of it. 
*/ + os_event_set(lock->wait_ex_event); + #endif +- os_event_set(lock->event); ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -494,9 +712,13 @@ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { ++#endif + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } + +@@ -504,7 +726,12 @@ + rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + + #ifdef UNIV_SYNC_PERF_STAT +diff -r 962aec0d731c innobase/sync/sync0arr.c +--- a/innobase/sync/sync0arr.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0arr.c Thu Oct 09 08:30:28 2008 -0700 +@@ -309,13 +309,13 @@ + { + if (type == SYNC_MUTEX) { + return(os_event_reset(((mutex_t *) object)->event)); +-#ifdef __WIN__ + } else if (type == RW_LOCK_WAIT_EX) { + return(os_event_reset( + ((rw_lock_t *) object)->wait_ex_event)); +-#endif +- } else { +- return(os_event_reset(((rw_lock_t *) object)->event)); ++ } else if (type == RW_LOCK_SHARED) { ++ return(os_event_reset(((rw_lock_t *) object)->s_event)); ++ } else { /* RW_LOCK_EX */ ++ return(os_event_reset(((rw_lock_t *) object)->x_event)); + } + } + +@@ -415,15 +415,12 @@ + + if (cell->request_type == SYNC_MUTEX) { + event = ((mutex_t*) cell->wait_object)->event; +-#ifdef __WIN__ +- /* On windows if the thread about to wait is the one which +- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then +- it waits on a special event i.e.: wait_ex_event. 
*/ + } else if (cell->request_type == RW_LOCK_WAIT_EX) { + event = ((rw_lock_t*) cell->wait_object)->wait_ex_event; +-#endif +- } else { +- event = ((rw_lock_t*) cell->wait_object)->event; ++ } else if (cell->request_type == RW_LOCK_SHARED) { ++ event = ((rw_lock_t*) cell->wait_object)->s_event; ++ } else { ++ event = ((rw_lock_t*) cell->wait_object)->x_event; + } + + cell->waiting = TRUE; +@@ -464,6 +461,7 @@ + mutex_t* mutex; + rw_lock_t* rwlock; + ulint type; ++ ulint writer; + + type = cell->request_type; + +@@ -492,12 +490,10 @@ + (ulong) mutex->waiters); + + } else if (type == RW_LOCK_EX +-#ifdef __WIN__ + || type == RW_LOCK_WAIT_EX +-#endif + || type == RW_LOCK_SHARED) { + +- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); ++ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file); + + rwlock = cell->old_wait_rw_lock; + +@@ -505,21 +501,23 @@ + " RW-latch at %p created in file %s line %lu\n", + rwlock, rwlock->cfile_name, + (ulong) rwlock->cline); +- if (rwlock->writer != RW_LOCK_NOT_LOCKED) { ++ writer = rw_lock_get_writer(rwlock); ++ if (writer != RW_LOCK_NOT_LOCKED) { + fprintf(file, + "a writer (thread id %lu) has reserved it in mode %s", + (ulong) os_thread_pf(rwlock->writer_thread), +- rwlock->writer == RW_LOCK_EX ++ writer == RW_LOCK_EX + ? 
" exclusive\n" + : " wait exclusive\n"); + } + + fprintf(file, +- "number of readers %lu, waiters flag %lu\n" ++ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n" + "Last time read locked in file %s line %lu\n" + "Last time write locked in file %s line %lu\n", + (ulong) rwlock->reader_count, +- (ulong) rwlock->waiters, ++ (ulong) rwlock->s_waiters, ++ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters), + rwlock->last_s_file_name, + (ulong) rwlock->last_s_line, + rwlock->last_x_file_name, +@@ -839,11 +837,15 @@ + /*========================*/ + sync_array_t* arr) /* in: wait array */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(arr->sg_count),1); ++#else + sync_array_enter(arr); + + arr->sg_count++; + + sync_array_exit(arr); ++#endif + } + + /************************************************************************** +@@ -880,19 +882,23 @@ + + mutex = cell->wait_object; + os_event_set(mutex->event); +-#ifdef __WIN__ + } else if (cell->request_type + == RW_LOCK_WAIT_EX) { + rw_lock_t* lock; + + lock = cell->wait_object; + os_event_set(lock->wait_ex_event); +-#endif +- } else { ++ } else if (cell->request_type ++ == RW_LOCK_SHARED) { + rw_lock_t* lock; + + lock = cell->wait_object; +- os_event_set(lock->event); ++ os_event_set(lock->s_event); ++ } else { ++ rw_lock_t* lock; ++ ++ lock = cell->wait_object; ++ os_event_set(lock->x_event); + } + } + } +diff -r 962aec0d731c innobase/sync/sync0rw.c +--- a/innobase/sync/sync0rw.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0rw.c Thu Oct 09 08:30:28 2008 -0700 +@@ -99,6 +99,7 @@ + object is created, then the following call initializes + the sync system. 
*/ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_create(rw_lock_get_mutex(lock)); + mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); + +@@ -108,8 +109,14 @@ + lock->mutex.cmutex_name = cmutex_name; + lock->mutex.mutex_type = 1; + #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ ++#endif /* !HAVE_ATOMIC_BUILTINS */ + +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ lock->lock_word = RW_LOCK_BIAS; ++#endif ++ rw_lock_set_s_waiters(lock, 0); ++ rw_lock_set_x_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + lock->writer_count = 0; + rw_lock_set_reader_count(lock, 0); +@@ -130,11 +137,9 @@ + lock->last_x_file_name = "not yet reserved"; + lock->last_s_line = 0; + lock->last_x_line = 0; +- lock->event = os_event_create(NULL); +- +-#ifdef __WIN__ ++ lock->s_event = os_event_create(NULL); ++ lock->x_event = os_event_create(NULL); + lock->wait_ex_event = os_event_create(NULL); +-#endif + + mutex_enter(&rw_lock_list_mutex); + +@@ -162,19 +167,21 @@ + ut_a(rw_lock_validate(lock)); + #endif /* UNIV_DEBUG */ + ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); +- ut_a(rw_lock_get_waiters(lock) == 0); ++ ut_a(rw_lock_get_s_waiters(lock) == 0); ++ ut_a(rw_lock_get_x_waiters(lock) == 0); ++ ut_a(rw_lock_get_wx_waiters(lock) == 0); + ut_a(rw_lock_get_reader_count(lock) == 0); + + lock->magic_n = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_free(rw_lock_get_mutex(lock)); ++#endif + + mutex_enter(&rw_lock_list_mutex); +- os_event_free(lock->event); +- +-#ifdef __WIN__ ++ os_event_free(lock->s_event); ++ os_event_free(lock->x_event); + os_event_free(lock->wait_ex_event); +-#endif + + if (UT_LIST_GET_PREV(list, lock)) { + ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); +@@ -192,6 +199,8 @@ + Checks that the rw-lock has been initialized and that there are no + simultaneous shared and exclusive locks. */ + ++/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. 
*/ ++ + ibool + rw_lock_validate( + /*=============*/ +@@ -199,7 +208,9 @@ + { + ut_a(lock); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_a((rw_lock_get_reader_count(lock) == 0) +@@ -207,11 +218,17 @@ + ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)); +- ut_a((rw_lock_get_waiters(lock) == 0) +- || (rw_lock_get_waiters(lock) == 1)); ++ ut_a((rw_lock_get_s_waiters(lock) == 0) ++ || (rw_lock_get_s_waiters(lock) == 1)); ++ ut_a((rw_lock_get_x_waiters(lock) == 0) ++ || (rw_lock_get_x_waiters(lock) == 1)); ++ ut_a((rw_lock_get_wx_waiters(lock) == 0) ++ || (rw_lock_get_wx_waiters(lock) == 1)); + ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(TRUE); + } +@@ -237,13 +254,14 @@ + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++spin_loop: + rw_s_spin_wait_count++; + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + } +@@ -262,15 +280,27 @@ + lock->cfile_name, (ulong) lock->cline, (ulong) i); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + /* We try once again to obtain the lock */ + + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + /* If we get here, locking did not succeed, we may + suspend the 
thread to wait in the wait array */ + +@@ -281,9 +311,26 @@ + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ rw_lock_set_s_waiters(lock, 1); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Success */ ++ } ++ } ++ ++ /* If wait_ex_waiter stalls, wakes it. */ ++ if (lock->wait_ex_waiters && lock->lock_word == RW_LOCK_BIAS) { ++ rw_lock_set_wx_waiters(lock, 0); ++ os_event_set(lock->wait_ex_event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -318,13 +358,19 @@ + { + ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + lock->writer_thread = os_thread_get_curr_id(); + + lock->pass = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#else ++ __sync_synchronize(); ++#endif + } + + /********************************************************************** +@@ -342,6 +388,89 @@ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ os_thread_id_t curr_thread = os_thread_get_curr_id(); ++ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ /* obtain RW_LOCK_WAIT_EX right */ ++ lock->writer_thread = curr_thread; ++ lock->pass = pass; ++ lock->writer_is_wait_ex = TRUE; ++ /* atomic operation may be safer about memory order. 
*/ ++ __sync_synchronize(); ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, ++ file_name, line); ++#endif ++ } ++ ++ if (!os_thread_eq(lock->writer_thread, curr_thread)) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ switch(rw_lock_get_writer(lock)) { ++ case RW_LOCK_WAIT_EX: ++ /* have right to try x-lock */ ++retry_x_lock: ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ lock->pass = pass; ++ lock->writer_is_wait_ex = FALSE; ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX); ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, ++ file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } else if(__sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* retry x-lock */ ++ goto retry_x_lock; ++ } ++ ++ /* There are readers, we have to wait */ ++ return(RW_LOCK_WAIT_EX); ++ ++ break; ++ ++ case RW_LOCK_EX: ++ /* already have x-lock */ ++ if ((lock->pass == 0)&&(pass == 0)) { ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, ++ line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } ++ ++ return(RW_LOCK_NOT_LOCKED); ++ ++ break; ++ ++ default: /* ??? 
*/ ++ return(RW_LOCK_NOT_LOCKED); ++ } ++#else /* HAVE_ATOMIC_BUILTINS */ ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ +@@ -423,6 +552,7 @@ + /* Locking succeeded, we may return */ + return(RW_LOCK_EX); + } ++#endif /* HAVE_ATOMIC_BUILTINS */ + + /* Locking did not succeed */ + return(RW_LOCK_NOT_LOCKED); +@@ -448,19 +578,33 @@ + ulint line) /* in: line where requested */ + { + ulint index; /* index of the reserved wait cell */ +- ulint state; /* lock state acquired */ ++ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ ulint prev_state = RW_LOCK_NOT_LOCKED; ++#endif + ulint i; /* spin round count */ + + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter_fast(&(lock->mutex)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. 
*/ ++#else + mutex_exit(&(lock->mutex)); ++#endif + ++spin_loop: + if (state == RW_LOCK_EX) { + + return; /* Locking succeeded */ +@@ -468,10 +612,9 @@ + } else if (state == RW_LOCK_NOT_LOCKED) { + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, + srv_spin_wait_delay)); +@@ -485,9 +628,12 @@ + } else if (state == RW_LOCK_WAIT_EX) { + + /* Spin waiting for the reader count field to become zero */ +- i = 0; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ while (lock->lock_word != RW_LOCK_BIAS ++#else + while (rw_lock_get_reader_count(lock) != 0 ++#endif + && i < SYNC_SPIN_ROUNDS) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, +@@ -500,7 +646,6 @@ + os_thread_yield(); + } + } else { +- i = 0; /* Eliminate a compiler warning */ + ut_error; + } + +@@ -516,34 +661,69 @@ + /* We try once again to obtain the lock. Acquire the mutex protecting + the rw-lock fields */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. */ ++#endif ++ + if (state == RW_LOCK_EX) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Locking succeeded */ + } ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + + rw_x_system_call_count++; + + sync_array_reserve_cell(sync_primary_wait_array, + lock, +-#ifdef __WIN__ +- /* On windows RW_LOCK_WAIT_EX signifies +- that this thread should wait on the +- special wait_ex_event. */ + (state == RW_LOCK_WAIT_EX) + ? 
RW_LOCK_WAIT_EX : +-#endif + RW_LOCK_EX, + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ if (state == RW_LOCK_WAIT_EX) { ++ rw_lock_set_wx_waiters(lock, 1); ++ } else { ++ rw_lock_set_x_waiters(lock, 1); ++ } + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ prev_state = state; ++ state = rw_lock_x_lock_low(lock, pass, file_name, line); ++ if (state == RW_LOCK_EX) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Locking succeeded */ ++ } ++ if (state != prev_state) { ++ /* retry! */ ++ sync_array_free_cell(sync_primary_wait_array, index); ++ goto lock_loop; ++ } ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -718,7 +898,9 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + info = UT_LIST_GET_FIRST(lock->debug_list); + +@@ -728,7 +910,9 @@ + && (info->pass == 0) + && (info->lock_type == lock_type)) { + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + /* Found! 
*/ + + return(TRUE); +@@ -736,7 +920,9 @@ + + info = UT_LIST_GET_NEXT(list, info); + } ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(FALSE); + } +@@ -758,21 +944,25 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if (lock_type == RW_LOCK_SHARED) { + if (lock->reader_count > 0) { + ret = TRUE; + } + } else if (lock_type == RW_LOCK_EX) { +- if (lock->writer == RW_LOCK_EX) { ++ if (rw_lock_get_writer(lock) == RW_LOCK_EX) { + ret = TRUE; + } + } else { + ut_error; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(ret); + } +@@ -801,16 +991,26 @@ + + count++; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + + fprintf(stderr, "RW-LOCK: %p ", lock); + +- if (rw_lock_get_waiters(lock)) { +- fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -822,7 +1022,9 @@ + } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +@@ -847,10 +1049,18 @@ + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + +- if (rw_lock_get_waiters(lock)) { +- 
fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -909,14 +1119,18 @@ + lock = UT_LIST_GET_FIRST(rw_lock_list); + + while (lock != NULL) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0)) { + count++; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +diff -r 962aec0d731c patch_info/innodb_rw_lock.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_rw_lock.info Thu Oct 09 08:30:28 2008 -0700 +@@ -0,0 +1,6 @@ ++File=innodb_rw_lock.patch ++Name=Fix of InnoDB rw_locks ++Version=1.0 ++Author=Yasufumi Kinoshita ++License=BSD ++Comment= diff --git a/percona/5.0.77-b13/innodb_show_bp.patch b/percona/5.0.77-b13/innodb_show_bp.patch new file mode 100644 index 0000000..a56ae9a --- /dev/null +++ b/percona/5.0.77-b13/innodb_show_bp.patch @@ -0,0 +1,447 @@ +diff -r fe944d2c6e1f innobase/btr/btr0btr.c +--- a/innobase/btr/btr0btr.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/btr/btr0btr.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2989,3 +2989,11 @@ + + return(TRUE); + } ++ ++dulint ++btr_page_get_index_id_noninline( ++/*============*/ ++ page_t* page) /* in: index page */ ++{ ++ return btr_page_get_index_id(page); ++} +diff -r fe944d2c6e1f innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2629,3 +2629,13 @@ + buf_block_print(block); + } + ++buf_block_t* ++buf_pool_get_nth_block_no_inline( ++/*===================*/ ++ /* out: 
pointer to block */ ++ buf_pool_t* buf_pool,/* in: buf_pool */ ++ ulint i) /* in: index of the block */{ ++ ++return buf_pool_get_nth_block(buf_pool, i); ++ ++} +diff -r fe944d2c6e1f innobase/include/btr0btr.h +--- a/innobase/include/btr0btr.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/btr0btr.h Mon Nov 10 19:48:24 2008 -0800 +@@ -69,6 +69,12 @@ + UNIV_INLINE + dulint + btr_page_get_index_id( ++/*==================*/ ++ /* out: index id */ ++ page_t* page); /* in: index page */ ++ ++dulint ++btr_page_get_index_id_noninline( + /*==================*/ + /* out: index id */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Nov 10 19:48:24 2008 -0800 +@@ -703,6 +703,8 @@ + buf_get_free_list_len(void); + /*=======================*/ + ++void buf_pool_dump(void); ++buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + + + /* The buffer control block structure */ +diff -r fe944d2c6e1f innobase/include/page0page.h +--- a/innobase/include/page0page.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/page0page.h Mon Nov 10 19:48:24 2008 -0800 +@@ -260,6 +260,12 @@ + /*============*/ + /* out: number of user records */ + page_t* page); /* in: index page */ ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page); /* in: index page */ + /******************************************************************* + Returns the number of records before the given record in chain. + The number includes infimum and supremum records. 
*/ +@@ -519,6 +525,12 @@ + UNIV_INLINE + ulint + page_get_data_size( ++/*===============*/ ++ /* out: data in bytes */ ++ page_t* page); /* in: index page */ ++ ++ulint ++page_get_data_size_noninline( + /*===============*/ + /* out: data in bytes */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/page/page0page.c +--- a/innobase/page/page0page.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/page/page0page.c Mon Nov 10 19:48:24 2008 -0800 +@@ -1994,3 +1994,25 @@ + page_cur_move_to_next(&cur); + } + } ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_n_recs(page); ++} ++ ++ ++ulint ++page_get_data_size_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_data_size(page); ++} ++ ++ ++ +diff -r fe944d2c6e1f mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Nov 10 19:48:25 2008 -0800 +@@ -42,6 +42,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +@@ -741,7 +742,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-107 ++108 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -802,6 +803,7 @@ + TABLE_NAME COLUMN_NAME PRIVILEGES + COLUMNS TABLE_NAME select + COLUMN_PRIVILEGES TABLE_NAME select ++INNODB_BUFFER_POOL_CONTENT TABLE_NAME select + INDEX_STATISTICS TABLE_NAME select + KEY_COLUMN_USAGE TABLE_NAME select + STATISTICS TABLE_NAME select +@@ -815,7 +817,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 22 
++information_schema 23 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1243,6 +1246,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1332,6 +1336,7 @@ + COLUMNS information_schema.COLUMNS 1 + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 ++INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 +diff -r fe944d2c6e1f mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Nov 10 19:48:25 2008 -0800 +@@ -11,6 +11,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +diff -r fe944d2c6e1f mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Nov 10 19:48:25 2008 -0800 +@@ -85,6 +85,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROCESSLIST | +@@ -112,6 +113,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | 
KEY_COLUMN_USAGE | + | PROCESSLIST | +diff -r fe944d2c6e1f patch_info/innodb_show_bp.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_show_bp.info Mon Nov 10 19:48:25 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_show_bp.patch ++Name=show innodb buffer pool content ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= +diff -r fe944d2c6e1f sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -128,10 +128,12 @@ + #include "../innobase/include/lock0lock.h" + #include "../innobase/include/dict0crea.h" + #include "../innobase/include/btr0cur.h" ++#include "../innobase/include/buf0buf.h" + #include "../innobase/include/btr0btr.h" + #include "../innobase/include/fsp0fsp.h" + #include "../innobase/include/sync0sync.h" + #include "../innobase/include/fil0fil.h" ++#include "../innobase/include/page0page.h" + #include "../innobase/include/trx0xa.h" + } + +@@ -6483,6 +6485,116 @@ + DBUG_RETURN(FALSE); + } + ++bool ++innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables) ++{ ++ ulint size; ++ ulint i; ++ dulint id; ++ ulint n_found; ++ buf_frame_t* frame; ++ dict_index_t* index; ++ buf_block_t* block; ++ ++ char *p; ++ char db_name_raw[NAME_LEN*5+1]; ++ char table_name_raw[NAME_LEN*5+1]; ++ ++ DBUG_ENTER("innodb_I_S_buffer_pool_content"); ++ ++ ++ size = buf_pool->curr_size; ++ ++ n_found = 0; ++ ++ TABLE *table= tables->table; ++ ++ ++ //buf_pool_dump(); ++ ++ ++ for (i = 0; i < size; i++) { ++ block = buf_pool_get_nth_block_no_inline(buf_pool, i); ++ frame = block->frame; ++ if (fil_page_get_type(frame)==0) continue; ++ ++ char page_type[64]; ++ ++ switch(fil_page_get_type(frame)) ++ { ++ case FIL_PAGE_INDEX: ++ strcpy(page_type, "index"); ++ break; ++ case FIL_PAGE_UNDO_LOG: ++ strcpy(page_type, "undo_log"); ++ break; ++ case FIL_PAGE_INODE: ++ strcpy(page_type, "inode"); ++ break; ++ case FIL_PAGE_IBUF_FREE_LIST: ++ strcpy(page_type, 
"ibuf_free_list"); ++ break; ++ default: ++ sprintf(page_type, "unknown", fil_page_get_type(frame)); ++ } ++ ++ table->field[0]->store((longlong)i, TRUE); ++ table->field[1]->store((longlong)block->space, TRUE); ++ table->field[2]->store((longlong)block->offset, TRUE); ++ table->field[3]->store((longlong)page_get_n_recs_noninline(block->frame), TRUE); ++ table->field[4]->store( ( fil_page_get_type(frame) == FIL_PAGE_INDEX ) ? (longlong)page_get_data_size_noninline(block->frame):0, TRUE); ++ table->field[5]->store((longlong)block->flush_type, TRUE); ++ table->field[6]->store((longlong)block->buf_fix_count, TRUE); ++ table->field[7]->store((longlong)block->LRU_position, TRUE); ++ table->field[8]->store((longlong)fil_page_get_type(frame), TRUE); ++ ++ table->field[9]->store(page_type, strlen(page_type), system_charset_info); ++ ++ //fprintf(stderr, "block N %d, space %d, offset %d, records %d, datasize %d, page_type %s, flush_type %d, buf_fix_count %d, LRU_position %d", i, block->space, block->offset, page_get_n_recs_noninline(block->frame), page_get_data_size_noninline(block->frame), page_type,block->flush_type, block->buf_fix_count, block->LRU_position); ++ ++ // flush_type, buf_fix_count, LRU_position ++ ++ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { ++ ++ id = btr_page_get_index_id_noninline(frame); ++ index = dict_index_get_if_in_cache(id); ++ if (index) { ++ table->field[10]->store(index->name, strlen(index->name), system_charset_info); ++ // fprintf(stderr, " index %s, table %s", index->name, index->table_name); ++ ++ if((p = strchr(index->table_name, '/'))) ++ { ++ strncpy(db_name_raw, index->table_name, p-index->table_name); ++ db_name_raw[p-index->table_name] = 0; ++ table->field[11]->store(db_name_raw, strlen(db_name_raw), system_charset_info); ++ p++; ++ } else { ++ table->field[11]->store(NULL, 0, system_charset_info); ++ p = (char *)index->table_name; ++ } ++ strcpy(table_name_raw, p); ++ ++ table->field[12]->store(table_name_raw, 
strlen(table_name_raw), system_charset_info); ++ } else { ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ }else{ ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ //fprintf(stderr, "\n"); ++ if (schema_table_store_record(thd, table)) ++ { ++ DBUG_RETURN(1); ++ } ++ } ++ ++ DBUG_RETURN(0); ++} ++ + /**************************************************************************** + Implements the SHOW MUTEX STATUS command. . */ + +diff -r fe944d2c6e1f sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.h Mon Nov 10 19:48:25 2008 -0800 +@@ -263,6 +263,7 @@ + + int innobase_drop_database(char *path); + bool innodb_show_status(THD* thd); ++bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); + +diff -r fe944d2c6e1f sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_parse.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -2926,6 +2926,7 @@ + case SCH_COLUMN_PRIVILEGES: + case SCH_TABLE_CONSTRAINTS: + case SCH_KEY_COLUMN_USAGE: ++ case SCH_INNODB_I_S_BUFFER_POOL_CONTENT: + default: + break; + } +diff -r fe944d2c6e1f sql/sql_show.cc +--- a/sql/sql_show.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_show.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -27,6 +27,10 @@ + + #ifdef HAVE_BERKELEY_DB + #include "ha_berkeley.h" // For berkeley_show_logs ++#endif ++ ++#ifdef HAVE_INNOBASE_DB ++#include "ha_innodb.h" + #endif + + #ifndef NO_EMBEDDED_ACCESS_CHECKS +@@ -4042,6 +4046,13 @@ + DBUG_RETURN(res); + } + ++int fill_innodb_bp_content(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ DBUG_ENTER("fill_innodb_bp_content"); ++ int res= 0; ++ innodb_I_S_buffer_pool_content(thd, 
tables); ++ DBUG_RETURN(res); ++} + + /* + Find schema_tables elment by name +@@ -4951,6 +4962,24 @@ + }; + + ++ST_FIELD_INFO innodb_bp_content_fields_info[]= ++{ ++ {"BLOCK_NUM", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Block_num"}, ++ {"SPACE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Space"}, ++ {"OFFSET", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Offset"}, ++ {"RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Records"}, ++ {"DATASIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Datasize"}, ++ {"FLUSH_TYPE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Flush_type"}, ++ {"FIX_COUNT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Fix_count"}, ++ {"LRU_POSITION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "LRU_position"}, ++ {"PAGE_TYPE_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Page_type_id"}, ++ {"PAGE_TYPE", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Page_type"}, ++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name"}, ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schem"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4969,6 +4998,8 @@ + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + fill_schema_column_privileges, 0, 0, -1, -1, 0}, ++ {"INNODB_BUFFER_POOL_CONTENT", innodb_bp_content_fields_info, create_schema_table, ++ fill_innodb_bp_content, 0, 0, -1, -1, 0}, + {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, + fill_schema_index_stats, make_old_format, 0, -1, -1, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, +diff -r fe944d2c6e1f sql/table.h +--- a/sql/table.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/table.h Mon Nov 10 19:48:25 2008 -0800 +@@ -375,6 +375,7 @@ + 
SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, ++ SCH_INNODB_I_S_BUFFER_POOL_CONTENT, + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, diff --git a/percona/5.0.77-b13/innodb_show_hashed_memory.patch b/percona/5.0.77-b13/innodb_show_hashed_memory.patch new file mode 100644 index 0000000..191193e --- /dev/null +++ b/percona/5.0.77-b13/innodb_show_hashed_memory.patch @@ -0,0 +1,275 @@ +diff -ruN mysql-5.0.67_highperf/innobase/buf/buf0buf.c mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c +--- mysql-5.0.67_highperf/innobase/buf/buf0buf.c 2008-11-12 09:25:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c 2008-11-12 09:27:52.000000000 +0900 +@@ -2454,13 +2454,15 @@ + (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped)); + } + fprintf(file, +- "Buffer pool size %lu\n" +- "Free buffers %lu\n" +- "Database pages %lu\n" +- "Modified db pages %lu\n" ++ "Buffer pool size %lu\n" ++ "Buffer pool size, bytes %lu\n" ++ "Free buffers %lu\n" ++ "Database pages %lu\n" ++ "Modified db pages %lu\n" + "Pending reads %lu\n" + "Pending writes: LRU %lu, flush list %lu, single page %lu\n", + (ulong) size, ++ (ulong) size * UNIV_PAGE_SIZE, + (ulong) UT_LIST_GET_LEN(buf_pool->free), + (ulong) UT_LIST_GET_LEN(buf_pool->LRU), + (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), +diff -ruN mysql-5.0.67_highperf/innobase/fil/fil0fil.c mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c +--- mysql-5.0.67_highperf/innobase/fil/fil0fil.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c 2008-11-12 09:27:52.000000000 +0900 +@@ -4472,3 +4472,30 @@ + + return(mach_read_from_2(page + FIL_PAGE_TYPE)); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++fil_system_hash_cells(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (fil_system->spaces->n_cells ++ + fil_system->name_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++fil_system_hash_nodes(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (UT_LIST_GET_LEN(fil_system->space_list) ++ * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/innobase/include/fil0fil.h mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h +--- mysql-5.0.67_highperf/innobase/include/fil0fil.h 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h 2008-11-12 09:27:52.000000000 +0900 +@@ -701,6 +701,16 @@ + written to page, the return value not defined */ + byte* page); /* in: file page */ + ++/************************************************************************* ++Return local hash table informations. */ ++ ++ulint ++fil_system_hash_cells(void); ++/*========================*/ ++ ++ulint ++fil_system_hash_nodes(void); ++/*========================*/ + + typedef struct fil_space_struct fil_space_t; + +diff -ruN mysql-5.0.67_highperf/innobase/include/thr0loc.h mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h +--- mysql-5.0.67_highperf/innobase/include/thr0loc.h 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h 2008-11-12 09:27:52.000000000 +0900 +@@ -77,6 +77,17 @@ + /*=============================*/ + /* out: pointer to the in_ibuf field */ + ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void); ++/*=======================*/ ++ ++ulint ++thr_local_hash_nodes(void); ++/*=======================*/ ++ + #ifndef UNIV_NONINL + #include "thr0loc.ic" + #endif +diff -ruN mysql-5.0.67_highperf/innobase/srv/srv0srv.c mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c +--- mysql-5.0.67_highperf/innobase/srv/srv0srv.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c 2008-11-12 09:54:19.000000000 +0900 +@@ -1645,6 +1645,14 @@ + time_t current_time; + ulint n_reserved; + ++ ulint btr_search_sys_subtotal; ++ ulint lock_sys_subtotal; ++ ulint recv_sys_subtotal; ++ ulint io_counter_subtotal; ++ ++ ulint i; ++ trx_t* trx; ++ + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); +@@ -1747,6 +1755,91 @@ + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + ++ /* Calculate reserved memory */ ++ if (btr_search_sys && btr_search_sys->hash_index->heap) { ++ btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap); ++ } else { ++ btr_search_sys_subtotal = 0; ++ for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) { ++ btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]); ++ } ++ } ++ ++ lock_sys_subtotal = 0; ++ if (trx_sys) { ++ mutex_enter(&kernel_mutex); ++ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); ++ while (trx) { ++ lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0); ++ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); ++ } ++ mutex_exit(&kernel_mutex); ++ } ++ ++ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) ++ ? mem_heap_get_size(recv_sys->heap) : 0); ++ ++ io_counter_subtotal = ((buf_pool->io_counter_heap) ++ ? 
mem_heap_get_size(buf_pool->io_counter_heap) : 0); ++ ++ fprintf(file, ++ "Internal hash tables (constant factor + variable factor)\n" ++ " Adaptive hash index %lu \t(%lu + %lu)\n" ++ " Page hash %lu\n" ++ " Dictionary cache %lu \t(%lu + %lu)\n" ++ " File system %lu \t(%lu + %lu)\n" ++ " Lock system %lu \t(%lu + %lu)\n" ++ " Recovery system %lu \t(%lu + %lu)\n" ++ " Threads %lu \t(%lu + %lu)\n" ++ " innodb_io_pattern %lu \t(%lu + %lu)\n", ++ ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0) ++ + btr_search_sys_subtotal, ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) btr_search_sys_subtotal, ++ ++ (ulong) (buf_pool->page_hash->n_cells * sizeof(hash_cell_t)), ++ ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t) ++ + dict_sys->size) : 0), ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t)) : 0), ++ (ulong) (dict_sys ? (dict_sys->size) : 0), ++ ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t) ++ + fil_system_hash_nodes()), ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) fil_system_hash_nodes(), ++ ++ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + lock_sys_subtotal), ++ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) lock_sys_subtotal, ++ ++ (ulong) (((recv_sys && recv_sys->addr_hash) ++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + recv_sys_subtotal), ++ (ulong) ((recv_sys && recv_sys->addr_hash) ++ ? 
(recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) recv_sys_subtotal, ++ ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t) ++ + thr_local_hash_nodes()), ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) thr_local_hash_nodes(), ++ ++ (ulong) (((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + io_counter_subtotal), ++ (ulong) ((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) io_counter_subtotal); ++ + if (srv_use_awe) { + fprintf(file, + "In addition to that %lu MB of AWE memory allocated\n", +diff -ruN mysql-5.0.67_highperf/innobase/thr/thr0loc.c mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c +--- mysql-5.0.67_highperf/innobase/thr/thr0loc.c 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c 2008-11-12 09:27:52.000000000 +0900 +@@ -32,6 +32,7 @@ + + /* The hash table. The module is not yet initialized when it is NULL. */ + hash_table_t* thr_local_hash = NULL; ++ulint thr_local_hash_n_nodes = 0; + + /* The private data for each thread should be put to + the structure below and the accessor functions written +@@ -223,6 +224,7 @@ + HASH_INSERT(thr_local_t, hash, thr_local_hash, + os_thread_pf(os_thread_get_curr_id()), + local); ++ thr_local_hash_n_nodes++; + + mutex_exit(&thr_local_mutex); + } +@@ -251,6 +253,7 @@ + + HASH_DELETE(thr_local_t, hash, thr_local_hash, + os_thread_pf(id), local); ++ thr_local_hash_n_nodes--; + + mutex_exit(&thr_local_mutex); + +@@ -274,3 +277,29 @@ + mutex_create(&thr_local_mutex); + mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++thr_local_hash_nodes(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash_n_nodes ++ * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/patch_info/innodb_show_hashed_memory.info mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,6 @@ ++File=innodb_show_hashed_memory.patch ++Name=Adds additional information of InnoDB internal hash table memories in SHOW INNODB STATUS ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= diff --git a/percona/5.0.77-b13/microsec_process.patch b/percona/5.0.77-b13/microsec_process.patch new file mode 100644 index 0000000..0e094e5 --- /dev/null +++ b/percona/5.0.77-b13/microsec_process.patch @@ -0,0 +1,281 @@ +diff -r 327ce7a34c91 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema.result Fri Nov 07 15:52:53 2008 -0800 +@@ -44,6 +44,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +@@ -740,7 +741,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-106 ++107 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -749,6 +750,7 @@ + table_schema table_name column_name + information_schema COLUMNS COLUMN_DEFAULT + information_schema COLUMNS COLUMN_TYPE ++information_schema PROCESSLIST INFO + information_schema ROUTINES ROUTINE_DEFINITION + information_schema ROUTINES 
SQL_MODE + information_schema TRIGGERS ACTION_CONDITION +@@ -813,7 +815,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 21 ++information_schema 22 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1242,6 +1245,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1329,6 +1333,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 ++PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 + ROUTINES information_schema.ROUTINES 1 + SCHEMATA information_schema.SCHEMATA 1 +diff -r 327ce7a34c91 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Fri Nov 07 15:52:53 2008 -0800 +@@ -13,6 +13,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +diff -r 327ce7a34c91 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Fri Nov 07 15:52:53 2008 -0800 +@@ -87,6 +87,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +@@ -113,6 +114,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +diff 
-r 327ce7a34c91 sql/mysql_priv.h +--- a/sql/mysql_priv.h Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/mysql_priv.h Fri Nov 07 15:52:53 2008 -0800 +@@ -244,6 +244,8 @@ + + /* Characters shown for the command in 'show processlist' */ + #define PROCESS_LIST_WIDTH 100 ++/* Characters shown for the command in 'information_schema.processlist' */ ++#define PROCESS_LIST_INFO_WIDTH 65535 + + #define PRECISION_FOR_DOUBLE 53 + #define PRECISION_FOR_FLOAT 24 +diff -r 327ce7a34c91 sql/sql_show.cc +--- a/sql/sql_show.cc Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/sql_show.cc Fri Nov 07 15:52:53 2008 -0800 +@@ -1466,6 +1466,120 @@ + } + send_eof(thd); + DBUG_VOID_RETURN; ++} ++ ++int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ CHARSET_INFO *cs= system_charset_info; ++ char *user; ++ ulonglong current_timer= my_timer(&current_timer, frequency); ++ DBUG_ENTER("fill_process_list"); ++ ++ user= thd->security_ctx->master_access & PROCESS_ACL ? ++ NullS : thd->security_ctx->priv_user; ++ ++ VOID(pthread_mutex_lock(&LOCK_thread_count)); ++ ++ if (!thd->killed) ++ { ++ I_List_iterator<THD> it(threads); ++ THD* tmp; ++ ++ while ((tmp= it++)) ++ { ++ Security_context *tmp_sctx= tmp->security_ctx; ++ struct st_my_thread_var *mysys_var; ++ const char *val; ++ ++ if ((!tmp->vio_ok() && !tmp->system_thread) || ++ (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user)))) ++ continue; ++ ++ restore_record(table, s->default_values); ++ /* ID */ ++ table->field[0]->store((longlong) tmp->thread_id, TRUE); ++ /* USER */ ++ val= tmp_sctx->user ? tmp_sctx->user : ++ (tmp->system_thread ? 
"system user" : "unauthenticated user"); ++ table->field[1]->store(val, strlen(val), cs); ++ /* HOST */ ++ if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && ++ thd->security_ctx->host_or_ip[0]) ++ { ++ char host[LIST_PROCESS_HOST_LEN + 1]; ++ my_snprintf(host, LIST_PROCESS_HOST_LEN, "%s:%u", ++ tmp_sctx->host_or_ip, tmp->peer_port); ++ table->field[2]->store(host, strlen(host), cs); ++ } ++ else ++ table->field[2]->store(tmp_sctx->host_or_ip, ++ strlen(tmp_sctx->host_or_ip), cs); ++ /* DB */ ++ if (tmp->db) ++ { ++ table->field[3]->store(tmp->db, strlen(tmp->db), cs); ++ table->field[3]->set_notnull(); ++ } ++ ++ if ((mysys_var= tmp->mysys_var)) ++ pthread_mutex_lock(&mysys_var->mutex); ++ /* COMMAND */ ++ if ((val= (char *) (tmp->killed == THD::KILL_CONNECTION? "Killed" : 0))) ++ table->field[4]->store(val, strlen(val), cs); ++ else ++ table->field[4]->store(command_name[tmp->command], ++ strlen(command_name[tmp->command]), cs); ++ /* MYSQL_TIME */ ++ const ulonglong utime= (tmp->start_timer && current_timer) ? current_timer - tmp->start_timer : 0; ++ table->field[5]->store(utime / 1000000, TRUE); ++ /* STATE */ ++#ifndef EMBEDDED_LIBRARY ++ val= (char*) (tmp->locked ? "Locked" : ++ tmp->net.reading_or_writing ? ++ (tmp->net.reading_or_writing == 2 ? ++ "Writing to net" : ++ tmp->command == COM_SLEEP ? "" : ++ "Reading from net") : ++ tmp->proc_info ? tmp->proc_info : ++ tmp->mysys_var && ++ tmp->mysys_var->current_cond ? 
++ "Waiting on cond" : NullS); ++#else ++ val= (char *) "Writing to net"; ++#endif ++ if (val) ++ { ++ table->field[6]->store(val, strlen(val), cs); ++ table->field[6]->set_notnull(); ++ } ++ ++ if (mysys_var) ++ pthread_mutex_unlock(&mysys_var->mutex); ++ ++ /* INFO */ ++ if (tmp->query) ++ { ++ table->field[7]->store(tmp->query, ++ min(PROCESS_LIST_INFO_WIDTH, ++ tmp->query_length), cs); ++ table->field[7]->set_notnull(); ++ } ++ ++ /* TIME_MS */ ++ table->field[8]->store((double)(utime / 1000.0)); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(1); ++ } ++ ++ } ++ } ++ ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(0); + } + + /***************************************************************************** +@@ -4821,6 +4941,22 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++ST_FIELD_INFO processlist_fields_info[]= ++{ ++ {"ID", 4, MYSQL_TYPE_LONG, 0, 0, "Id"}, ++ {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, ++ {"HOST", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Host"}, ++ {"DB", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Db"}, ++ {"COMMAND", 16, MYSQL_TYPE_STRING, 0, 0, "Command"}, ++ {"TIME", 7, MYSQL_TYPE_LONG, 0, 0, "Time"}, ++ {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State"}, ++ {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info"}, ++ {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL, ++ 0, 0, "Time_ms"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4845,6 +4981,8 @@ + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, + {"OPEN_TABLES", open_tables_fields_info, create_schema_table, + fill_open_tables, make_old_format, 0, -1, -1, 1}, ++ {"PROCESSLIST", processlist_fields_info, create_schema_table, ++ fill_schema_processlist, make_old_format, 0, -1, -1, 0}, + {"PROFILING", query_profile_statistics_info, create_schema_table, + 
fill_query_profile_statistics_info, make_profile_table_for_show, + NULL, -1, -1, false}, +diff -r 327ce7a34c91 sql/table.h +--- a/sql/table.h Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/table.h Fri Nov 07 15:52:53 2008 -0800 +@@ -378,6 +378,7 @@ + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, ++ SCH_PROCESSLIST, + SCH_PROFILES, + SCH_PROCEDURES, + SCH_SCHEMATA, +diff -ruN mysql-5.0.67_highperf/patch_info/microsec_process.info mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,6 @@ ++File=microsec_process.patch ++Name=Adds INFORMATION_SCHEMA.PROCESSLIST with TIME_MS column ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= ++ChangeLog= diff --git a/percona/5.0.77-b13/microslow_innodb.patch b/percona/5.0.77-b13/microslow_innodb.patch new file mode 100644 index 0000000..6c21043 --- /dev/null +++ b/percona/5.0.77-b13/microslow_innodb.patch @@ -0,0 +1,2472 @@ +diff -r 04958490fc6d include/my_getopt.h +--- a/include/my_getopt.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/include/my_getopt.h Tue Feb 17 22:33:00 2009 -0800 +@@ -28,7 +28,8 @@ + #define GET_ULL 8 + #define GET_STR 9 + #define GET_STR_ALLOC 10 +-#define GET_DISABLED 11 ++#define GET_DOUBLE 11 ++#define GET_DISABLED 12 + + #define GET_ASK_ADDR 128 + #define GET_TYPE_MASK 127 +diff -r 04958490fc6d include/my_time.h +--- a/include/my_time.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/include/my_time.h Tue Feb 17 22:33:00 2009 -0800 +@@ -140,7 +140,7 @@ + int my_date_to_str(const MYSQL_TIME *l_time, char *to); + int my_datetime_to_str(const MYSQL_TIME *l_time, char *to); + int my_TIME_to_str(const MYSQL_TIME *l_time, char *to); +- ++ulonglong my_timer(ulonglong *ltime, ulonglong frequency); + C_MODE_END + + #endif /* _my_time_h_ */ +diff -r 04958490fc6d innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Tue Feb 17 
22:32:27 2009 -0800 ++++ b/innobase/buf/buf0buf.c Tue Feb 17 22:33:00 2009 -0800 +@@ -37,6 +37,9 @@ + #include "log0log.h" + #include "trx0undo.h" + #include "srv0srv.h" ++ ++/* prototypes for new functions added to ha_innodb.cc */ ++trx_t* innobase_get_trx(); + + /* + IMPLEMENTATION OF THE BUFFER POOL +@@ -1086,6 +1089,36 @@ + return(block); + } + ++inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) ++{ ++ ulint block_hash; ++ ulint block_hash_byte; ++ byte block_hash_offset; ++ ++ ut_ad(block); ++ ++ if (!srv_slow_log || !trx || !trx->take_stats) ++ return; ++ ++ if (!trx->distinct_page_access_hash) { ++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ } ++ ++ block_hash = ut_hash_ulint((block->space << 20) + block->space + ++ block->offset, DPAH_SIZE << 3); ++ block_hash_byte = block_hash >> 3; ++ block_hash_offset = (byte) block_hash & 0x07; ++ if (block_hash_byte < 0 || block_hash_byte >= DPAH_SIZE) ++ fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); ++ if (block_hash_offset < 0 || block_hash_offset > 7) ++ fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); ++ if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) ++ trx->distinct_page_access++; ++ trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; ++ return; ++} ++ + /************************************************************************ + This is the general function used to get access to a database page. 
*/ + +@@ -1108,6 +1141,11 @@ + ulint fix_type; + ibool success; + ibool must_read; ++ trx_t* trx = NULL; ++ ulint sec; ++ ulint ms; ++ ib_longlong start_time; ++ ib_longlong finish_time; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) +@@ -1119,6 +1157,9 @@ + #ifndef UNIV_LOG_DEBUG + ut_ad(!ibuf_inside() || ibuf_page(space, offset)); + #endif ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ } + buf_pool->n_page_gets++; + loop: + block = NULL; +@@ -1148,7 +1189,7 @@ + return(NULL); + } + +- buf_read_page(space, offset); ++ buf_read_page(space, offset, trx); + + #ifdef UNIV_DEBUG + buf_dbg_counter++; +@@ -1261,6 +1302,11 @@ + /* Let us wait until the read operation + completes */ + ++ if (srv_slow_log && trx && trx->take_stats) ++ { ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } + for (;;) { + mutex_enter(&block->mutex); + +@@ -1275,6 +1321,12 @@ + + break; + } ++ } ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + } + +@@ -1296,12 +1348,17 @@ + /* In the case of a first access, try to apply linear + read-ahead */ + +- buf_read_ahead_linear(space, offset); ++ buf_read_ahead_linear(space, offset, trx); + } + + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif ++ ++ if (srv_slow_log) { ++ _increment_page_get_statistics(block, trx); ++ } ++ + return(block->frame); + } + +@@ -1326,6 +1383,7 @@ + ibool accessed; + ibool success; + ulint fix_type; ++ trx_t* trx = NULL; + + ut_ad(mtr && block); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); +@@ -1440,13 +1498,18 @@ + read-ahead */ + + buf_read_ahead_linear(buf_frame_get_space_id(guess), +- buf_frame_get_page_no(guess)); ++ buf_frame_get_page_no(guess), trx); + } + + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif + 
buf_pool->n_page_gets++; ++ ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ _increment_page_get_statistics(block, trx); ++ } + + return(TRUE); + } +@@ -1470,6 +1533,7 @@ + buf_block_t* block; + ibool success; + ulint fix_type; ++ trx_t* trx = NULL; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); +@@ -1558,6 +1622,11 @@ + || (ibuf_count_get(block->space, block->offset) == 0)); + #endif + buf_pool->n_page_gets++; ++ ++ if (srv_slow_log) { ++ trx = innobase_get_trx(); ++ _increment_page_get_statistics(block, trx); ++ } + + return(TRUE); + } +diff -r 04958490fc6d innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/buf/buf0rea.c Tue Feb 17 22:33:00 2009 -0800 +@@ -70,7 +70,8 @@ + treat the tablespace as dropped; this is a timestamp we + use to stop dangling page reads from a tablespace + which we have DISCARDed + IMPORTed back */ +- ulint offset) /* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx) + { + buf_block_t* block; + ulint wake_later; +@@ -140,10 +141,10 @@ + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + +- *err = fil_io(OS_FILE_READ | wake_later, ++ *err = _fil_io(OS_FILE_READ | wake_later, + sync, space, + offset, 0, UNIV_PAGE_SIZE, +- (void*)block->frame, (void*)block); ++ (void*)block->frame, (void*)block, trx); + ut_a(*err == DB_SUCCESS); + + if (sync) { +@@ -174,8 +175,9 @@ + the page at the given page number does not get + read even if we return a value > 0! 
*/ + ulint space, /* in: space id */ +- ulint offset) /* in: page number of a page which the current thread ++ ulint offset, /* in: page number of a page which the current thread + wants to access */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + buf_block_t* block; +@@ -270,7 +272,7 @@ + if (!ibuf_bitmap_page(i)) { + count += buf_read_page_low(&err, FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, i); ++ space, tablespace_version, i, trx); + if (err == DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); + fprintf(stderr, +@@ -314,7 +316,8 @@ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ +- ulint offset) /* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + ulint count; +@@ -323,13 +326,13 @@ + + tablespace_version = fil_space_get_version(space); + +- count = buf_read_ahead_random(space, offset); ++ count = buf_read_ahead_random(space, offset, trx); + + /* We do the i/o in the synchronous aio mode to save thread + switches: hence TRUE */ + + count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, +- tablespace_version, offset); ++ tablespace_version, offset, trx); + srv_buf_pool_reads+= count2; + if (err == DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); +@@ -374,8 +377,9 @@ + /*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ +- ulint offset) /* in: page number of a page; NOTE: the current thread ++ ulint offset, /* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ ++ trx_t* trx) + { + ib_longlong tablespace_version; + buf_block_t* block; +@@ -556,7 +560,7 @@ + if (!ibuf_bitmap_page(i)) { + count += buf_read_page_low(&err, FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, i); ++ space, tablespace_version, i, trx); + if (err == 
DB_TABLESPACE_DELETED) { + ut_print_timestamp(stderr); + fprintf(stderr, +@@ -625,10 +629,10 @@ + for (i = 0; i < n_stored; i++) { + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, +- space_ids[i], space_versions[i], page_nos[i]); ++ space_ids[i], space_versions[i], page_nos[i], NULL); + } else { + buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE, +- space_ids[i], space_versions[i], page_nos[i]); ++ space_ids[i], space_versions[i], page_nos[i], NULL); + } + + if (err == DB_TABLESPACE_DELETED) { +@@ -704,11 +708,11 @@ + + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, +- tablespace_version, page_nos[i]); ++ tablespace_version, page_nos[i], NULL); + } else { + buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE + | OS_AIO_SIMULATED_WAKE_LATER, +- space, tablespace_version, page_nos[i]); ++ space, tablespace_version, page_nos[i], NULL); + } + } + +diff -r 04958490fc6d innobase/fil/fil0fil.c +--- a/innobase/fil/fil0fil.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/fil/fil0fil.c Tue Feb 17 22:33:00 2009 -0800 +@@ -3527,7 +3527,7 @@ + node->name, node->handle, buf, + offset_low, offset_high, + UNIV_PAGE_SIZE * n_pages, +- NULL, NULL); ++ NULL, NULL, NULL); + #endif + if (success) { + node->size += n_pages; +@@ -3851,7 +3851,7 @@ + Reads or writes data. This operation is asynchronous (aio). 
*/ + + ulint +-fil_io( ++_fil_io( + /*===*/ + /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED + if we are trying to do i/o on a tablespace +@@ -3877,8 +3877,9 @@ + void* buf, /* in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ +- void* message) /* in: message for aio handler if non-sync ++ void* message, /* in: message for aio handler if non-sync + aio used, else ignored */ ++ trx_t* trx) + { + fil_system_t* system = fil_system; + ulint mode; +@@ -4018,7 +4019,7 @@ + #else + /* Queue the aio request */ + ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, +- offset_low, offset_high, len, node, message); ++ offset_low, offset_high, len, node, message, trx); + #endif + ut_a(ret); + +diff -r 04958490fc6d innobase/include/buf0rea.h +--- a/innobase/include/buf0rea.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/include/buf0rea.h Tue Feb 17 22:33:00 2009 -0800 +@@ -10,6 +10,7 @@ + #define buf0rea_h + + #include "univ.i" ++#include "trx0types.h" + #include "buf0types.h" + + /************************************************************************ +@@ -25,7 +26,8 @@ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ +- ulint offset);/* in: page number */ ++ ulint offset, /* in: page number */ ++ trx_t* trx); + /************************************************************************ + Applies linear read-ahead if in the buf_pool the page is a border page of + a linear read-ahead area and all the pages in the area have been accessed. 
+@@ -55,8 +57,9 @@ + /*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ +- ulint offset);/* in: page number of a page; NOTE: the current thread ++ ulint offset, /* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ ++ trx_t* trx); + /************************************************************************ + Issues read requests for pages which the ibuf module wants to read in, in + order to contract the insert buffer tree. Technically, this function is like +diff -r 04958490fc6d innobase/include/fil0fil.h +--- a/innobase/include/fil0fil.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/include/fil0fil.h Tue Feb 17 22:33:00 2009 -0800 +@@ -534,8 +534,11 @@ + /************************************************************************ + Reads or writes data. This operation is asynchronous (aio). */ + ++#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \ ++ _fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL) ++ + ulint +-fil_io( ++_fil_io( + /*===*/ + /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED + if we are trying to do i/o on a tablespace +@@ -561,8 +564,9 @@ + void* buf, /* in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ +- void* message); /* in: message for aio handler if non-sync ++ void* message, /* in: message for aio handler if non-sync + aio used, else ignored */ ++ trx_t* trx); + /************************************************************************ + Reads data from a space to a buffer. 
Remember that the possible incomplete + blocks at the end of file are ignored: they are not taken into account when +diff -r 04958490fc6d innobase/include/os0file.h +--- a/innobase/include/os0file.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/include/os0file.h Tue Feb 17 22:33:00 2009 -0800 +@@ -10,6 +10,8 @@ + #define os0file_h + + #include "univ.i" ++ ++#include "trx0types.h" + + #ifndef __WIN__ + #include <dirent.h> +@@ -421,8 +423,11 @@ + /*********************************************************************** + Requests a synchronous read operation. */ + ++#define os_file_read(file, buf, offset, offset_high, n) \ ++ _os_file_read(file, buf, offset, offset_high, n, NULL) ++ + ibool +-os_file_read( ++_os_file_read( + /*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ +@@ -432,7 +437,8 @@ + offset where to read */ + ulint offset_high,/* in: most significant 32 bits of + offset */ +- ulint n); /* in: number of bytes to read */ ++ ulint n, /* in: number of bytes to read */ ++ trx_t* trx); + /*********************************************************************** + Rewind file to its start, read at most size - 1 bytes from it to str, and + NUL-terminate str. All errors are silently ignored. This function is +@@ -584,7 +590,8 @@ + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ +- void* message2); ++ void* message2, ++ trx_t* trx); + /**************************************************************************** + Wakes up all async i/o threads so that they know to exit themselves in + shutdown. 
*/ +diff -r 04958490fc6d innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/include/srv0srv.h Tue Feb 17 22:33:00 2009 -0800 +@@ -26,6 +26,8 @@ + at a time */ + #define SRV_AUTO_EXTEND_INCREMENT \ + (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) ++ ++extern ibool srv_slow_log; + + /* This is set to TRUE if the MySQL user has set it in MySQL */ + extern ibool srv_lower_case_table_names; +diff -r 04958490fc6d innobase/include/trx0trx.h +--- a/innobase/include/trx0trx.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/include/trx0trx.h Tue Feb 17 22:33:00 2009 -0800 +@@ -668,6 +668,17 @@ + /*------------------------------*/ + char detailed_error[256]; /* detailed error message for last + error, or empty. */ ++ /*------------------------------*/ ++ ulint io_reads; ++ ib_longlong io_read; ++ ulint io_reads_wait_timer; ++ ib_longlong lock_que_wait_ustarted; ++ ulint lock_que_wait_timer; ++ ulint innodb_que_wait_timer; ++ ulint distinct_page_access; ++#define DPAH_SIZE 8192 ++ byte* distinct_page_access_hash; ++ ibool take_stats; + }; + + #define TRX_MAX_N_THREADS 32 /* maximum number of concurrent +diff -r 04958490fc6d innobase/lock/lock0lock.c +--- a/innobase/lock/lock0lock.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/lock/lock0lock.c Tue Feb 17 22:33:00 2009 -0800 +@@ -1806,6 +1806,8 @@ + { + lock_t* lock; + trx_t* trx; ++ ulint sec; ++ ulint ms; + + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +@@ -1861,6 +1863,10 @@ + trx->que_state = TRX_QUE_LOCK_WAIT; + trx->was_chosen_as_deadlock_victim = FALSE; + trx->wait_started = time(NULL); ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; ++ } + + ut_a(que_thr_stop(thr)); + +@@ -3514,7 +3520,9 @@ + { + lock_t* lock; + trx_t* trx; +- ++ ulint sec; ++ ulint ms; ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); + #endif /* UNIV_SYNC_DEBUG */ +@@ 
-3564,6 +3572,10 @@ + return(DB_SUCCESS); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; ++ } + trx->que_state = TRX_QUE_LOCK_WAIT; + trx->was_chosen_as_deadlock_victim = FALSE; + trx->wait_started = time(NULL); +diff -r 04958490fc6d innobase/os/os0file.c +--- a/innobase/os/os0file.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/os/os0file.c Tue Feb 17 22:33:00 2009 -0800 +@@ -14,6 +14,7 @@ + #include "srv0start.h" + #include "fil0fil.h" + #include "buf0buf.h" ++#include "trx0sys.h" + + #if defined(UNIV_HOTBACKUP) && defined(__WIN__) + /* Add includes for the _stat() call to compile on Windows */ +@@ -1903,9 +1904,13 @@ + #ifndef __WIN__ + /*********************************************************************** + Does a synchronous read operation in Posix. */ ++ ++#define os_file_pread(file, buf, n, offset, offset_high) \ ++ _os_file_pread(file, buf, n, offset, offset_high, NULL); ++ + static + ssize_t +-os_file_pread( ++_os_file_pread( + /*==========*/ + /* out: number of bytes read, -1 if error */ + os_file_t file, /* in: handle to a file */ +@@ -1913,12 +1918,17 @@ + ulint n, /* in: number of bytes to read */ + ulint offset, /* in: least significant 32 bits of file + offset from where to read */ +- ulint offset_high) /* in: most significant 32 bits of +- offset */ ++ ulint offset_high, /* in: most significant 32 bits of ++ offset */ ++ trx_t* trx) + { + off_t offs; + ssize_t n_bytes; +- ++ ulint sec; ++ ulint ms; ++ ib_longlong start_time; ++ ib_longlong finish_time; ++ + ut_a((offset & 0xFFFFFFFFUL) == offset); + + /* If off_t is > 4 bytes in size, then we assume we can pass a +@@ -1937,7 +1947,13 @@ + } + + os_n_file_reads++; +- ++ if (srv_slow_log && trx && trx->take_stats) ++ { ++ trx->io_reads++; ++ trx->io_read += n; ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } + #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) + 
os_mutex_enter(os_file_count_mutex); + os_file_n_pending_preads++; +@@ -1951,6 +1967,13 @@ + os_n_pending_reads--; + os_mutex_exit(os_file_count_mutex); + ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); ++ } ++ + return(n_bytes); + #else + { +@@ -1980,6 +2003,13 @@ + os_mutex_enter(os_file_count_mutex); + os_n_pending_reads--; + os_mutex_exit(os_file_count_mutex); ++ ++ if (srv_slow_log && trx && trx->take_stats && start_time) ++ { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time); ++ } + + return(ret); + } +@@ -2103,7 +2133,7 @@ + Requests a synchronous positioned read operation. */ + + ibool +-os_file_read( ++_os_file_read( + /*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ +@@ -2113,7 +2143,8 @@ + offset where to read */ + ulint offset_high, /* in: most significant 32 bits of + offset */ +- ulint n) /* in: number of bytes to read */ ++ ulint n, /* in: number of bytes to read */ ++ trx_t* trx) + { + #ifdef __WIN__ + BOOL ret; +@@ -2177,7 +2208,7 @@ + os_bytes_read_since_printout += n; + + try_again: +- ret = os_file_pread(file, buf, n, offset, offset_high); ++ ret = _os_file_pread(file, buf, n, offset, offset_high, trx); + + if ((ulint)ret == n) { + +@@ -3137,7 +3168,8 @@ + offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ +- ulint len) /* in: length of the block to read or write */ ++ ulint len, /* in: length of the block to read or write */ ++ trx_t* trx) + { + os_aio_slot_t* slot; + #ifdef WIN_ASYNC_IO +@@ -3390,7 +3422,8 @@ + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ +- void* message2) ++ void* message2, ++ trx_t* trx) + { + os_aio_array_t* array; + os_aio_slot_t* slot; +@@ -3429,8 +3462,8 @@ + wait 
in the Windows case. */ + + if (type == OS_FILE_READ) { +- return(os_file_read(file, buf, offset, +- offset_high, n)); ++ return(_os_file_read(file, buf, offset, ++ offset_high, n, trx)); + } + + ut_a(type == OS_FILE_WRITE); +@@ -3463,8 +3496,13 @@ + ut_error; + } + ++ if (trx && type == OS_FILE_READ) ++ { ++ trx->io_reads++; ++ trx->io_read += n; ++ } + slot = os_aio_array_reserve_slot(type, array, message1, message2, file, +- name, buf, offset, offset_high, n); ++ name, buf, offset, offset_high, n, trx); + if (type == OS_FILE_READ) { + if (os_aio_use_native_aio) { + #ifdef WIN_ASYNC_IO +diff -r 04958490fc6d innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/srv/srv0srv.c Tue Feb 17 22:33:00 2009 -0800 +@@ -47,6 +47,8 @@ + #include "dict0boot.h" + #include "srv0start.h" + #include "row0mysql.h" ++ ++ibool srv_slow_log = 0; + + /* This is set to TRUE if the MySQL user has set it in MySQL; currently + affects only FOREIGN KEY definition parsing */ +@@ -996,6 +998,10 @@ + ibool has_slept = FALSE; + srv_conc_slot_t* slot = NULL; + ulint i; ++ ib_longlong start_time = 0L; ++ ib_longlong finish_time = 0L; ++ ulint sec; ++ ulint ms; + + /* If trx has 'free tickets' to enter the engine left, then use one + such ticket */ +@@ -1054,6 +1060,7 @@ + if (SRV_THREAD_SLEEP_DELAY > 0) + { + os_thread_sleep(SRV_THREAD_SLEEP_DELAY); ++ trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; + } + + trx->op_info = ""; +@@ -1109,11 +1116,22 @@ + /* Go to wait for the event; when a thread leaves InnoDB it will + release this thread */ + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ start_time = (ib_longlong)sec * 1000000 + ms; ++ } ++ + trx->op_info = "waiting in InnoDB queue"; + + os_event_wait(slot->event); + + trx->op_info = ""; ++ ++ if (srv_slow_log && trx->take_stats && start_time) { ++ ut_usectime(&sec, &ms); ++ finish_time = (ib_longlong)sec * 1000000 + ms; ++ trx->innodb_que_wait_timer += 
(ulint)(finish_time - start_time); ++ } + + os_fast_mutex_lock(&srv_conc_mutex); + +diff -r 04958490fc6d innobase/trx/trx0trx.c +--- a/innobase/trx/trx0trx.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/innobase/trx/trx0trx.c Tue Feb 17 22:33:00 2009 -0800 +@@ -190,6 +190,15 @@ + trx->global_read_view_heap = mem_heap_create(256); + trx->global_read_view = NULL; + trx->read_view = NULL; ++ ++ trx->io_reads = 0; ++ trx->io_read = 0; ++ trx->io_reads_wait_timer = 0; ++ trx->lock_que_wait_timer = 0; ++ trx->innodb_que_wait_timer = 0; ++ trx->distinct_page_access = 0; ++ trx->distinct_page_access_hash = NULL; ++ trx->take_stats = FALSE; + + /* Set X/Open XA transaction identification to NULL */ + memset(&trx->xid, 0, sizeof(trx->xid)); +@@ -230,6 +239,11 @@ + + trx->mysql_process_no = os_proc_get_number(); + ++ if (srv_slow_log && trx->take_stats) { ++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ } ++ + return(trx); + } + +@@ -366,6 +380,12 @@ + /*===============*/ + trx_t* trx) /* in, own: trx object */ + { ++ if (trx->distinct_page_access_hash) ++ { ++ mem_free(trx->distinct_page_access_hash); ++ trx->distinct_page_access_hash= NULL; ++ } ++ + thr_local_free(trx->mysql_thread_id); + + mutex_enter(&kernel_mutex); +@@ -389,6 +409,12 @@ + /*====================*/ + trx_t* trx) /* in, own: trx object */ + { ++ if (trx->distinct_page_access_hash) ++ { ++ mem_free(trx->distinct_page_access_hash); ++ trx->distinct_page_access_hash= NULL; ++ } ++ + mutex_enter(&kernel_mutex); + + trx_free(trx); +@@ -1064,7 +1090,10 @@ + trx_t* trx) /* in: transaction */ + { + que_thr_t* thr; +- ++ ulint sec; ++ ulint ms; ++ ib_longlong now; ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); + #endif /* UNIV_SYNC_DEBUG */ +@@ -1080,6 +1109,11 @@ + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ now = (ib_longlong)sec * 1000000 + ms; ++ 
trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); ++ } + trx->que_state = TRX_QUE_RUNNING; + } + +@@ -1093,6 +1127,9 @@ + trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ + { + que_thr_t* thr; ++ ulint sec; ++ ulint ms; ++ ib_longlong now; + + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +@@ -1109,6 +1146,11 @@ + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + ++ if (srv_slow_log && trx->take_stats) { ++ ut_usectime(&sec, &ms); ++ now = (ib_longlong)sec * 1000000 + ms; ++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); ++ } + trx->que_state = TRX_QUE_RUNNING; + } + +diff -r 04958490fc6d mysys/my_getopt.c +--- a/mysys/my_getopt.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/mysys/my_getopt.c Tue Feb 17 22:33:00 2009 -0800 +@@ -1061,6 +1061,9 @@ + case GET_ULONG: + printf("%lu\n", *((ulong*) value)); + break; ++ case GET_DOUBLE: ++ printf("%6f\n", *((double*) value)); ++ break; + case GET_LL: + printf("%s\n", llstr(*((longlong*) value), buff)); + break; +diff -r 04958490fc6d patch_info/microslow_innodb.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/microslow_innodb.info Tue Feb 17 22:33:00 2009 -0800 +@@ -0,0 +1,15 @@ ++File=microslow_innodb.patch ++Name=Extended statistics in slow.log ++Version=1.2 ++Author=Percona <info@percona.com> ++License=GPL ++Comment= ++Changelog ++2008-11-26 ++YK: Fix inefficient determination of trx, Make not to call useless gettimeofday when don't use slow log. Make log_slow_queries dynamic (bool). ++ ++2008-11-07 ++VT: Moved log_slow_rate_limit in SHOW VARIABLE into right place ++ ++2008-11 ++Arjen Lentz: Fixups (backward compatibility) by Arjen Lentz <arjen@openquery.com.au> +diff -r 04958490fc6d scripts/mysqldumpslow.sh +--- a/scripts/mysqldumpslow.sh Tue Feb 17 22:32:27 2009 -0800 ++++ b/scripts/mysqldumpslow.sh Tue Feb 17 22:33:00 2009 -0800 +@@ -83,8 +83,8 @@ + s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//; + my ($user,$host) = s/^#? 
User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('',''); + +- s/^# Query_time: (\d+) Lock_time: (\d+) Rows_sent: (\d+).*\n//; +- my ($t, $l, $r) = ($1, $2, $3); ++ s/^# Query_time: (\d+(\.\d+)?) Lock_time: (\d+(\.\d+)?) Rows_sent: (\d+(\.\d+)?).*\n//; ++ my ($t, $l, $r) = ($1, $3, $5); + $t -= $l unless $opt{l}; + + # remove fluff that mysqld writes to log when it (re)starts: +diff -r 04958490fc6d sql-common/my_time.c +--- a/sql-common/my_time.c Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql-common/my_time.c Tue Feb 17 22:33:00 2009 -0800 +@@ -1252,3 +1252,37 @@ + return 0; + } + ++/* ++ ulonglong my_timer(ulonglong *ltime, ulonglong frequency) ++ ++ For performance measurement this function returns the number ++ of microseconds since the epoch (SVr4, BSD 4.3, POSIX 1003.1-2001) ++ or system start (Windows platforms). ++ ++ For windows platforms frequency value (obtained via ++ QueryPerformanceFrequency) has to be specified. The global frequency ++ value is set in mysqld.cc. ++ ++ If Windows platform doesn't support QueryPerformanceFrequency we will ++ obtain the time via GetTickCount, which supports milliseconds only. 
++*/ ++ ++ulonglong my_timer(ulonglong *ltime, ulonglong frequency) ++{ ++ ulonglong newtime= 0; ++#ifdef __WIN__ ++ if (frequency) ++ { ++ QueryPerformanceCounter((LARGE_INTEGER *)&newtime); ++ newtime/= (frequency * 1000000); ++ } else ++ newtime= (GetTickCount() * 1000; /* GetTickCount only returns milliseconds */ ++#else ++ struct timeval t; ++ if (gettimeofday(&t, NULL) != -1) ++ newtime= (ulonglong)t.tv_sec * 1000000 + t.tv_usec; ++#endif ++ if (ltime) ++ *ltime= newtime; ++ return newtime; ++} +diff -r 04958490fc6d sql/filesort.cc +--- a/sql/filesort.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/filesort.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -180,6 +180,7 @@ + { + statistic_increment(thd->status_var.filesort_scan_count, &LOCK_status); + } ++ thd->query_plan_flags|= QPLAN_FILESORT; + #ifdef CAN_TRUST_RANGE + if (select && select->quick && select->quick->records > 0L) + { +@@ -245,6 +246,7 @@ + } + else + { ++ thd->query_plan_flags|= QPLAN_FILESORT_DISK; + if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer) + { + x_free(table_sort.buffpek); +@@ -1116,6 +1118,7 @@ + + statistic_increment(current_thd->status_var.filesort_merge_passes, + &LOCK_status); ++ current_thd->query_plan_fsort_passes++; + if (param->not_killable) + { + killed= &not_killable; +diff -r 04958490fc6d sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/ha_innodb.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -1,3 +1,4 @@ ++ + /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy + + This program is free software; you can redistribute it and/or modify +@@ -805,9 +806,34 @@ + trx->check_unique_secondary = TRUE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + return(trx); + } + ++/************************************************************************* ++Gets current trx. 
*/ ++extern "C" ++trx_t* ++innobase_get_trx() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ return((trx_t*) thd->ha_data[innobase_hton.slot]); ++ } else { ++ return(NULL); ++ } ++} ++ ++void ++innobase_update_var_slow_log() ++{ ++ srv_slow_log = (ibool) opt_slow_log; ++} + + /************************************************************************* + Construct ha_innobase handler. */ +@@ -1309,6 +1335,8 @@ + } + + /* -------------- Log files ---------------------------*/ ++ ++ srv_slow_log = (ibool) opt_slow_log; + + /* The default dir for log files is the datadir of MySQL */ + +@@ -4681,6 +4709,12 @@ + trx->check_unique_secondary = FALSE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + if (lower_case_table_names) { + srv_lower_case_table_names = TRUE; + } else { +@@ -4946,6 +4980,12 @@ + trx->check_unique_secondary = FALSE; + } + ++ if (thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + name_len = strlen(name); + + assert(name_len < 1000); +@@ -5033,6 +5073,12 @@ + trx->check_foreigns = FALSE; + } + ++ if (current_thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; ++ } ++ + error = row_drop_database_for_mysql(namebuf, trx); + my_free(namebuf, MYF(0)); + +@@ -5097,6 +5143,12 @@ + + if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { + trx->check_foreigns = FALSE; ++ } ++ ++ if (current_thd->variables.log_slow_verbosity & SLOG_V_INNODB) { ++ trx->take_stats = TRUE; ++ } else { ++ trx->take_stats = FALSE; + } + + name_len1 = strlen(from); +@@ -6106,6 +6158,7 @@ + { + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + trx_t* trx; ++ int i; + + DBUG_ENTER("ha_innobase::external_lock"); + DBUG_PRINT("enter",("lock_type: %d", lock_type)); +@@ -6229,7 +6282,24 @@ + + if (trx->n_mysql_tables_in_use 
== 0) { + +- trx->mysql_n_tables_locked = 0; ++ current_thd->innodb_was_used = TRUE; ++ current_thd->innodb_io_reads += trx->io_reads; ++ current_thd->innodb_io_read += trx->io_read; ++ current_thd->innodb_io_reads_wait_timer += trx->io_reads_wait_timer; ++ current_thd->innodb_lock_que_wait_timer += trx->lock_que_wait_timer; ++ current_thd->innodb_innodb_que_wait_timer += trx->innodb_que_wait_timer; ++ current_thd->innodb_page_access += trx->distinct_page_access; ++ ++ trx->io_reads = 0; ++ trx->io_read = 0; ++ trx->io_reads_wait_timer = 0; ++ trx->lock_que_wait_timer = 0; ++ trx->innodb_que_wait_timer = 0; ++ trx->distinct_page_access = 0; ++ if (trx->distinct_page_access_hash) ++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); ++ ++ trx->mysql_n_tables_locked = 0; + prebuilt->used_in_HANDLER = FALSE; + + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { +diff -r 04958490fc6d sql/ha_innodb.h +--- a/sql/ha_innodb.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/ha_innodb.h Tue Feb 17 22:33:00 2009 -0800 +@@ -266,6 +266,8 @@ + + int innobase_start_trx_and_assign_read_view(THD* thd); + ++void innobase_update_var_slow_log(); ++ + /*********************************************************************** + This function is used to prepare X/Open XA distributed transaction */ + +diff -r 04958490fc6d sql/log.cc +--- a/sql/log.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/log.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -2284,11 +2284,12 @@ + */ + + bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, +- time_t query_start_arg) ++ time_t query_start_arg, ulonglong query_start_timer) + { + bool error=0; + time_t current_time; +- if (!is_open()) ++ ulonglong current_timer; ++ if (!opt_slow_log || !is_open()) + return 0; + DBUG_ENTER("MYSQL_LOG::write"); + +@@ -2298,7 +2299,8 @@ + int tmp_errno=0; + char buff[80],*end; + end=buff; +- if (!(thd->options & OPTION_UPDATE_LOG)) ++ if (!(thd->options & OPTION_UPDATE_LOG) && ++ !(thd->slave_thread && 
opt_log_slow_slave_statements)) + { + VOID(pthread_mutex_unlock(&LOCK_log)); + DBUG_RETURN(0); +@@ -2328,22 +2330,72 @@ + if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n", + sctx->priv_user ? + sctx->priv_user : "", +- sctx->user ? sctx->user : "", ++ sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), + sctx->host ? sctx->host : "", + sctx->ip ? sctx->ip : "") == + (uint) -1) + tmp_errno=errno; + } +- if (query_start_arg) ++ if (query_start_timer) + { ++ char buf[5][20]; ++ ulonglong current_timer= my_timer(&current_timer, frequency); ++ snprintf(buf[0], 20, "%.6f", (current_timer ? (current_timer - query_start_timer):0) / 1000000.0); ++ snprintf(buf[1], 20, "%.6f", (thd->timer_after_lock - query_start_timer) / 1000000.0); ++ if (!query_length) ++ { ++ thd->sent_row_count= thd->examined_row_count= 0; ++ thd->row_count= 0; ++ thd->innodb_was_used= FALSE; ++ thd->query_plan_flags= QPLAN_NONE; ++ thd->query_plan_fsort_passes= 0; ++ } ++ + /* For slow query log */ + if (my_b_printf(&log_file, +- "# Query_time: %lu Lock_time: %lu Rows_sent: %lu Rows_examined: %lu\n", +- (ulong) (current_time - query_start_arg), +- (ulong) (thd->time_after_lock - query_start_arg), ++ "# Thread_id: %lu Schema: %s\n" \ ++ "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu Rows_affected: %lu Rows_read: %lu\n", ++ (ulong) thd->thread_id, (thd->db ? thd->db : ""), ++ buf[0], buf[1], + (ulong) thd->sent_row_count, +- (ulong) thd->examined_row_count) == (uint) -1) ++ (ulong) thd->examined_row_count, ++ ((long) thd->row_count_func > 0 ) ? (ulong) thd->row_count_func : 0, ++ (ulong) thd->row_count) == (uint) -1) + tmp_errno=errno; ++ if ((thd->variables.log_slow_verbosity & SLOG_V_QUERY_PLAN) && ++ my_b_printf(&log_file, ++ "# QC_Hit: %s Full_scan: %s Full_join: %s Tmp_table: %s Tmp_table_on_disk: %s\n" \ ++ "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu\n", ++ ((thd->query_plan_flags & QPLAN_QC) ? 
"Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"), ++ ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"), ++ thd->query_plan_fsort_passes) == (uint) -1) ++ tmp_errno=errno; ++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used) ++ { ++ snprintf(buf[2], 20, "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0); ++ snprintf(buf[3], 20, "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0); ++ snprintf(buf[4], 20, "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0); ++ if (my_b_printf(&log_file, ++ "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \ ++ "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \ ++ "# InnoDB_pages_distinct: %lu\n", ++ (ulong) thd->innodb_io_reads, ++ (ulong) thd->innodb_io_read, ++ buf[2], buf[3], buf[4], ++ (ulong) thd->innodb_page_access) == (uint) -1) ++ tmp_errno=errno; ++ } ++ else ++ { ++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && ++ my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1) ++ tmp_errno=errno; ++ } + } + if (thd->db && strcmp(thd->db,db)) + { // Database changed +diff -r 04958490fc6d sql/log_event.cc +--- a/sql/log_event.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/log_event.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -2039,6 +2039,7 @@ + /* Execute the query (note that we bypass dispatch_command()) */ + const char* found_semicolon= NULL; + mysql_parse(thd, thd->query, thd->query_length, &found_semicolon); ++ log_slow_statement(thd); + + } + else +diff -r 04958490fc6d sql/mysql_priv.h +--- a/sql/mysql_priv.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/mysql_priv.h Tue Feb 17 22:33:00 2009 -0800 +@@ -494,6 +494,78 @@ + #define WEEK_FIRST_WEEKDAY 4 
+ + #define STRING_BUFFER_USUAL_SIZE 80 ++ ++/* Slow log */ ++ ++struct msl_opts ++{ ++ ulong val; ++ const char *name; ++}; ++ ++#define SLOG_V_MICROTIME 1 << 0 ++#define SLOG_V_QUERY_PLAN 1 << 1 ++#define SLOG_V_INNODB 1 << 2 ++/* ... */ ++#define SLOG_V_INVALID 1 << 31 ++#define SLOG_V_NONE SLOG_V_MICROTIME ++ ++static const struct msl_opts slog_verb[]= ++{ ++ /* Basic flags */ ++ ++ { SLOG_V_MICROTIME, "microtime" }, ++ { SLOG_V_QUERY_PLAN, "query_plan" }, ++ { SLOG_V_INNODB, "innodb" }, ++ ++ /* End of basic flags */ ++ ++ { 0, "" }, ++ ++ /* Complex flags */ ++ ++ { SLOG_V_MICROTIME, "minimal" }, ++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN, "standard" }, ++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" }, ++ ++ /* End of complex flags */ ++ ++ { SLOG_V_INVALID, (char *)0 } ++}; ++ ++#define QPLAN_NONE 0 ++#define QPLAN_QC 1 << 0 ++#define QPLAN_QC_NO 1 << 1 ++#define QPLAN_FULL_SCAN 1 << 2 ++#define QPLAN_FULL_JOIN 1 << 3 ++#define QPLAN_TMP_TABLE 1 << 4 ++#define QPLAN_TMP_DISK 1 << 5 ++#define QPLAN_FILESORT 1 << 6 ++#define QPLAN_FILESORT_DISK 1 << 7 ++/* ... 
*/ ++#define QPLAN_MAX 1 << 31 ++ ++#define SLOG_F_QC_NO QPLAN_QC_NO ++#define SLOG_F_FULL_SCAN QPLAN_FULL_SCAN ++#define SLOG_F_FULL_JOIN QPLAN_FULL_JOIN ++#define SLOG_F_TMP_TABLE QPLAN_TMP_TABLE ++#define SLOG_F_TMP_DISK QPLAN_TMP_DISK ++#define SLOG_F_FILESORT QPLAN_FILESORT ++#define SLOG_F_FILESORT_DISK QPLAN_FILESORT_DISK ++#define SLOG_F_INVALID 1 << 31 ++#define SLOG_F_NONE 0 ++ ++static const struct msl_opts slog_filter[]= ++{ ++ { SLOG_F_QC_NO, "qc_miss" }, ++ { SLOG_F_FULL_SCAN, "full_scan" }, ++ { SLOG_F_FULL_JOIN, "full_join" }, ++ { SLOG_F_TMP_TABLE, "tmp_table" }, ++ { SLOG_F_TMP_DISK, "tmp_table_on_disk" }, ++ { SLOG_F_FILESORT, "filesort" }, ++ { SLOG_F_FILESORT_DISK, "filesort_on_disk" }, ++ { SLOG_F_INVALID, (char *)0 } ++}; + + enum enum_parsing_place + { +@@ -1351,6 +1423,7 @@ + extern bool using_update_log, opt_large_files, server_id_supplied; + extern bool opt_update_log, opt_bin_log, opt_error_log; + extern my_bool opt_log, opt_slow_log, opt_log_queries_not_using_indexes; ++extern char *opt_slow_logname; + extern bool opt_disable_networking, opt_skip_show_db; + extern my_bool opt_character_set_client_handshake; + extern bool volatile abort_loop, shutdown_in_progress, grant_option; +@@ -1362,7 +1435,8 @@ + extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; + extern my_bool opt_secure_auth; + extern char* opt_secure_file_priv; +-extern my_bool opt_log_slow_admin_statements; ++extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements; ++extern my_bool opt_use_global_long_query_time; + extern my_bool sp_automatic_privileges, opt_noacl; + extern my_bool opt_old_style_user_limits, trust_function_creators; + extern uint opt_crash_binlog_innodb; +diff -r 04958490fc6d sql/mysqld.cc +--- a/sql/mysqld.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/mysqld.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -175,7 +175,6 @@ + static void getvolumename(); + static void getvolumeID(BYTE *volumeName); + #endif /* __NETWARE__ 
*/ +- + + #ifdef _AIX41 + int initgroups(const char *,unsigned int); +@@ -409,10 +408,13 @@ + my_bool opt_secure_auth= 0; + char* opt_secure_file_priv= 0; + my_bool opt_log_slow_admin_statements= 0; ++my_bool opt_log_slow_slave_statements= 0; ++my_bool opt_use_global_long_query_time= 0; + my_bool lower_case_file_system= 0; + my_bool opt_large_pages= 0; + uint opt_large_page_size= 0; + my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; ++char* opt_slow_logname= 0; + /* + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is +@@ -507,6 +509,7 @@ + Ge_creator ge_creator; + Le_creator le_creator; + ++ulonglong frequency= 0; + + FILE *bootstrap_file; + int bootstrap_error; +@@ -584,7 +587,7 @@ + static int cleanup_done; + static ulong opt_specialflag, opt_myisam_block_size; + static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; +-static char *opt_slow_logname, *opt_tc_heuristic_recover; ++static char *opt_tc_heuristic_recover; + static char *mysql_home_ptr, *pidfile_name_ptr; + static char **defaults_argv; + static char *opt_bin_logname; +@@ -3693,6 +3696,8 @@ + unireg_abort(1); + } + } ++ if (!QueryPerformanceFrequency((LARGE_INTEGER *)&frequency)) ++ frequency= 0; + #endif /* __WIN__ */ + + if (init_common_variables(MYSQL_CONFIG_NAME, +@@ -4943,7 +4948,7 @@ + OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, + OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE, + OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD, +- OPT_LONG_QUERY_TIME, ++ OPT_LONG_QUERY_TIME, OPT_MIN_EXAMINED_ROW_LIMIT, + OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET, + OPT_MAX_BINLOG_CACHE_SIZE, OPT_MAX_BINLOG_SIZE, + OPT_MAX_CONNECTIONS, OPT_MAX_CONNECT_ERRORS, +@@ -5034,11 +5039,18 @@ + OPT_TIMED_MUTEXES, + OPT_OLD_STYLE_USER_LIMITS, + OPT_LOG_SLOW_ADMIN_STATEMENTS, ++ OPT_LOG_SLOW_SLAVE_STATEMENTS, ++ OPT_LOG_SLOW_RATE_LIMIT, ++ OPT_LOG_SLOW_VERBOSITY, ++ 
OPT_LOG_SLOW_FILTER, + OPT_TABLE_LOCK_WAIT_TIMEOUT, + OPT_PLUGIN_DIR, + OPT_PORT_OPEN_TIMEOUT, + OPT_MERGE, + OPT_PROFILING, ++ OPT_SLOW_LOG, ++ OPT_SLOW_QUERY_LOG_FILE, ++ OPT_USE_GLOBAL_LONG_QUERY_TIME, + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, + OPT_KEEP_FILES_ON_CREATE, +@@ -5428,8 +5440,17 @@ + (gptr*) &opt_log_slow_admin_statements, + (gptr*) &opt_log_slow_admin_statements, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, ++ {"log-slow-slave-statements", OPT_LOG_SLOW_SLAVE_STATEMENTS, ++ "Log slow replicated statements to the slow log if it is open.", ++ (gptr*) &opt_log_slow_slave_statements, ++ (gptr*) &opt_log_slow_slave_statements, ++ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"log-slow-queries", OPT_SLOW_QUERY_LOG, + "Log slow queries to this log file. Defaults logging to hostname-slow.log file. Must be enabled to activate other slow log options.", ++ (gptr*) &opt_slow_logname, (gptr*) &opt_slow_logname, 0, GET_STR, OPT_ARG, ++ 0, 0, 0, 0, 0, 0}, ++ {"slow_query_log_file", OPT_SLOW_QUERY_LOG_FILE, ++ "Log slow queries to given log file. Defaults logging to hostname-slow.log. 
Must be enabled to activate other slow log options.", + (gptr*) &opt_slow_logname, (gptr*) &opt_slow_logname, 0, GET_STR, OPT_ARG, + 0, 0, 0, 0, 0, 0}, + {"log-tc", OPT_LOG_TC, +@@ -5795,6 +5816,9 @@ + "Tells the slave thread to continue replication when a query returns an error from the provided list.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + #endif ++ {"slow-query-log", OPT_SLOW_LOG, ++ "Enable|disable slow query log", (gptr*) &opt_slow_log, ++ (gptr*) &opt_slow_log, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"socket", OPT_SOCKET, "Socket file to use for connection.", + (gptr*) &mysqld_unix_port, (gptr*) &mysqld_unix_port, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +@@ -6097,11 +6121,31 @@ + (gptr*) 0, + 0, (GET_ULONG | GET_ASK_ADDR) , REQUIRED_ARG, 100, + 1, 100, 0, 1, 0}, +- {"long_query_time", OPT_LONG_QUERY_TIME, +- "Log all queries that have taken more than long_query_time seconds to execute to file.", +- (gptr*) &global_system_variables.long_query_time, +- (gptr*) &max_system_variables.long_query_time, 0, GET_ULONG, +- REQUIRED_ARG, 10, 1, LONG_TIMEOUT, 0, 1, 0}, ++ {"log_slow_filter", OPT_LOG_SLOW_FILTER, ++ "Log only the queries that followed certain execution plan. Multiple flags allowed in a comma-separated string. [qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, filesort, filesort_on_disk]", ++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_F_NONE, 0, 0}, ++ {"log_slow_rate_limit", OPT_LOG_SLOW_RATE_LIMIT, ++ "Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.", ++ (gptr*) &global_system_variables.log_slow_rate_limit, ++ (gptr*) &max_system_variables.log_slow_rate_limit, 0, GET_ULONG, ++ REQUIRED_ARG, 1, 1, LONG_MAX, 0, 1L, 0}, ++ {"log_slow_verbosity", OPT_LOG_SLOW_VERBOSITY, ++ "Choose how verbose the messages to your slow log will be. Multiple flags allowed in a comma-separated string. 
[microtime, query_plan, innodb]", ++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_V_MICROTIME, 0, 0}, ++ {"long_query_time", OPT_LONG_QUERY_TIME, ++ "Log all queries that have taken more than long_query_time seconds to execute to file.", ++ (gptr*) &global_system_variables.long_query_time, ++ (gptr*) &max_system_variables.long_query_time, 0, GET_DOUBLE, ++ REQUIRED_ARG, 10000000, 0, LONG_TIMEOUT * 1000000, 0, 1, 0}, ++ {"min_examined_row_limit", OPT_MIN_EXAMINED_ROW_LIMIT, ++ "Don't log queries which examine less than min_examined_row_limit rows to file.", ++ (gptr*) &global_system_variables.min_examined_row_limit, ++ (gptr*) &max_system_variables.min_examined_row_limit, 0, GET_ULONG, ++ REQUIRED_ARG, 0, 0, LONG_MAX, 0, 1L, 0}, ++ {"use_global_long_query_time", OPT_USE_GLOBAL_LONG_QUERY_TIME, ++ "Control always use global long_query_time or local long_query_time.", ++ (gptr*) &opt_use_global_long_query_time, (gptr*) &opt_use_global_long_query_time, ++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, + {"lower_case_table_names", OPT_LOWER_CASE_TABLE_NAMES, + "If set to 1 table names are stored in lowercase on disk and table names will be case-insensitive. Should be set to 2 if you are using a case insensitive file system", + (gptr*) &lower_case_table_names, +@@ -6878,7 +6922,11 @@ + global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; + max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; + global_system_variables.old_passwords= 0; +- ++ global_system_variables.long_query_time = 10000000; ++ max_system_variables.long_query_time = LONG_TIMEOUT * 1000000; ++ global_system_variables.log_slow_verbosity= SLOG_V_MICROTIME; ++ global_system_variables.log_slow_filter= SLOG_F_NONE; ++ + /* + Default behavior for 4.1 and 5.0 is to treat NULL values as unequal + when collecting index statistics for MyISAM tables. 
+@@ -7339,6 +7387,35 @@ + case OPT_BOOTSTRAP: + opt_noacl=opt_bootstrap=1; + break; ++ case OPT_LOG_SLOW_FILTER: ++ if ((global_system_variables.log_slow_filter= ++ msl_flag_resolve_by_name(slog_filter, argument, ++ SLOG_F_NONE, SLOG_F_INVALID)) == SLOG_F_INVALID) ++ { ++ fprintf(stderr,"Invalid argument in log_slow_filter: %s\n", argument); ++ exit(1); ++ } ++ break; ++ case OPT_LOG_SLOW_VERBOSITY: ++ if ((global_system_variables.log_slow_verbosity= ++ msl_flag_resolve_by_name(slog_verb, argument, ++ SLOG_V_NONE, SLOG_V_INVALID)) == SLOG_V_INVALID) ++ { ++ fprintf(stderr,"Invalid argument in log_slow_verbosity: %s\n", argument); ++ exit(1); ++ } ++ break; ++ case OPT_LONG_QUERY_TIME: ++ { ++ double doubleslow = strtod(argument,NULL); ++ if (doubleslow < 0 || doubleslow > (LONG_TIMEOUT)) ++ { ++ fprintf(stderr,"Out of range long_query_time value: %s\n", argument); ++ exit(1); ++ } ++ global_system_variables.long_query_time = (ulonglong) (doubleslow * 1000000); ++ break; ++ } + case OPT_STORAGE_ENGINE: + { + if ((enum db_type)((global_system_variables.table_type= +@@ -7671,10 +7748,14 @@ + if (opt_bdb) + sql_print_warning("this binary does not contain BDB storage engine"); + #endif +- if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes) && ++ if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes || ++ opt_log_slow_slave_statements) && + !opt_slow_log) +- sql_print_warning("options --log-slow-admin-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set"); +- ++ { ++ sql_print_warning("options --log-slow-admin-statements, --log-slow-slave-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set"); ++ opt_log_slow_slave_statements= FALSE; ++ } ++ + if (argc > 0) + { + fprintf(stderr, "%s: Too many arguments (first extra is '%s').\nUse --help to get a list of available options\n", my_progname, *argv); +diff -r 04958490fc6d sql/set_var.cc +--- 
a/sql/set_var.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/set_var.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -217,9 +217,13 @@ + sys_log_queries_not_using_indexes("log_queries_not_using_indexes", + &opt_log_queries_not_using_indexes); + sys_var_thd_ulong sys_log_warnings("log_warnings", &SV::log_warnings); +-sys_var_thd_ulong sys_long_query_time("long_query_time", ++sys_var_thd_microtime sys_long_query_time("long_query_time", + &SV::long_query_time); ++sys_var_bool_ptr sys_use_global_long_query_time("use_global_long_query_time", ++ &opt_use_global_long_query_time); + sys_var_bool_const_ptr sys_log_slow("log_slow_queries", &opt_slow_log); ++sys_var_log_slow sys_slow_query_log("slow_query_log", &opt_slow_log); ++sys_var_const_str_ptr sys_slow_query_log_file("slow_query_log_file", &opt_slow_logname); + sys_var_thd_bool sys_low_priority_updates("low_priority_updates", + &SV::low_priority_updates, + fix_low_priority_updates); +@@ -283,6 +287,8 @@ + &SV::max_tmp_tables); + sys_var_long_ptr sys_max_write_lock_count("max_write_lock_count", + &max_write_lock_count); ++sys_var_thd_ulong sys_min_examined_row_limit("min_examined_row_limit", ++ &SV::min_examined_row_limit); + sys_var_thd_ulong sys_multi_range_count("multi_range_count", + &SV::multi_range_count); + sys_var_long_ptr sys_myisam_data_pointer_size("myisam_data_pointer_size", +@@ -327,6 +333,20 @@ + sys_var_bool_ptr sys_relay_log_purge("relay_log_purge", + &relay_log_purge); + #endif ++sys_var_thd_ulong sys_log_slow_rate_limit("log_slow_rate_limit", ++ &SV::log_slow_rate_limit); ++sys_var_thd_msl_flag sys_log_slow_filter("log_slow_filter", ++ &SV::log_slow_filter, ++ SLOG_F_NONE, ++ SLOG_F_NONE, ++ SLOG_F_INVALID, ++ slog_filter); ++sys_var_thd_msl_flag sys_log_slow_verbosity("log_slow_verbosity", ++ &SV::log_slow_verbosity, ++ SLOG_V_NONE, ++ SLOG_V_MICROTIME, ++ SLOG_V_INVALID, ++ slog_verb); + sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank", + &rpl_recovery_rank); + sys_var_long_ptr 
sys_query_cache_size("query_cache_size", +@@ -694,6 +714,10 @@ + &sys_log_off, + &sys_log_queries_not_using_indexes, + &sys_log_slow, ++ &sys_log_slow_filter, ++ &sys_log_slow_rate_limit, ++ &sys_log_slow_verbosity, ++ &sys_use_global_long_query_time, + &sys_log_update, + &sys_log_warnings, + &sys_long_query_time, +@@ -717,6 +741,7 @@ + &sys_max_tmp_tables, + &sys_max_user_connections, + &sys_max_write_lock_count, ++ &sys_min_examined_row_limit, + &sys_multi_range_count, + &sys_myisam_data_pointer_size, + &sys_myisam_max_sort_file_size, +@@ -770,6 +795,8 @@ + &sys_slave_skip_counter, + #endif + &sys_slow_launch_time, ++ &sys_slow_query_log, ++ &sys_slow_query_log_file, + &sys_sort_buffer, + &sys_sql_big_tables, + &sys_sql_low_priority_updates, +@@ -986,8 +1013,11 @@ + {"log_slave_updates", (char*) &opt_log_slave_updates, SHOW_MY_BOOL}, + #endif + {sys_log_slow.name, (char*) &sys_log_slow, SHOW_SYS}, ++ {sys_log_slow_filter.name, (char*) &sys_log_slow_filter, SHOW_SYS}, ++ {sys_log_slow_rate_limit.name, (char*) &sys_log_slow_rate_limit, SHOW_SYS}, ++ {sys_log_slow_verbosity.name, (char*) &sys_log_slow_verbosity, SHOW_SYS}, + {sys_log_warnings.name, (char*) &sys_log_warnings, SHOW_SYS}, +- {sys_long_query_time.name, (char*) &sys_long_query_time, SHOW_SYS}, ++ {sys_long_query_time.name, (char*) &sys_long_query_time, SHOW_MICROTIME}, + {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS}, + {"lower_case_file_system", (char*) &lower_case_file_system, SHOW_MY_BOOL}, + {"lower_case_table_names", (char*) &lower_case_table_names, SHOW_INT}, +@@ -1014,6 +1044,7 @@ + {sys_max_tmp_tables.name, (char*) &sys_max_tmp_tables, SHOW_SYS}, + {sys_max_user_connections.name,(char*) &sys_max_user_connections, SHOW_SYS}, + {sys_max_write_lock_count.name, (char*) &sys_max_write_lock_count,SHOW_SYS}, ++ {sys_min_examined_row_limit.name, (char*) &sys_min_examined_row_limit, SHOW_SYS}, + {sys_multi_range_count.name, (char*) &sys_multi_range_count, SHOW_SYS}, + 
{sys_myisam_data_pointer_size.name, (char*) &sys_myisam_data_pointer_size, SHOW_SYS}, + {sys_myisam_max_sort_file_size.name, (char*) &sys_myisam_max_sort_file_size, +@@ -1101,6 +1132,8 @@ + {sys_slave_trans_retries.name,(char*) &sys_slave_trans_retries, SHOW_SYS}, + #endif + {sys_slow_launch_time.name, (char*) &sys_slow_launch_time, SHOW_SYS}, ++ {sys_slow_query_log.name, (char*) &sys_slow_query_log, SHOW_SYS}, ++ {sys_slow_query_log_file.name,(char*) &sys_slow_query_log_file, SHOW_SYS}, + #ifdef HAVE_SYS_UN_H + {"socket", (char*) &mysqld_unix_port, SHOW_CHAR_PTR}, + #endif +@@ -1141,6 +1174,7 @@ + {sys_tx_isolation.name, (char*) &sys_tx_isolation, SHOW_SYS}, + {sys_updatable_views_with_limit.name, + (char*) &sys_updatable_views_with_limit,SHOW_SYS}, ++ {sys_use_global_long_query_time.name, (char*) &sys_use_global_long_query_time, SHOW_SYS}, + {sys_version.name, (char*) &sys_version, SHOW_SYS}, + #ifdef HAVE_BERKELEY_DB + {sys_version_bdb.name, (char*) &sys_version_bdb, SHOW_SYS}, +@@ -1769,6 +1803,17 @@ + } + + ++bool sys_var_thd_microtime::check(THD *thd, set_var *var) ++{ ++ if (var->value->result_type() == DECIMAL_RESULT) ++ var->save_result.ulonglong_value= (ulonglong)(var->value->val_real() * 1000000); ++ else ++ var->save_result.ulonglong_value= (ulonglong)(var->value->val_int() * 1000000); ++ ++ return 0; ++} ++ ++ + bool sys_var_thd_bool::update(THD *thd, set_var *var) + { + if (var->type == OPT_GLOBAL) +@@ -1924,6 +1969,19 @@ + value= *(longlong*) value_ptr(thd, var_type, base); + pthread_mutex_unlock(&LOCK_global_system_variables); + return new Item_int(value); ++ } ++ case SHOW_MICROTIME: ++ { ++ longlong value; ++ char buff[80]; ++ int len; ++ ++ pthread_mutex_lock(&LOCK_global_system_variables); ++ value= *(longlong*) value_ptr(thd, var_type, base); ++ pthread_mutex_unlock(&LOCK_global_system_variables); ++ ++ len = snprintf(buff, 80, "%f", ((double) value) / 1000000.0); ++ return new Item_float(buff,len); + } + case SHOW_HA_ROWS: + { +@@ -2757,6 
+2815,30 @@ + } + + ++bool sys_var_log_slow::update(THD *thd, set_var *var) ++{ ++ bool ret; ++ ++ pthread_mutex_lock(&LOCK_global_system_variables); ++ if (var->save_result.ulong_value) ++ { ++ if(!mysql_slow_log.is_open()) ++ { ++ mysql_slow_log.open_slow_log(opt_slow_logname); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_system_variables); ++ ++ ret = sys_var_bool_ptr::update(thd, var); ++ ++#ifdef HAVE_INNOBASE_DB ++ innobase_update_var_slow_log(); ++#endif ++ ++ return(ret); ++} ++ ++ + #ifdef HAVE_REPLICATION + bool sys_var_slave_skip_counter::check(THD *thd, set_var *var) + { +@@ -3526,6 +3608,191 @@ + #endif + } + ++/* Slow log stuff */ ++ ++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len) ++{ ++ ulong i; ++ ++ for (i=0; opts[i].name; i++) ++ { ++ if (!my_strnncoll(&my_charset_latin1, ++ (const uchar *)name, len, ++ (const uchar *)opts[i].name, strlen(opts[i].name))) ++ return opts[i].val; ++ } ++ return opts[i].val; ++} ++ ++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, ++ const ulong none_val, const ulong invalid_val) ++{ ++ const char *p, *e; ++ ulong val= none_val; ++ ++ if (!*names_list) ++ return val; ++ ++ for (p= e= names_list; ; e++) ++ { ++ ulong i; ++ ++ if (*e != ',' && *e) ++ continue; ++ for (i=0; opts[i].name; i++) ++ { ++ if (!my_strnncoll(&my_charset_latin1, ++ (const uchar *)p, e - p, ++ (const uchar *)opts[i].name, strlen(opts[i].name))) ++ { ++ val= val | opts[i].val; ++ break; ++ } ++ } ++ if (opts[i].val == invalid_val) ++ return invalid_val; ++ if (!*e) ++ break; ++ p= e + 1; ++ } ++ return val; ++} ++ ++const char *msl_option_get_name(const struct msl_opts *opts, ulong val) ++{ ++ for (ulong i=0; opts[i].name && opts[i].name[0]; i++) ++ { ++ if (opts[i].val == val) ++ return opts[i].name; ++ } ++ return "*INVALID*"; ++} ++ ++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val) ++{ ++ uint offset= 0; ++ ++ *buf= '\0'; ++ for (ulong 
i=0; opts[i].name && opts[i].name[0]; i++) ++ { ++ if (opts[i].val & val) ++ offset+= snprintf(buf+offset, STRING_BUFFER_USUAL_SIZE - offset - 1, ++ "%s%s", (offset ? "," : ""), opts[i].name); ++ } ++ return buf; ++} ++ ++/**************************************************************************** ++ Functions to handle log_slow_verbosity ++****************************************************************************/ ++ ++/* Based upon sys_var::check_enum() */ ++ ++bool sys_var_thd_msl_option::check(THD *thd, set_var *var) ++{ ++ char buff[STRING_BUFFER_USUAL_SIZE]; ++ String str(buff, sizeof(buff), &my_charset_latin1), *res; ++ ++ if (var->value->result_type() == STRING_RESULT) ++ { ++ ulong verb= this->invalid_val; ++ if (!(res=var->value->val_str(&str)) || ++ (var->save_result.ulong_value= ++ (ulong) (verb= msl_option_resolve_by_name(this->opts, res->ptr(), res->length()))) == this->invalid_val) ++ goto err; ++ return 0; ++ } ++ ++err: ++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name); ++ return 1; ++} ++ ++byte *sys_var_thd_msl_option::value_ptr(THD *thd, enum_var_type type, ++ LEX_STRING *base) ++{ ++ ulong val; ++ val= ((type == OPT_GLOBAL) ? 
global_system_variables.*offset : ++ thd->variables.*offset); ++ const char *verbosity= msl_option_get_name(this->opts, val); ++ return (byte *) verbosity; ++} ++ ++ ++void sys_var_thd_msl_option::set_default(THD *thd, enum_var_type type) ++{ ++ if (type == OPT_GLOBAL) ++ global_system_variables.*offset= (ulong) this->default_val; ++ else ++ thd->variables.*offset= (ulong) (global_system_variables.*offset); ++} ++ ++ ++bool sys_var_thd_msl_option::update(THD *thd, set_var *var) ++{ ++ if (var->type == OPT_GLOBAL) ++ global_system_variables.*offset= var->save_result.ulong_value; ++ else ++ thd->variables.*offset= var->save_result.ulong_value; ++ return 0; ++} ++ ++/**************************************************************************** ++ Functions to handle log_slow_filter ++****************************************************************************/ ++ ++/* Based upon sys_var::check_enum() */ ++ ++bool sys_var_thd_msl_flag::check(THD *thd, set_var *var) ++{ ++ char buff[2 * STRING_BUFFER_USUAL_SIZE]; ++ String str(buff, sizeof(buff), &my_charset_latin1), *res; ++ ++ if (var->value->result_type() == STRING_RESULT) ++ { ++ ulong filter= this->none_val; ++ if (!(res=var->value->val_str(&str)) || ++ (var->save_result.ulong_value= ++ (ulong) (filter= msl_flag_resolve_by_name(this->flags, res->ptr(), this->none_val, ++ this->invalid_val))) == this->invalid_val) ++ goto err; ++ return 0; ++ } ++ ++err: ++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name); ++ return 1; ++} ++ ++byte *sys_var_thd_msl_flag::value_ptr(THD *thd, enum_var_type type, ++ LEX_STRING *base) ++{ ++ ulong val; ++ val= ((type == OPT_GLOBAL) ? 
global_system_variables.*offset : ++ thd->variables.*offset); ++ msl_flag_get_name(this->flags, this->flags_string, val); ++ return (byte *) this->flags_string; ++} ++ ++ ++void sys_var_thd_msl_flag::set_default(THD *thd, enum_var_type type) ++{ ++ if (type == OPT_GLOBAL) ++ global_system_variables.*offset= (ulong) this->default_val; ++ else ++ thd->variables.*offset= (ulong) (global_system_variables.*offset); ++} ++ ++ ++bool sys_var_thd_msl_flag::update(THD *thd, set_var *var) ++{ ++ if (var->type == OPT_GLOBAL) ++ global_system_variables.*offset= var->save_result.ulong_value; ++ else ++ thd->variables.*offset= var->save_result.ulong_value; ++ return 0; ++} ++ + /**************************************************************************** + Functions to handle table_type + ****************************************************************************/ +diff -r 04958490fc6d sql/set_var.h +--- a/sql/set_var.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/set_var.h Tue Feb 17 22:33:00 2009 -0800 +@@ -132,6 +132,7 @@ + }; + + ++ + class sys_var_ulonglong_ptr :public sys_var + { + public: +@@ -168,6 +169,13 @@ + bool check_update_type(Item_result type) { return 0; } + }; + ++class sys_var_log_slow :public sys_var_bool_ptr ++{ ++public: ++ sys_var_log_slow(const char *name_arg, my_bool *value_arg) ++ :sys_var_bool_ptr(name_arg, value_arg) {} ++ bool update(THD *thd, set_var *var); ++}; + + class sys_var_bool_const_ptr : public sys_var + { +@@ -340,7 +348,6 @@ + } + }; + +- + class sys_var_thd_ulong :public sys_var_thd + { + sys_check_func check_func; +@@ -360,7 +367,6 @@ + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + }; + +- + class sys_var_thd_ha_rows :public sys_var_thd + { + public: +@@ -377,7 +383,6 @@ + SHOW_TYPE show_type() { return SHOW_HA_ROWS; } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + }; +- + + class sys_var_thd_ulonglong :public sys_var_thd + { +@@ -407,6 +412,19 @@ + } + }; + ++class sys_var_thd_microtime :public 
sys_var_thd_ulonglong ++{ ++public: ++ sys_var_thd_microtime(const char *name_arg, ulonglong SV::*offset_arg) ++ :sys_var_thd_ulonglong(name_arg, offset_arg) ++ {} ++ SHOW_TYPE show_type() { return SHOW_MICROTIME; } ++ bool check(THD *thd, set_var *var); ++ bool check_update_type(Item_result type) ++ { ++ return type != INT_RESULT && type != DECIMAL_RESULT; ++ } ++}; + + class sys_var_thd_bool :public sys_var_thd + { +@@ -477,6 +495,66 @@ + ulong *length); + }; + ++ ++class sys_var_thd_msl_option :public sys_var_thd ++{ ++protected: ++ ulong SV::*offset; ++ const ulong none_val; ++ const ulong default_val; ++ const ulong invalid_val; ++ const struct msl_opts *opts; ++public: ++ sys_var_thd_msl_option(const char *name_arg, ulong SV::*offset_arg, ++ const ulong none_val_arg, ++ const ulong default_val_arg, ++ const ulong invalid_val_arg, ++ const struct msl_opts *opts_arg) ++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg), ++ default_val(default_val_arg), invalid_val(invalid_val_arg), ++ opts(opts_arg) ++ {} ++ bool check(THD *thd, set_var *var); ++ SHOW_TYPE show_type() { return SHOW_CHAR; } ++ bool check_update_type(Item_result type) ++ { ++ return type != STRING_RESULT; /* Only accept strings */ ++ } ++ void set_default(THD *thd, enum_var_type type); ++ bool update(THD *thd, set_var *var); ++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); ++}; ++ ++ ++class sys_var_thd_msl_flag :public sys_var_thd ++{ ++protected: ++ char flags_string[2 * STRING_BUFFER_USUAL_SIZE]; ++ ulong SV::*offset; ++ const ulong none_val; ++ const ulong default_val; ++ const ulong invalid_val; ++ const struct msl_opts *flags; ++public: ++ sys_var_thd_msl_flag(const char *name_arg, ulong SV::*offset_arg, ++ const ulong none_val_arg, ++ const ulong default_val_arg, ++ const ulong invalid_val_arg, ++ const struct msl_opts *flags_arg) ++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg), ++ default_val(default_val_arg), 
invalid_val(invalid_val_arg), ++ flags(flags_arg) ++ {} ++ bool check(THD *thd, set_var *var); ++ SHOW_TYPE show_type() { return SHOW_CHAR; } ++ bool check_update_type(Item_result type) ++ { ++ return type != STRING_RESULT; /* Only accept strings */ ++ } ++ void set_default(THD *thd, enum_var_type type); ++ bool update(THD *thd, set_var *var); ++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); ++}; + + class sys_var_thd_storage_engine :public sys_var_thd + { +@@ -1087,3 +1165,11 @@ + bool process_key_caches(int (* func) (const char *name, KEY_CACHE *)); + void delete_elements(I_List<NAMED_LIST> *list, + void (*free_element)(const char*, gptr)); ++ ++/* Slow log functions */ ++ ++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len); ++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, ++ const ulong none_val, const ulong invalid_val); ++const char *msl_option_get_name(const struct msl_opts *opts, ulong val); ++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val); +diff -r 04958490fc6d sql/slave.cc +--- a/sql/slave.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/slave.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -2925,6 +2925,12 @@ + + MAX_LOG_EVENT_HEADER; /* note, incr over the global not session var */ + thd->slave_thread = 1; + set_slave_thread_options(thd); ++ if (opt_log_slow_slave_statements) ++ { ++ thd->enable_slow_log= TRUE; ++ /* Slave thread is excluded from rate limiting the slow log writes. 
*/ ++ thd->write_to_slow_log= TRUE; ++ } + thd->client_capabilities = CLIENT_LOCAL_FILES; + thd->real_id=pthread_self(); + pthread_mutex_lock(&LOCK_thread_count); +diff -r 04958490fc6d sql/sql_cache.cc +--- a/sql/sql_cache.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_cache.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -1341,6 +1341,7 @@ + + thd->limit_found_rows = query->found_rows(); + thd->status_var.last_query_cost= 0.0; ++ thd->query_plan_flags|= QPLAN_QC; + + BLOCK_UNLOCK_RD(query_block); + DBUG_RETURN(1); // Result sent to client +@@ -1348,6 +1349,7 @@ + err_unlock: + STRUCT_UNLOCK(&structure_guard_mutex); + err: ++ thd->query_plan_flags|= QPLAN_QC_NO; + DBUG_RETURN(0); // Query was not cached + } + +diff -r 04958490fc6d sql/sql_class.cc +--- a/sql/sql_class.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_class.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -188,7 +188,7 @@ + lock_id(&main_lock_id), + user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0), + transaction_rollback_request(0), is_fatal_sub_stmt_error(0), +- rand_used(0), time_zone_used(0), ++ rand_used(0), time_zone_used(0), user_timer(0), + last_insert_id_used(0), last_insert_id_used_bin_log(0), insert_id_used(0), + clear_next_insert_id(0), in_lock_tables(0), bootstrap(0), + derived_tables_processing(FALSE), spcont(NULL), +@@ -2224,6 +2224,12 @@ + backup->cuted_fields= cuted_fields; + backup->client_capabilities= client_capabilities; + backup->savepoints= transaction.savepoints; ++ backup->innodb_io_reads= innodb_io_reads; ++ backup->innodb_io_read= innodb_io_read; ++ backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer; ++ backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer; ++ backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer; ++ backup->innodb_page_access= innodb_page_access; + + if (!lex->requires_prelocking() || is_update_query(lex->sql_command)) + options&= ~OPTION_BIN_LOG; +@@ -2240,7 +2246,13 @@ + sent_row_count= 0; + cuted_fields= 0; + 
transaction.savepoints= 0; +- ++ innodb_io_reads= 0; ++ innodb_io_read= 0; ++ innodb_io_reads_wait_timer= 0; ++ innodb_lock_que_wait_timer= 0; ++ innodb_innodb_que_wait_timer= 0; ++ innodb_page_access= 0; ++ + /* Surpress OK packets in case if we will execute statements */ + net.no_send_ok= TRUE; + } +@@ -2293,6 +2305,12 @@ + */ + examined_row_count+= backup->examined_row_count; + cuted_fields+= backup->cuted_fields; ++ innodb_io_reads+= backup->innodb_io_reads; ++ innodb_io_read+= backup->innodb_io_read; ++ innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer; ++ innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer; ++ innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer; ++ innodb_page_access+= backup->innodb_page_access; + } + + +diff -r 04958490fc6d sql/sql_class.h +--- a/sql/sql_class.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_class.h Tue Feb 17 22:33:00 2009 -0800 +@@ -43,6 +43,7 @@ + extern char internal_table_name[2]; + extern char empty_c_string[1]; + extern const char **errmesg; ++extern ulonglong frequency; + + #define TC_LOG_PAGE_SIZE 8192 + #define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE) +@@ -321,7 +322,7 @@ + bool write(THD *thd, enum enum_server_command command, + const char *format, ...) 
ATTRIBUTE_FORMAT(printf, 4, 5); + bool write(THD *thd, const char *query, uint query_length, +- time_t query_start=0); ++ time_t query_start=0, ulonglong query_start_timer=0); + bool write(Log_event* event_info); // binary log write + bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); + +@@ -527,13 +528,14 @@ + ulong auto_increment_increment, auto_increment_offset; + ulong bulk_insert_buff_size; + ulong join_buff_size; +- ulong long_query_time; ++ ulonglong long_query_time; + ulong max_allowed_packet; + ulong max_error_count; + ulong max_length_for_sort_data; + ulong max_sort_length; + ulong max_tmp_tables; + ulong max_insert_delayed_threads; ++ ulong min_examined_row_limit; + ulong multi_range_count; + ulong myisam_repair_threads; + ulong myisam_sort_buff_size; +@@ -549,10 +551,13 @@ + ulong preload_buff_size; + ulong profiling_history_size; + ulong query_cache_type; ++ ulong log_slow_rate_limit; + ulong read_buff_size; + ulong read_rnd_buff_size; + ulong div_precincrement; + ulong sortbuff_size; ++ ulong log_slow_filter; ++ ulong log_slow_verbosity; + ulong table_type; + ulong tx_isolation; + ulong completion_type; +@@ -1128,6 +1133,12 @@ + uint in_sub_stmt; + bool enable_slow_log, insert_id_used, clear_next_insert_id; + bool last_insert_id_used; ++ ulong innodb_io_reads; ++ ulonglong innodb_io_read; ++ ulong innodb_io_reads_wait_timer; ++ ulong innodb_lock_que_wait_timer; ++ ulong innodb_innodb_que_wait_timer; ++ ulong innodb_page_access; + my_bool no_send_ok; + SAVEPOINT *savepoints; + }; +@@ -1184,6 +1195,11 @@ + class THD :public Statement, + public Open_tables_state + { ++private: ++ inline ulonglong query_start_timer() { return start_timer; } ++ inline void set_timer() { if (user_timer) start_timer=timer_after_lock=user_timer; else timer_after_lock=my_timer(&start_timer, frequency); } ++ inline void end_timer() { my_timer(&start_timer, frequency); } ++ inline void lock_timer() { my_timer(&timer_after_lock, frequency); } + public: + /* + 
Constant for THD::where initialization in the beginning of every query. +@@ -1292,10 +1308,24 @@ + */ + const char *where; + time_t start_time,time_after_lock,user_time; ++ ulonglong start_timer,timer_after_lock, user_timer; + time_t connect_time,thr_create_time; // track down slow pthread_create + thr_lock_type update_lock_default; + Delayed_insert *di; + ++ bool write_to_slow_log; ++ ++ bool innodb_was_used; ++ ulong innodb_io_reads; ++ ulonglong innodb_io_read; ++ ulong innodb_io_reads_wait_timer; ++ ulong innodb_lock_que_wait_timer; ++ ulong innodb_innodb_que_wait_timer; ++ ulong innodb_page_access; ++ ++ ulong query_plan_flags; ++ ulong query_plan_fsort_passes; ++ + /* <> 0 if we are inside of trigger or stored function. */ + uint in_sub_stmt; + +@@ -1685,11 +1715,11 @@ + sql_print_information("time() failed with %d", errno); + } + +- inline time_t query_start() { query_start_used=1; return start_time; } +- inline void set_time() { if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }} +- inline void end_time() { safe_time(&start_time); } +- inline void set_time(time_t t) { time_after_lock=start_time=user_time=t; } +- inline void lock_time() { safe_time(&time_after_lock); } ++ inline time_t query_start() { query_start_timer(); query_start_used=1; return start_time; } ++ inline void set_time() { set_timer(); if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }} ++ inline void end_time() { end_timer(); safe_time(&start_time); } ++ inline void set_time(time_t t) { set_timer(); time_after_lock=start_time=user_time=t; } ++ inline void lock_time() { lock_timer(); safe_time(&time_after_lock); } + inline void insert_id(ulonglong id_arg) + { + last_insert_id= id_arg; +diff -r 04958490fc6d sql/sql_parse.cc +--- a/sql/sql_parse.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_parse.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -20,6 +20,7 @@ + #include 
<m_ctype.h> + #include <myisam.h> + #include <my_dir.h> ++#include <my_time.h> + + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" +@@ -1227,6 +1228,15 @@ + my_net_set_read_timeout(net, thd->variables.net_read_timeout); + my_net_set_write_timeout(net, thd->variables.net_write_timeout); + ++ /* ++ If rate limiting of slow log writes is enabled, decide whether to log this ++ new thread's queries or not. Uses extremely simple algorithm. :) ++ */ ++ thd->write_to_slow_log= FALSE; ++ if (thd->variables.log_slow_rate_limit <= 1 || ++ (thd->thread_id % thd->variables.log_slow_rate_limit) == 0) ++ thd->write_to_slow_log= TRUE; ++ + while (!net->error && net->vio != 0 && + !(thd->killed == THD::KILL_CONNECTION)) + { +@@ -2353,27 +2363,55 @@ + return; // Don't set time for sub stmt + + start_of_query= thd->start_time; ++ ulonglong start_of_query_timer= thd->start_timer; + thd->end_time(); // Set start time ++ ++ ++ /* Follow the slow log filter configuration. */ ++ if (thd->variables.log_slow_filter != SLOG_F_NONE && ++ (!(thd->variables.log_slow_filter & thd->query_plan_flags) || ++ ((thd->variables.log_slow_filter & SLOG_F_QC_NO) && ++ (thd->query_plan_flags & QPLAN_QC)))) ++ return; ++ ++ /* ++ Low long_query_time value most likely means user is debugging stuff and even ++ though some thread's queries are not supposed to be logged b/c of the rate ++ limit, if one of them takes long enough (>= 1 second) it will be sensible ++ to make an exception and write to slow log anyway. ++ */ ++ if (opt_use_global_long_query_time) ++ thd->variables.long_query_time = global_system_variables.long_query_time; ++ if (thd->write_to_slow_log != TRUE && thd->variables.long_query_time < 1000000 && ++ (ulong) (thd->start_timer - thd->timer_after_lock) >= 1000000) ++ thd->write_to_slow_log= TRUE; ++ ++ /* Do not log this thread's queries due to rate limiting. 
*/ ++ if (thd->write_to_slow_log != TRUE) ++ return; + + /* + Do not log administrative statements unless the appropriate option is + set; do not log into slow log if reading from backup. + */ +- if (thd->enable_slow_log && !thd->user_time) ++ if (thd->enable_slow_log && ++ (!thd->user_time || (thd->slave_thread && opt_log_slow_slave_statements)) ++ ) + { + thd_proc_info(thd, "logging slow query"); + +- if ((ulong) (thd->start_time - thd->time_after_lock) > +- thd->variables.long_query_time || +- (thd->server_status & +- (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) && +- opt_log_queries_not_using_indexes && +- /* == SQLCOM_END unless this is a SHOW command */ +- thd->lex->orig_sql_command == SQLCOM_END) ++ if (((ulong) (thd->start_timer - thd->timer_after_lock) >= ++ thd->variables.long_query_time || ++ (thd->server_status & ++ (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) && ++ opt_log_queries_not_using_indexes && ++ /* == SQLCOM_END unless this is a SHOW command */ ++ thd->lex->orig_sql_command == SQLCOM_END) && ++ thd->examined_row_count >= thd->variables.min_examined_row_limit) + { + thd_proc_info(thd, "logging slow query"); + thd->status_var.long_query_count++; +- mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query); ++ mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query, start_of_query_timer); + } + } + } +@@ -2664,6 +2702,8 @@ + context.resolve_in_table_list_only((TABLE_LIST*)select_lex-> + table_list.first); + ++ /* Reset the counter at all cases for the extended slow query log */ ++ thd->row_count= 1; + /* + Reset warning count for each query that uses tables + A better approach would be to reset this for any commands +@@ -6084,6 +6124,15 @@ + thd->total_warn_count=0; // Warnings for this query + thd->rand_used= 0; + thd->sent_row_count= thd->examined_row_count= 0; ++ thd->innodb_was_used= FALSE; ++ thd->innodb_io_reads= 0; ++ thd->innodb_io_read= 0; ++ 
thd->innodb_io_reads_wait_timer= 0; ++ thd->innodb_lock_que_wait_timer= 0; ++ thd->innodb_innodb_que_wait_timer= 0; ++ thd->innodb_page_access= 0; ++ thd->query_plan_flags= QPLAN_NONE; ++ thd->query_plan_fsort_passes= 0; + } + DBUG_VOID_RETURN; + } +diff -r 04958490fc6d sql/sql_select.cc +--- a/sql/sql_select.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_select.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -6224,8 +6224,11 @@ + { + join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED; + if (statistics) ++ { + statistic_increment(join->thd->status_var.select_scan_count, + &LOCK_status); ++ join->thd->query_plan_flags|= QPLAN_FULL_SCAN; ++ } + } + } + else +@@ -6240,8 +6243,11 @@ + { + join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED; + if (statistics) ++ { + statistic_increment(join->thd->status_var.select_full_join_count, + &LOCK_status); ++ join->thd->query_plan_flags|= QPLAN_FULL_JOIN; ++ } + } + } + if (!table->no_keyread) +@@ -9305,6 +9311,7 @@ + (ulong) rows_limit,test(group))); + + statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); ++ thd->query_plan_flags|= QPLAN_TMP_TABLE; + + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + temp_pool_slot = bitmap_set_next(&temp_pool); +@@ -10165,6 +10172,7 @@ + } + statistic_increment(table->in_use->status_var.created_tmp_disk_tables, + &LOCK_status); ++ table->in_use->query_plan_flags|= QPLAN_TMP_DISK; + table->s->db_record_offset= 1; + DBUG_RETURN(0); + err: +diff -r 04958490fc6d sql/sql_show.cc +--- a/sql/sql_show.cc Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/sql_show.cc Tue Feb 17 22:33:00 2009 -0800 +@@ -1544,6 +1544,12 @@ + value= ((char *) status_var + (ulonglong) value); + case SHOW_LONGLONG: + end= longlong10_to_str(*(longlong*) value, buff, 10); ++ break; ++ case SHOW_MICROTIME: ++ show_type= ((sys_var*) value)->show_type(); ++ value= (char*) ((sys_var*) value)->value_ptr(thd, value_type, ++ &null_lex_str); ++ end= buff + sprintf(buff, "%f", (((double) (*(ulonglong*)value))) / 
1000000.0); + break; + case SHOW_HA_ROWS: + end= longlong10_to_str((longlong) *(ha_rows*) value, buff, 10); +diff -r 04958490fc6d sql/structs.h +--- a/sql/structs.h Tue Feb 17 22:32:27 2009 -0800 ++++ b/sql/structs.h Tue Feb 17 22:33:00 2009 -0800 +@@ -168,8 +168,8 @@ + enum SHOW_TYPE + { + SHOW_UNDEF, +- SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, +- SHOW_DOUBLE_STATUS, ++ SHOW_LONG, SHOW_LONGLONG, SHOW_MICROTIME, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, ++ SHOW_DOUBLE_STATUS, + SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUERIES, + SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS, + SHOW_VARS, diff --git a/percona/5.0.77-b13/mirror_binlog.patch b/percona/5.0.77-b13/mirror_binlog.patch new file mode 100644 index 0000000..4dae799 --- /dev/null +++ b/percona/5.0.77-b13/mirror_binlog.patch @@ -0,0 +1,2694 @@ +diff -r 66cc9e0a6768 mysql-test/lib/mtr_cases.pl +--- a/mysql-test/lib/mtr_cases.pl Thu Dec 04 21:37:12 2008 -0800 ++++ b/mysql-test/lib/mtr_cases.pl Thu Dec 04 21:46:15 2008 -0800 +@@ -334,6 +334,10 @@ + + $tinfo->{'slave_num'}= 1; # Default for rpl* tests, use one slave + ++ if ( $tname eq 'rpl_mirror_binlog' ) ++ { ++ $tinfo->{'slave_num'}= 3; ++ } + } + + if ( defined mtr_match_prefix($tname,"federated") ) +@@ -344,15 +348,20 @@ + + my $master_opt_file= "$testdir/$tname-master.opt"; + my $slave_opt_file= "$testdir/$tname-slave.opt"; +- my $slave_mi_file= "$testdir/$tname.slave-mi"; ++ my $slave_mi_files= ["$testdir/$tname.slave-mi", ++ "$testdir/$tname.1.slave-mi", ++ "$testdir/$tname.2.slave-mi"]; + my $master_sh= "$testdir/$tname-master.sh"; + my $slave_sh= "$testdir/$tname-slave.sh"; + my $disabled_file= "$testdir/$tname.disabled"; + my $im_opt_file= "$testdir/$tname-im.opt"; + +- $tinfo->{'master_opt'}= []; +- $tinfo->{'slave_opt'}= []; +- $tinfo->{'slave_mi'}= []; ++ $tinfo->{'master_opt'}= []; ++ $tinfo->{'slave_opt'}= []; ++ $tinfo->{'slave_mi'}= {}; ++ $tinfo->{'slave_mi'}{0}= []; ++ 
$tinfo->{'slave_mi'}{1}= []; ++ $tinfo->{'slave_mi'}{2}= []; + + if ( -f $master_opt_file ) + { +@@ -427,9 +436,14 @@ + push(@{$tinfo->{'slave_opt'}}, @$slave_opt); + } + +- if ( -f $slave_mi_file ) ++ my $mi_idx= 0; ++ foreach my $slave_mi_file ( @$slave_mi_files ) + { +- $tinfo->{'slave_mi'}= mtr_get_opts_from_file($slave_mi_file); ++ if ( -f $slave_mi_file ) ++ { ++ $tinfo->{'slave_mi'}{$mi_idx}= mtr_get_opts_from_file($slave_mi_file); ++ } ++ $mi_idx+= 1; + } + + if ( -f $master_sh ) +diff -r 66cc9e0a6768 mysql-test/mysql-test-run.pl +--- a/mysql-test/mysql-test-run.pl Thu Dec 04 21:37:12 2008 -0800 ++++ b/mysql-test/mysql-test-run.pl Thu Dec 04 21:46:15 2008 -0800 +@@ -275,6 +275,7 @@ + our $opt_stress_test_file= ""; + + our $opt_warnings; ++our $opt_slave_innodb= 0; + + our $opt_skip_ndbcluster= 0; + our $opt_skip_ndbcluster_slave= 0; +@@ -299,6 +300,8 @@ + our $used_binlog_format; + our $used_default_engine; + our $debug_compiled_binaries; ++ ++our $current_testname= ""; + + our %mysqld_variables; + +@@ -645,6 +648,7 @@ + 'testcase-timeout=i' => \$opt_testcase_timeout, + 'suite-timeout=i' => \$opt_suite_timeout, + 'warnings|log-warnings' => \$opt_warnings, ++ 'slave-innodb' => \$opt_slave_innodb, + + # Options which are no longer used + (map { $_ => \&warn_about_removed_option } @removed_options), +@@ -1001,6 +1005,14 @@ + { + $ENV{'BIG_TEST'}= 1; + } ++ ++ # -------------------------------------------------------------------------- ++ # Big test flags ++ # -------------------------------------------------------------------------- ++ if ( $opt_big_test ) ++ { ++ $ENV{'BIG_TEST'}= 1; ++ } + + # -------------------------------------------------------------------------- + # Gcov flag +@@ -1885,7 +1897,9 @@ + $ENV{'SLAVE_MYSOCK'}= $slave->[0]->{'path_sock'}; + $ENV{'SLAVE_MYPORT'}= $slave->[0]->{'port'}; + $ENV{'SLAVE_MYPORT1'}= $slave->[1]->{'port'}; ++ $ENV{'SLAVE_MYSOCK1'}= $slave->[1]->{'path_sock'}; + $ENV{'SLAVE_MYPORT2'}= $slave->[2]->{'port'}; ++ 
$ENV{'SLAVE_MYSOCK2'}= $slave->[2]->{'path_sock'}; + $ENV{'MYSQL_TCP_PORT'}= $mysqld_variables{'port'}; + $ENV{'DEFAULT_MASTER_PORT'}= $mysqld_variables{'master-port'}; + +@@ -2375,6 +2389,8 @@ + if ( ! $glob_win32 ) + { + symlink("$glob_mysql_test_dir/std_data", "$opt_vardir/std_data_ln"); ++ my @a = ("chmod", "-R", "o+r", "$glob_mysql_test_dir/std_data"); ++ system(@a) == 0 or die "system @ failed: $?" + } + else + { +@@ -3466,6 +3482,8 @@ + $ENV{'TZ'}= $tinfo->{'timezone'}; + mtr_verbose("Setting timezone: $tinfo->{'timezone'}"); + ++ $current_testname= $tinfo->{'name'}; ++ + my $master_restart= run_testcase_need_master_restart($tinfo); + my $slave_restart= run_testcase_need_slave_restart($tinfo); + +@@ -3881,7 +3899,8 @@ + unless $mysqld->{'type'} eq 'slave'; + + mtr_add_arg($args, "%s--init-rpl-role=slave", $prefix); +- if (! ( $opt_skip_slave_binlog || $skip_binlog )) ++ ++ if (! ($opt_skip_slave_binlog or ($current_testname eq 'rpl_mirror_binlog')) ) + { + mtr_add_arg($args, "%s--log-bin=%s/log/slave%s-bin", $prefix, + $opt_vardir, $sidx); # FIXME use own dir for binlogs +@@ -4568,7 +4587,7 @@ + if ( ! $slave->[$idx]->{'pid'} ) + { + mysqld_start($slave->[$idx],$tinfo->{'slave_opt'}, +- $tinfo->{'slave_mi'}); ++ $tinfo->{'slave_mi'}{$idx}); + + } + } +@@ -4580,7 +4599,6 @@ + # Wait for clusters to start + foreach my $cluster (@{$clusters}) + { +- + next if !$cluster->{'pid'}; + + if (ndbcluster_wait_started($cluster, "")) +@@ -5179,6 +5197,7 @@ + skip-im Don't start IM, and skip the IM test cases + big-test Set the environment variable BIG_TEST, which can be + checked from test cases. 
++ + + Options that specify ports + +diff -r 66cc9e0a6768 mysql-test/r/rpl_mirror_binlog.result +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/r/rpl_mirror_binlog.result Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,441 @@ ++stop slave; ++drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; ++reset master; ++reset slave; ++drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; ++start slave; ++drop table if exists t1; ++create table t1(n int) engine = InnoDB; ++insert into t1 values (300); ++insert into t1 values (299); ++insert into t1 values (298); ++insert into t1 values (297); ++insert into t1 values (296); ++insert into t1 values (295); ++insert into t1 values (294); ++insert into t1 values (293); ++insert into t1 values (292); ++insert into t1 values (291); ++insert into t1 values (290); ++insert into t1 values (289); ++insert into t1 values (288); ++insert into t1 values (287); ++insert into t1 values (286); ++insert into t1 values (285); ++insert into t1 values (284); ++insert into t1 values (283); ++insert into t1 values (282); ++insert into t1 values (281); ++insert into t1 values (280); ++insert into t1 values (279); ++insert into t1 values (278); ++insert into t1 values (277); ++insert into t1 values (276); ++insert into t1 values (275); ++insert into t1 values (274); ++insert into t1 values (273); ++insert into t1 values (272); ++insert into t1 values (271); ++insert into t1 values (270); ++insert into t1 values (269); ++insert into t1 values (268); ++insert into t1 values (267); ++insert into t1 values (266); ++insert into t1 values (265); ++insert into t1 values (264); ++insert into t1 values (263); ++insert into t1 values (262); ++insert into t1 values (261); ++insert into t1 values (260); ++insert into t1 values (259); ++insert into t1 values (258); ++insert into t1 values (257); ++insert into t1 values (256); ++insert into t1 values (255); ++insert into t1 values (254); ++insert into t1 values (253); ++insert into t1 values (252); ++insert 
into t1 values (251); ++insert into t1 values (250); ++insert into t1 values (249); ++insert into t1 values (248); ++insert into t1 values (247); ++insert into t1 values (246); ++insert into t1 values (245); ++insert into t1 values (244); ++insert into t1 values (243); ++insert into t1 values (242); ++insert into t1 values (241); ++insert into t1 values (240); ++insert into t1 values (239); ++insert into t1 values (238); ++insert into t1 values (237); ++insert into t1 values (236); ++insert into t1 values (235); ++insert into t1 values (234); ++insert into t1 values (233); ++insert into t1 values (232); ++insert into t1 values (231); ++insert into t1 values (230); ++insert into t1 values (229); ++insert into t1 values (228); ++insert into t1 values (227); ++insert into t1 values (226); ++insert into t1 values (225); ++insert into t1 values (224); ++insert into t1 values (223); ++insert into t1 values (222); ++insert into t1 values (221); ++insert into t1 values (220); ++insert into t1 values (219); ++insert into t1 values (218); ++insert into t1 values (217); ++insert into t1 values (216); ++insert into t1 values (215); ++insert into t1 values (214); ++insert into t1 values (213); ++insert into t1 values (212); ++insert into t1 values (211); ++insert into t1 values (210); ++insert into t1 values (209); ++insert into t1 values (208); ++insert into t1 values (207); ++insert into t1 values (206); ++insert into t1 values (205); ++insert into t1 values (204); ++insert into t1 values (203); ++insert into t1 values (202); ++insert into t1 values (201); ++insert into t1 values (200); ++insert into t1 values (199); ++insert into t1 values (198); ++insert into t1 values (197); ++insert into t1 values (196); ++insert into t1 values (195); ++insert into t1 values (194); ++insert into t1 values (193); ++insert into t1 values (192); ++insert into t1 values (191); ++insert into t1 values (190); ++insert into t1 values (189); ++insert into t1 values (188); ++insert into t1 values 
(187); ++insert into t1 values (186); ++insert into t1 values (185); ++insert into t1 values (184); ++insert into t1 values (183); ++insert into t1 values (182); ++insert into t1 values (181); ++insert into t1 values (180); ++insert into t1 values (179); ++insert into t1 values (178); ++insert into t1 values (177); ++insert into t1 values (176); ++insert into t1 values (175); ++insert into t1 values (174); ++insert into t1 values (173); ++insert into t1 values (172); ++insert into t1 values (171); ++insert into t1 values (170); ++insert into t1 values (169); ++insert into t1 values (168); ++insert into t1 values (167); ++insert into t1 values (166); ++insert into t1 values (165); ++insert into t1 values (164); ++insert into t1 values (163); ++insert into t1 values (162); ++insert into t1 values (161); ++insert into t1 values (160); ++insert into t1 values (159); ++insert into t1 values (158); ++insert into t1 values (157); ++insert into t1 values (156); ++insert into t1 values (155); ++insert into t1 values (154); ++insert into t1 values (153); ++insert into t1 values (152); ++insert into t1 values (151); ++insert into t1 values (150); ++insert into t1 values (149); ++insert into t1 values (148); ++insert into t1 values (147); ++insert into t1 values (146); ++insert into t1 values (145); ++insert into t1 values (144); ++insert into t1 values (143); ++insert into t1 values (142); ++insert into t1 values (141); ++insert into t1 values (140); ++insert into t1 values (139); ++insert into t1 values (138); ++insert into t1 values (137); ++insert into t1 values (136); ++insert into t1 values (135); ++insert into t1 values (134); ++insert into t1 values (133); ++insert into t1 values (132); ++insert into t1 values (131); ++insert into t1 values (130); ++insert into t1 values (129); ++insert into t1 values (128); ++insert into t1 values (127); ++insert into t1 values (126); ++insert into t1 values (125); ++insert into t1 values (124); ++insert into t1 values (123); ++insert 
into t1 values (122); ++insert into t1 values (121); ++insert into t1 values (120); ++insert into t1 values (119); ++insert into t1 values (118); ++insert into t1 values (117); ++insert into t1 values (116); ++insert into t1 values (115); ++insert into t1 values (114); ++insert into t1 values (113); ++insert into t1 values (112); ++insert into t1 values (111); ++insert into t1 values (110); ++insert into t1 values (109); ++insert into t1 values (108); ++insert into t1 values (107); ++insert into t1 values (106); ++insert into t1 values (105); ++insert into t1 values (104); ++insert into t1 values (103); ++insert into t1 values (102); ++insert into t1 values (101); ++insert into t1 values (100); ++insert into t1 values (99); ++insert into t1 values (98); ++insert into t1 values (97); ++insert into t1 values (96); ++insert into t1 values (95); ++insert into t1 values (94); ++insert into t1 values (93); ++insert into t1 values (92); ++insert into t1 values (91); ++insert into t1 values (90); ++insert into t1 values (89); ++insert into t1 values (88); ++insert into t1 values (87); ++insert into t1 values (86); ++insert into t1 values (85); ++insert into t1 values (84); ++insert into t1 values (83); ++insert into t1 values (82); ++insert into t1 values (81); ++insert into t1 values (80); ++insert into t1 values (79); ++insert into t1 values (78); ++insert into t1 values (77); ++insert into t1 values (76); ++insert into t1 values (75); ++insert into t1 values (74); ++insert into t1 values (73); ++insert into t1 values (72); ++insert into t1 values (71); ++insert into t1 values (70); ++insert into t1 values (69); ++insert into t1 values (68); ++insert into t1 values (67); ++insert into t1 values (66); ++insert into t1 values (65); ++insert into t1 values (64); ++insert into t1 values (63); ++insert into t1 values (62); ++insert into t1 values (61); ++insert into t1 values (60); ++insert into t1 values (59); ++insert into t1 values (58); ++insert into t1 values (57); 
++insert into t1 values (56); ++insert into t1 values (55); ++insert into t1 values (54); ++insert into t1 values (53); ++insert into t1 values (52); ++insert into t1 values (51); ++insert into t1 values (50); ++insert into t1 values (49); ++insert into t1 values (48); ++insert into t1 values (47); ++insert into t1 values (46); ++insert into t1 values (45); ++insert into t1 values (44); ++insert into t1 values (43); ++insert into t1 values (42); ++insert into t1 values (41); ++insert into t1 values (40); ++insert into t1 values (39); ++insert into t1 values (38); ++insert into t1 values (37); ++insert into t1 values (36); ++insert into t1 values (35); ++insert into t1 values (34); ++insert into t1 values (33); ++insert into t1 values (32); ++insert into t1 values (31); ++insert into t1 values (30); ++insert into t1 values (29); ++insert into t1 values (28); ++insert into t1 values (27); ++insert into t1 values (26); ++insert into t1 values (25); ++insert into t1 values (24); ++insert into t1 values (23); ++insert into t1 values (22); ++insert into t1 values (21); ++insert into t1 values (20); ++insert into t1 values (19); ++insert into t1 values (18); ++insert into t1 values (17); ++insert into t1 values (16); ++insert into t1 values (15); ++insert into t1 values (14); ++insert into t1 values (13); ++insert into t1 values (12); ++insert into t1 values (11); ++insert into t1 values (10); ++insert into t1 values (9); ++insert into t1 values (8); ++insert into t1 values (7); ++insert into t1 values (6); ++insert into t1 values (5); ++insert into t1 values (4); ++insert into t1 values (3); ++insert into t1 values (2); ++insert into t1 values (1); ++"The following are SLAVE." 
++select count(distinct n) from t1; ++count(distinct n) ++300 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++300 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9306 1 master-bin.000014 2849 # # master-bin.000014 Yes Yes # 0 0 2849 # None 0 No # ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000014 2849 ++"The following are SLAVE1." ++start slave; ++select count(distinct n) from t1; ++count(distinct n) ++300 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++300 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9308 1 master-bin.000014 2849 # # master-bin.000014 Yes Yes # 0 0 2849 # None 0 No # ++"The following are SLAVE." 
++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication-log'; ++ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log ++stop slave; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication_log'; ++ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log ++MAKE MASTER REVOKE SESSION WITH KILL; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++MASTER_SERVER_ID=2, ++INDEX='replication_log' ++ WITH BINLOG; ++MAKE MASTER GRANT SESSION; ++delete from t1 where n > 250; ++select count(distinct n) from t1; ++count(distinct n) ++250 ++"The following are SLAVE1." ++select count(distinct n) from t1; ++count(distinct n) ++250 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++250 ++"The following are SLAVE2." ++start slave; ++select count(distinct n) from t1; ++count(distinct n) ++250 ++select min(n) from t1; ++min(n) ++1 ++select max(n) from t1; ++max(n) ++250 ++show slave status; ++Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master ++Waiting for master to send event 127.0.0.1 root 9308 1 master-bin.000015 189 # # master-bin.000015 Yes Yes # 0 0 189 # None 0 No # ++drop table t1; ++drop table t1; ++"The following are SLAVE." 
++show master logs; ++Log_name File_size ++master-bin.000001 4214 ++master-bin.000002 4212 ++master-bin.000003 4212 ++master-bin.000004 4212 ++master-bin.000005 4212 ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000015 265 ++"The following are SLAVE2." ++show master logs; ++Log_name File_size ++master-bin.000001 4214 ++master-bin.000002 4212 ++master-bin.000003 4212 ++master-bin.000004 4212 ++master-bin.000005 4212 ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++show master status; ++File Position Binlog_Do_DB Binlog_Ignore_DB ++master-bin.000015 265 ++purge master logs to 'master-bin.000006'; ++show master logs; ++Log_name File_size ++master-bin.000006 4212 ++master-bin.000007 4212 ++master-bin.000008 4212 ++master-bin.000009 4212 ++master-bin.000010 4194 ++master-bin.000011 4190 ++master-bin.000012 4190 ++master-bin.000013 4190 ++master-bin.000014 2849 ++master-bin.000015 265 ++reset master; ++ERROR HY000: Binlog closed, cannot RESET MASTER +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog-master.opt +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog-master.opt Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++-O max_binlog_size=4096 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog-slave.opt +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog-slave.opt Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--rpl_mirror_binlog_enabled=1 --log-bin-index=replication_log +diff -r 66cc9e0a6768 
mysql-test/t/rpl_mirror_binlog.1.slave-mi +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.1.slave-mi Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--master-user=root --master-connect-retry=1 --master-host=127.0.0.1 --master-password="" --master-port=9308 --server-id=3 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog.2.slave-mi +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.2.slave-mi Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,1 @@ ++--master-user=root --master-connect-retry=1 --master-host=127.0.0.1 --master-password="" --master-port=9308 --server-id=4 +diff -r 66cc9e0a6768 mysql-test/t/rpl_mirror_binlog.test +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mysql-test/t/rpl_mirror_binlog.test Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,119 @@ ++-- source include/master-slave.inc ++-- source include/have_innodb.inc ++connect (slave_sec,localhost,root,,test,$SLAVE_MYPORT1,$SLAVE_MYSOCK1); ++connect (slave_ter,localhost,root,,test,$SLAVE_MYPORT2,$SLAVE_MYSOCK2); ++ ++connection master; ++--disable_warnings ++drop table if exists t1; ++--enable_warnings ++create table t1(n int) engine = InnoDB; ++ ++let $i=300; ++while ($i) ++{ ++ eval insert into t1 values ($i); ++ dec $i; ++} ++ ++save_master_pos; ++ ++connection slave; ++sync_with_master; ++ ++echo "The following are SLAVE."; ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++show master status; ++ ++connection slave_sec; ++echo "The following are SLAVE1."; ++start slave; ++sync_with_master; ++ ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++ ++# make the slave the new master ++connection slave; ++echo "The following are SLAVE."; ++ ++# The first 1201 error is caused by running slave. 
++--error 1201 ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication-log'; ++stop slave; ++ ++# The second 1201 error is caused by failover mode. ++--error 1201 ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication_log'; ++ ++MAKE MASTER REVOKE SESSION WITH KILL; ++MAKE MASTER MASTER_LOG_FILE='master-bin', ++ MASTER_SERVER_ID=2, ++ INDEX='replication_log' ++ WITH BINLOG; ++ ++MAKE MASTER GRANT SESSION; ++ ++delete from t1 where n > 250; ++save_master_pos; ++ ++select count(distinct n) from t1; ++ ++connection slave_sec; ++echo "The following are SLAVE1."; ++ ++sync_with_master; ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++ ++connection slave_ter; ++echo "The following are SLAVE2."; ++start slave; ++sync_with_master; ++ ++select count(distinct n) from t1; ++select min(n) from t1; ++select max(n) from t1; ++ ++--replace_column 8 # 9 # 18 # 23 # 33 # ++show slave status; ++ ++connection master; ++drop table t1; ++ ++connection slave; ++drop table t1; ++save_master_pos; ++ ++connection slave_sec; ++sync_with_master; ++ ++connection slave; ++echo "The following are SLAVE."; ++ ++show master logs; ++show master status; ++ ++ ++connection slave_ter; ++echo "The following are SLAVE2."; ++sync_with_master; ++ ++show master logs; ++show master status; ++purge master logs to 'master-bin.000006'; ++show master logs; ++--error 1186 ++reset master; +diff -r 66cc9e0a6768 patch_info/mirror_binlog.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/mirror_binlog.info Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,6 @@ ++File=mirror_binlog.patch ++Name=Mirroring binary logs on slave ++Version=V1 ++Author=Google ++License=GPL ++Comment=contains FastMaster promotion patch +diff -r 66cc9e0a6768 sql/Makefile.am +--- a/sql/Makefile.am Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/Makefile.am Thu Dec 04 21:46:15 2008 -0800 +@@ -68,7 +68,7 @@ + sql_array.h sql_cursor.h 
\ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ +- ha_federated.h ++ ha_federated.h repl_mule.h + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ + item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ +@@ -105,7 +105,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ +- ha_federated.cc ++ ha_federated.cc repl_mule.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) +diff -r 66cc9e0a6768 sql/Makefile.in +--- a/sql/Makefile.in Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/Makefile.in Thu Dec 04 21:46:15 2008 -0800 +@@ -152,7 +152,7 @@ + sp_rcontext.$(OBJEXT) sp.$(OBJEXT) sp_cache.$(OBJEXT) \ + parse_file.$(OBJEXT) sql_trigger.$(OBJEXT) \ + ha_example.$(OBJEXT) ha_archive.$(OBJEXT) ha_tina.$(OBJEXT) \ +- ha_blackhole.$(OBJEXT) ha_federated.$(OBJEXT) ++ ha_blackhole.$(OBJEXT) ha_federated.$(OBJEXT) repl_mule.$(OBJEXT) + mysqld_OBJECTS = $(am_mysqld_OBJECTS) + mysqld_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) \ + $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_2) \ +@@ -516,7 +516,7 @@ + sql_array.h sql_cursor.h \ + examples/ha_example.h ha_archive.h \ + examples/ha_tina.h ha_blackhole.h \ +- ha_federated.h ++ ha_federated.h repl_mule.h + + mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + item.cc item_sum.cc item_buff.cc item_func.cc \ +@@ -554,7 +554,7 @@ + sp_cache.cc parse_file.cc sql_trigger.cc \ + examples/ha_example.cc ha_archive.cc \ + examples/ha_tina.cc ha_blackhole.cc \ +- ha_federated.cc ++ ha_federated.cc repl_mule.cc + + gen_lex_hash_SOURCES = gen_lex_hash.cc + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) +@@ -748,6 +748,7 @@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/protocol.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/records.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/repl_failsafe.Po@am__quote@ 
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/repl_mule.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/set_var.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slave.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sp.Po@am__quote@ +diff -r 66cc9e0a6768 sql/lex.h +--- a/sql/lex.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/lex.h Thu Dec 04 21:46:15 2008 -0800 +@@ -292,6 +292,7 @@ + { "LONGTEXT", SYM(LONGTEXT)}, + { "LOOP", SYM(LOOP_SYM)}, + { "LOW_PRIORITY", SYM(LOW_PRIORITY)}, ++ { "MAKE", SYM(MAKE_SYM)}, + { "MASTER", SYM(MASTER_SYM)}, + { "MASTER_CONNECT_RETRY", SYM(MASTER_CONNECT_RETRY_SYM)}, + { "MASTER_HOST", SYM(MASTER_HOST_SYM)}, +diff -r 66cc9e0a6768 sql/log.cc +--- a/sql/log.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/log.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -79,7 +79,9 @@ + + bool binlog_init() + { +- return !opt_bin_log; ++ if (!opt_bin_log) ++ binlog_hton.prepare = NULL; ++ return 0; /* return !opt_bin_log; */ + } + + static int binlog_close_connection(THD *thd) +@@ -406,6 +408,7 @@ + :bytes_written(0), last_time(0), query_start(0), name(0), + prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1), + write_error(FALSE), inited(FALSE), need_start_event(TRUE), ++ mule_binlog_(0), + description_event_for_exec(0), description_event_for_queue(0) + { + /* +@@ -506,7 +509,10 @@ + const char *log_name) + { + File index_file_nr= -1; +- DBUG_ASSERT(!my_b_inited(&index_file)); ++ ++ /* If the index is already opened, do not open it again. 
*/ ++ if (my_b_inited(&index_file)) ++ return FALSE; + + /* + First open of this class instance +@@ -750,7 +756,7 @@ + if (file >= 0) + my_close(file,MYF(0)); + end_io_cache(&log_file); +- end_io_cache(&index_file); ++ close_index_file(); + safeFree(name); + log_type= LOG_CLOSED; + DBUG_RETURN(1); +@@ -768,7 +774,10 @@ + int MYSQL_LOG::raw_get_current_log(LOG_INFO* linfo) + { + strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1); +- linfo->pos = my_b_tell(&log_file); ++ if (!mule_binlog_) ++ linfo->pos = my_b_tell(&log_file); ++ else ++ linfo->pos = my_b_filelength(&log_file); + return 0; + } + +@@ -935,6 +944,11 @@ + if (need_lock) + pthread_mutex_lock(&LOCK_index); + safe_mutex_assert_owner(&LOCK_index); ++ ++ if (open_index_file(index_file_name, NULL) != 0) { ++ error = -1; ++ goto err; ++ } + + /* As the file is flushed, we can't get an error here */ + (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0, +@@ -1446,18 +1460,19 @@ + SYNOPSIS + new_file() + need_lock Set to 1 if caller has not locked LOCK_log ++ logfile_name the specified log filename. + + NOTE + The new file name is stored last in the index file + */ + +-void MYSQL_LOG::new_file(bool need_lock) ++void MYSQL_LOG::new_file(bool need_lock, const char* log_filename) + { + char new_name[FN_REFLEN], *new_name_ptr, *old_name; + enum_log_type save_log_type; + + DBUG_ENTER("MYSQL_LOG::new_file"); +- if (!is_open()) ++ if (!is_log_open()) + { + DBUG_PRINT("info",("log is closed")); + DBUG_VOID_RETURN; +@@ -1496,7 +1511,9 @@ + We have to do this here and not in open as we want to store the + new file name in the current binary log file. 
+ */ +- if (generate_new_name(new_name, name)) ++ if (log_filename) { ++ fn_format(new_name,log_filename,mysql_data_home,"",4); ++ } else if (generate_new_name(new_name, name)) + goto end; + new_name_ptr=new_name; + +@@ -1571,7 +1588,7 @@ + bytes_written+= ev->data_written; + DBUG_PRINT("info",("max_size: %lu",max_size)); + if ((uint) my_b_append_tell(&log_file) > max_size) +- new_file(0); ++ new_file(0); + + err: + pthread_mutex_unlock(&LOCK_log); +@@ -1600,8 +1617,14 @@ + bytes_written += len; + } while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint))); + DBUG_PRINT("info",("max_size: %lu",max_size)); +- if ((uint) my_b_append_tell(&log_file) > max_size) +- new_file(0); ++ ++ /* If max_size is BINLOG_NOSWITCH_SIZE, binlog would not switch because ++ * of file size limit. ++ */ ++ if (max_size != BINLOG_NOSWITCH_SIZE && ++ (uint) my_b_append_tell(&log_file) > max_size) { ++ new_file(0); ++ } + + err: + if (!error) +@@ -2492,6 +2515,17 @@ + DBUG_VOID_RETURN; + } + ++int MYSQL_LOG::flush_log_file() { ++ return flush_io_cache(&log_file); ++} ++ ++int MYSQL_LOG::close_index_file() { ++ if (my_b_inited(&index_file)) { ++ end_io_cache(&index_file); ++ my_close(index_file.file, MYF(0)); ++ } ++ return 0; ++} + + /* + Check if a string is a valid number +diff -r 66cc9e0a6768 sql/log_event.h +--- a/sql/log_event.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/log_event.h Thu Dec 04 21:46:15 2008 -0800 +@@ -94,6 +94,14 @@ + #define LINE_TERM_EMPTY 0x4 + #define LINE_START_EMPTY 0x8 + #define ESCAPED_EMPTY 0x10 ++ ++/* This server-id value is used to indicate a special master-info event ++ * in relay-log. ++ * We will enforce in database that replication can not set this value ++ * as the server-id. 
++ */ ++#define MASTER_INFO_SERVER_ID 0xffffffff ++ + + /***************************************************************************** + +diff -r 66cc9e0a6768 sql/mysql_priv.h +--- a/sql/mysql_priv.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/mysql_priv.h Thu Dec 04 21:46:15 2008 -0800 +@@ -462,6 +462,7 @@ + /* BINLOG_DUMP options */ + + #define BINLOG_DUMP_NON_BLOCK 1 ++#define BINLOG_MIRROR_CLIENT 0x0004 + + /* sql_show.cc:show_log_files() */ + #define SHOW_LOG_STATUS_FREE "FREE" +@@ -1374,6 +1375,7 @@ + extern const char **errmesg; /* Error messages */ + extern const char *myisam_recover_options_str; + extern const char *in_left_expr_name, *in_additional_cond, *in_having_cond; ++extern char *opt_binlog_index_name; + extern const char * const triggers_file_ext; + extern const char * const trigname_file_ext; + extern Eq_creator eq_creator; +@@ -1875,6 +1877,10 @@ + extern "C" void unireg_abort(int exit_code); + void kill_delayed_threads(void); + bool check_stack_overrun(THD *thd, long margin, char *dummy); ++extern my_bool rpl_mirror_binlog_enabled; ++extern ulong sync_mirror_binlog_period; ++extern my_bool rpl_mirror_binlog_no_replicate; ++extern ulong rpl_mirror_binlog_clients, rpl_mirror_binlog_status; + #else + #define unireg_abort(exit_code) DBUG_RETURN(exit_code) + inline void kill_delayed_threads(void) {} +diff -r 66cc9e0a6768 sql/mysqld.cc +--- a/sql/mysqld.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/mysqld.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -555,6 +555,7 @@ + pthread_mutex_t LOCK_global_user_client_stats; + pthread_mutex_t LOCK_global_table_stats; + pthread_mutex_t LOCK_global_index_stats; ++pthread_mutex_t LOCK_failover_master; + /* + The below lock protects access to two global server variables: + max_prepared_stmt_count and prepared_stmt_count. 
These variables +@@ -584,13 +585,15 @@ + char *master_ssl_key, *master_ssl_cert; + char *master_ssl_ca, *master_ssl_capath, *master_ssl_cipher; + ++char *opt_binlog_index_name; ++ + /* Static variables */ + + static bool kill_in_progress, segfaulted; + static my_bool opt_do_pstack, opt_bootstrap, opt_myisam_log; + static int cleanup_done; + static ulong opt_specialflag, opt_myisam_block_size; +-static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; ++static char *opt_logname, *opt_update_logname; + static char *opt_tc_heuristic_recover; + static char *mysql_home_ptr, *pidfile_name_ptr; + static char **defaults_argv; +@@ -598,6 +601,32 @@ + + static my_socket unix_sock,ip_sock; + struct rand_struct sql_rand; // used by sql_class.cc:THD::THD() ++ ++/* When set, we are inside a failover slave and deny all non-super access */ ++bool failover_deny_access= 0; ++ ++/* When set, binlog will be mirrored on the replica. */ ++my_bool rpl_mirror_binlog_enabled; ++ ++/* Sync the mirrored binlog to disk after every #th event. */ ++ulong sync_mirror_binlog_period; ++ ++/* The fixed size for replication event buffer. Replication event can exceed ++ * the size. ++ */ ++//ulong rpl_event_buffer_size; ++ ++/* This is a mirror binlog status variable on the primary to indicate how many ++ * mirror binlog servers are connecting. ++ */ ++ulong rpl_mirror_binlog_clients = 0; ++ ++/* This indicates whether mirror binlog is working on a replica database. It ++ * requires: ++ * . rpl_mirror_binlog_enabled = 1 ++ * . 
the slave I/O thread is running and mirror binlog is also dumped ++ */ ++ulong rpl_mirror_binlog_status = 0; + + /* OS specific variables */ + +@@ -1315,6 +1344,7 @@ + (void) pthread_cond_destroy(&COND_flush_thread_cache); + (void) pthread_cond_destroy(&COND_manager); + (void) pthread_mutex_destroy(&LOCK_stats); ++ (void) pthread_mutex_destroy(&LOCK_failover_master); + (void) pthread_mutex_destroy(&LOCK_global_user_client_stats); + (void) pthread_mutex_destroy(&LOCK_global_table_stats); + (void) pthread_mutex_destroy(&LOCK_global_index_stats); +@@ -3164,6 +3194,7 @@ + (void) pthread_cond_init(&COND_rpl_status, NULL); + #endif + (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_failover_master, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST); + (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST); +@@ -3398,39 +3429,8 @@ + + if (opt_bin_log) + { +- char buf[FN_REFLEN]; +- const char *ln; +- ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf); +- if (!opt_bin_logname && !opt_binlog_index_name) +- { +- /* +- User didn't give us info to name the binlog index file. +- Picking `hostname`-bin.index like did in 4.x, causes replication to +- fail if the hostname is changed later. So, we would like to instead +- require a name. But as we don't want to break many existing setups, we +- only give warning, not error. +- */ +- sql_print_warning("No argument was provided to --log-bin, and " +- "--log-bin-index was not used; so replication " +- "may break when this MySQL server acts as a " +- "master and has his hostname changed!! 
Please " +- "use '--log-bin=%s' to avoid this problem.", ln); +- } +- if (ln == buf) +- { +- my_free(opt_bin_logname, MYF(MY_ALLOW_ZERO_PTR)); +- opt_bin_logname=my_strdup(buf, MYF(0)); +- } +- if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln)) +- { +- unireg_abort(1); +- } +- +- /* +- Used to specify which type of lock we need to use for queries of type +- INSERT ... SELECT. This will change when we have row level logging. +- */ +- using_update_log=1; ++ if (make_master_open_index(&opt_bin_logname, opt_binlog_index_name) != 0) ++ unireg_abort(1); + } + + if (xid_cache_init()) +@@ -3480,9 +3480,10 @@ + unireg_abort(1); + } + +- if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0, +- WRITE_CACHE, 0, max_binlog_size, 0)) +- unireg_abort(1); ++ if (opt_bin_log && ++ make_master(NULL, opt_bin_logname, opt_binlog_index_name, NULL) != 0) { ++ unireg_abort(1); ++ } + + #ifdef HAVE_REPLICATION + if (opt_bin_log && expire_logs_days) +@@ -5036,6 +5037,8 @@ + OPT_INNODB_READ_IO_THREADS, + OPT_INNODB_WRITE_IO_THREADS, + OPT_INNODB_ADAPTIVE_HASH_INDEX, ++ OPT_RPL_MIRROR_BINLOG, ++ OPT_SYNC_MIRROR_BINLOG, + OPT_FEDERATED + }; + +@@ -5725,6 +5728,11 @@ + {"rpl-recovery-rank", OPT_RPL_RECOVERY_RANK, "Undocumented.", + (gptr*) &rpl_recovery_rank, (gptr*) &rpl_recovery_rank, 0, GET_ULONG, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, ++ {"rpl_mirror_binlog_enabled", OPT_RPL_MIRROR_BINLOG, ++ "1 = support mirroring binlogs. 
0 = disable mirroring binlogs", ++ (gptr*) &rpl_mirror_binlog_enabled, ++ (gptr*) &rpl_mirror_binlog_enabled, 0, GET_BOOL, NO_ARG, ++ 0, 0, 1, 0, 1, 0}, + {"safe-mode", OPT_SAFE, "Skip some optimize stages (for testing).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + #ifndef TO_BE_DELETED +@@ -5849,6 +5857,11 @@ + {"symbolic-links", 's', "Enable symbolic link support.", + (gptr*) &my_use_symdir, (gptr*) &my_use_symdir, 0, GET_BOOL, NO_ARG, + IF_PURIFY(0,1), 0, 0, 0, 0, 0}, ++ {"sync-mirror-binlog", OPT_SYNC_MIRROR_BINLOG, ++ "Sync the mirrored binlog to disk after every #th event. " ++ "#=0 (the default) does no sync. Syncing slows MySQL down", ++ (gptr*) &sync_mirror_binlog_period, ++ (gptr*) &sync_mirror_binlog_period, 0, GET_ULONG, REQUIRED_ARG, 0, 0, ~0L, 0, 1, 0}, + {"sysdate-is-now", OPT_SYSDATE_IS_NOW, + "Non-default option to alias SYSDATE() to NOW() to make it safe-replicable. Since 5.0, SYSDATE() returns a `dynamic' value different for different invocations, even within the same statement.", + (gptr*) &global_system_variables.sysdate_is_now, +@@ -6625,6 +6638,7 @@ + {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG}, + {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_CONST}, + {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG}, ++ {"Failover_deny_access", (char*) &failover_deny_access, SHOW_LONG}, + {"Flush_commands", (char*) &refresh_version, SHOW_LONG_CONST}, + {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS}, + {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS}, +diff -r 66cc9e0a6768 sql/repl_mule.cc +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/repl_mule.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,466 @@ ++/* ++ Copyright (C) 2007 Google Inc. 
++ ++This program is free software; you can redistribute it and/or ++modify it under the terms of the GNU General Public License ++as published by the Free Software Foundation; either version 2 ++of the License, or (at your option) any later version. ++ ++This program is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with this program; if not, write to the Free Software ++Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++*/ ++ ++#include "mysql_priv.h" ++#include <my_dir.h> ++#include "slave.h" ++#include "repl_mule.h" ++ ++/* max log size: 2GB */ ++#define MAX_LOG_SIZE BINLOG_NOSWITCH_SIZE ++ ++ReplMule::ReplMule(THD* thd, MASTER_INFO *mi, RelayStatus status, ++ my_off_t file_size, const char *binlog_indexname, ++ MYSQL_LOG *binlog, ulong sync_period) ++ : desc_event_(new Format_description_log_event(BINLOG_VERSION)), ++ io_thd_(thd), mi_(mi), status_(status), dump_position_(0L), ++ file_size_(file_size), mule_log_(binlog), ++ mule_log_sync_period_(sync_period), mule_log_event_counter_(0) { ++ char llbuf1[22], llbuf2[22]; ++ ++ DBUG_ENTER("ReplMule::ReplMule"); ++ ++ /* Indicate that we are in replication mule mode. */ ++ mule_log_->set_mule_mode(); ++ ++ strmake(curr_log_filename_, mi->master_log_name, ++ sizeof(curr_log_filename_)-1); ++ strmake(mule_indexname_, binlog_indexname, sizeof(mule_indexname_)-1); ++ ++ /* Open the mule log file */ ++ if (!mule_log_->is_log_open()) { ++ /* Do not open binlog file when master_log_name is not specified. We ++ * are at the I/O thread initialization time and we do not know what ++ * filename we are going to dump. ++ * We wait for the next rotation event to indicate the filename. 
++ */ ++ if (strlen(curr_log_filename_) > 0 && ++ mule_log_->open(curr_log_filename_, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0) != 0) { ++ sql_print_error("ReplMule: open binlog failed: %s", ++ curr_log_filename_); ++ status_ = MULE_ERROR; ++ DBUG_VOID_RETURN; ++ } ++ } ++ ++ switch (status_) { ++ case MULE_BEHIND: ++ dump_position_ = mi->master_log_pos; ++ mi->master_log_pos = file_size_; ++ sql_print_information("ReplicationMule: MULE_BEHIND - new(%s), old(%s)", ++ llstr(mi->master_log_pos, llbuf1), ++ llstr(dump_position_, llbuf2)); ++ break; ++ case RELAY_MATCH_MULE: ++ case RELAY_MATCH_MULE_RUN: ++ dump_position_ = mi->master_log_pos; ++ sql_print_information("ReplicationMule: RELAY_MATCH_MULE."); ++ break; ++ case MULE_VERIFY: ++ case MULE_VERIFY_RELAY_BEHIND: ++ dump_position_ = mi->master_log_pos; ++ mi->master_log_pos = BIN_LOG_HEADER_SIZE; ++ sql_print_information( ++ "ReplicationMule: MULE_VERIFY - old(%s), file_size(%s)", ++ llstr(dump_position_, llbuf1), llstr(file_size_, llbuf2)); ++ ++ /* seek to the beginning of the file for verification */ ++ seekToPosition(BIN_LOG_HEADER_SIZE); ++ break; ++ } ++ ++ DBUG_VOID_RETURN; ++} ++ ++ReplMule::~ReplMule() { ++ DBUG_ENTER("ReplMule::~ReplMule"); ++ ++ if (mule_log_->is_log_open()) ++ mule_log_->close(LOG_CLOSE_INDEX); ++ mule_log_->clear_mule_mode(); ++ ++ /* If we are still in MULE_BEHIND or MULE_VERIFY state and we exit from ++ * I/O thread, it means we encountered some errors. ++ * mi->master_log_pos might be used by later slave start. It is being ++ * changed here to do event dumping or event verification. So, we should ++ * restore it to its original value. 
++ */ ++ switch (status_) { ++ case MULE_BEHIND: ++ case MULE_VERIFY: ++ if (mi_->master_log_pos < dump_position_) ++ mi_->master_log_pos = dump_position_; ++ break; ++ } ++ ++ delete desc_event_; ++ ++ DBUG_VOID_RETURN; ++} ++ ++ReplMule::WriteStatus ReplMule::writeEvent(const char* buf, ulong event_len) { ++ WriteStatus dump_status = WRITE_RELAY; ++ char llbuf1[22], llbuf2[22], llbuf3[22]; ++ char *verify_event; ++ bool verified = false; ++ bool skip_event = false; ++ ++ DBUG_ENTER("ReplMule::dumpEvent"); ++ switch (status_) { ++ case MULE_VERIFY: ++ case MULE_VERIFY_RELAY_BEHIND: ++ if (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT && ++ IsFakeRotation(buf, event_len)) { ++ /* Do not verify the faked rotate event */ ++ if (status_ == MULE_VERIFY) ++ dump_status = SKIP_RELAY; ++ break; ++ } ++ verify_event = new char[event_len]; ++ if (verify_event == NULL) { ++ sql_print_error( ++ "ReplMule::dumpEvent - insufficient memory in verification, " ++ "position(%s), event_len(%d).", ++ llstr(mi_->master_log_pos, llbuf1), event_len); ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ if (my_b_read(mule_log_->get_log_file(), (byte*) verify_event, ++ event_len) != 0) { ++ sql_print_error( ++ "ReplMule::dumpEvent - read log error in verification, " ++ "position(%s), event_len(%d).", ++ llstr(mi_->master_log_pos, llbuf1), event_len); ++ dump_status = WRITE_ERROR; ++ delete verify_event; ++ break; ++ } ++ verified = (memcmp(buf, verify_event, event_len) == 0); ++ delete verify_event; ++ if (!verified) { ++ sql_print_error( ++ "ReplMule::dumpEvent - event does not match at position(%s)", ++ llstr(mi_->master_log_pos, llbuf1)); ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ /* fall through */ ++ case MULE_BEHIND: ++ dump_status = SKIP_RELAY; ++ if (status_ == MULE_BEHIND && ++ queueEvent(buf, event_len, &skip_event) != 0) { ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ ++ /* Skip faked rotation event */ ++ if (!skip_event) ++ mi_->master_log_pos += event_len; ++ ++ if 
(mi_->master_log_pos == dump_position_) { ++ if (dump_position_ < file_size_) { ++ status_ = MULE_VERIFY_RELAY_BEHIND; ++ } else { ++ status_ = RELAY_MATCH_MULE; ++ } ++ sql_print_information( ++ "ReplMule::dumpEvent - new status(%d) " ++ "master_log_pos(%s), dump_pos(%s), file_size(%s)", status_, ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2), ++ llstr(file_size_, llbuf3)); ++ } else if (mi_->master_log_pos == file_size_) { ++ if (dump_position_ > file_size_) { ++ status_ = MULE_BEHIND; ++ } else { ++ status_ = RELAY_MATCH_MULE; ++ } ++ sql_print_information( ++ "ReplMule::dumpEvent - new status(%d) " ++ "master_log_pos(%s), dump_pos(%s), file_size(%s)", status_, ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2), ++ llstr(file_size_, llbuf3)); ++ } else if (status_ != MULE_VERIFY_RELAY_BEHIND && ++ mi_->master_log_pos > dump_position_) { ++ sql_print_error( ++ "ReplMule::dumpEvent - mule position(%s) does not match " ++ "relay-log position(%s).", ++ llstr(mi_->master_log_pos, llbuf1), llstr(dump_position_, llbuf2)); ++ dump_status = WRITE_ERROR; ++ } ++ break; ++ case RELAY_MATCH_MULE_RUN: ++ if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) { ++ sql_print_information(" RELAY_MATCH_MULE event %d", buf[EVENT_TYPE_OFFSET] ); ++ /* Do not write format description record if size is the same */ ++ break; ++ } ++ case RELAY_MATCH_MULE: ++ if (queueEvent(buf, event_len, &skip_event) != 0) ++ dump_status = WRITE_ERROR; ++ break; ++ } ++ ++ DBUG_RETURN(dump_status); ++} ++ ++int ReplMule::appendEvent(const char* buf, ulong event_len) { ++ char llbuf1[22]; ++ int error; ++ ++ DBUG_ENTER("ReplMule::appendEvent"); ++ ++ error = mule_log_->appendv(buf,event_len,0); ++ if (error != 0) { ++ sql_print_error("ReplMule::appendEvent - append error at %s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ } else if (mule_log_->flush_log_file() != 0) { ++ sql_print_error("ReplMule::appendEvent - flush error at 
%s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ error = -1; ++ } else if (mule_log_sync_period_ > 0) { ++ mule_log_event_counter_++; ++ if (mule_log_event_counter_ >= mule_log_sync_period_) { ++ mule_log_event_counter_ = 0; ++ error = my_sync(mule_log_->get_log_file()->file, MYF(MY_WME)); ++ if (error != 0) ++ sql_print_error("ReplMule::appendEvent - sync error at %s(%s)", ++ mi_->master_log_name, ++ llstr(mi_->master_log_pos, llbuf1)); ++ } ++ } ++ ++ DBUG_RETURN(error); ++} ++ ++int ReplMule::queueEvent(const char* buf, ulong event_len, bool *skip_event) { ++ int error = 0; ++ ++ DBUG_ENTER("ReplMule::queueEvent"); ++ ++ *skip_event = false; ++ ++ mule_log_->lock_log(); ++ if (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT) { ++ Rotate_log_event rev(buf, event_len, desc_event_); ++ ++ /* If this is a faked rotate event and the specified filename is ++ * the same as the current binlog filename, ignore the event. ++ */ ++ if (IsFakeRotation(rev)) { ++ *skip_event = true; ++ DBUG_PRINT("info",("skipped faked rotation event")); ++ } else { ++ /* Only append real events. */ ++ if (rev.when != 0) ++ error = appendEvent(buf, event_len); ++ ++ /* Only rotate file when append succeeds. */ ++ if (error == 0) { ++ /* Create a new file: lock both index and log. */ ++ if (strlen(curr_log_filename_) == 0) { ++ /* If curr_log_filename_ is not specified, then this is the first ++ * valid rotation event to indicate the filename. 
++ */ ++ error = mule_log_->open(rev.new_log_ident, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0); ++ } else { ++ mule_log_->new_file(0, rev.new_log_ident); ++ } ++ ++ strmake(curr_log_filename_, rev.new_log_ident, ++ strlen(rev.new_log_ident)); ++ ++ DBUG_PRINT("info",("rotate file: %s", rev.new_log_ident)); ++ } ++ } ++ } else { ++ error = appendEvent(buf, event_len); ++ } ++ mule_log_->unlock_log(); ++ ++ DBUG_RETURN(error); ++} ++ ++void ReplMule::seekToPosition(my_off_t pos) { ++ DBUG_ENTER("ReplMule::seekToPosition"); ++ DBUG_PRINT("enter",("seek_pos: %ld", (ulong) pos)); ++ ++ my_b_seek(mule_log_->get_log_file(), pos); ++ DBUG_VOID_RETURN; ++} ++ ++bool ReplMule::IsFakeRotation(const char* buf, ulong event_len) { ++ DBUG_ENTER("ReplMule::IsFakeRotation"); ++ ++ Rotate_log_event rev(buf, event_len, desc_event_); ++ DBUG_RETURN(IsFakeRotation(rev)); ++} ++ ++bool ReplMule::IsFakeRotation(const Rotate_log_event& rev) { ++ DBUG_ENTER("ReplMule::IsFakeRotation"); ++ DBUG_RETURN(rev.when == 0 && ++ rev.ident_len == strlen(curr_log_filename_) && ++ strcmp(rev.new_log_ident, curr_log_filename_) == 0); ++} ++ ++/* createReplicationMule: ++ * Create a mule that relays master's replication binlog and ++ * generate an exact same copy on the local filesystem. 
++ * ++ * Code flow: ++ * last_mulelog = scan the existing mule log index to find it ++ * if (mulelog index is not created or there is no mule log inside it) ++ * old_mule_log <- requested dumping position ++ * requested dumping position <- 0 in the file ++ * else ++ * check whether the mule log matches the requested dump ++ * (whether the last mule log name/size matches) ++ * if the mule log name does not match ++ * exit with an error ++ * if (the mule log size does not match the requested dump position) ++ * request the dump from position 0 and read all events ++ * verify all events with the corresponding events in mule log ++ * if (the verification succeeds) ++ * continue the dump ++ * else ++ * exit with an error ++ */ ++ReplMule* ReplMule::createReplicationMule( ++ THD* thd, MASTER_INFO *mi, const char *binlog_indexname, ++ MYSQL_LOG *binlog) { ++ ReplMule *mule = NULL; ++ LOG_INFO linfo; ++ bool index_opened = false; ++ ++ DBUG_ENTER("ReplMule::createReplicationMule"); ++ ++ /* binlog_indexname must be set to some real value. */ ++ DBUG_ASSERT(binlog_indexname); ++ ++ /* Lock binlog index for all binlog operations */ ++ binlog->lock_index(); ++ index_opened = binlog->open_index_file(binlog_indexname, NULL); ++ DBUG_PRINT("info",("open index file succeed: %d", index_opened)); ++ sql_print_information("createReplicationMule"); ++ ++ /* Scan the existing binlog index to find the last relayed binlog */ ++ if (index_opened || ++ binlog->find_log_pos(&linfo, NullS, false) != 0) { ++ /* binlog index is not created or has no log file inside: ++ * . old_relay_binlog <- requested dumping position ++ * . 
requested dumping position <- 0 in the file ++ */ ++ if (mi->master_log_pos == BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, RELAY_MATCH_MULE, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, sync_mirror_binlog_period); ++ } else { ++ mule = new ReplMule(thd, mi, MULE_BEHIND, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, sync_mirror_binlog_period); ++ } ++ ++ if (mule == NULL) { ++ sql_print_error("Mule malloc operation failed."); ++ } ++ } else { ++ IO_CACHE* log_file; ++ MY_STAT stat; ++ char last_binlog_name[FN_REFLEN]; ++ ++ /* Find the last log file from the binlog index. ++ * Check whether the last binlog matches the requested dump for both ++ * binlog name and binlog size. ++ */ ++ for (;;) { ++ strmake(last_binlog_name, linfo.log_file_name, FN_REFLEN); ++ last_binlog_name[FN_REFLEN - 1] = '\0'; ++ if (binlog->find_next_log(&linfo, false)) ++ break; ++ } ++ DBUG_PRINT("info",("the last binlog: %s", last_binlog_name)); ++ ++ /* if the binlog name does not match, exit with an error. */ ++ if (strcmp(last_binlog_name+dirname_length(last_binlog_name), ++ mi->master_log_name) != 0) { ++ sql_print_error("Mule binlog(%s) does not match new relay-binlog(%s)", ++ last_binlog_name, mi->master_log_name); ++ } /* Open the last binlog. */ ++ else if (binlog->open(last_binlog_name, LOG_BIN, NULL, ++ SEQ_READ_APPEND, true, MAX_LOG_SIZE, 0) != 0) { ++ sql_print_error("Mule open last binlog failed: %s", last_binlog_name); ++ } else { ++ bool valid_file_size = true; ++ ++ /* Get the binlog size. */ ++ log_file = binlog->get_log_file(); ++ if (my_fstat(log_file->file, &stat, MYF(0)) == 0) { ++ /* If the binlog size does not match the requested dump position, then ++ * request the dump from position 0 and verify all events, we need to ++ * verify events because the mule log might be used for serving during ++ * anytime. We must be sure that they are correct. 
++ */ ++ sql_print_information("Binglog size %d", stat.st_size); ++ if (stat.st_size == mi->master_log_pos) { ++ mule = new ReplMule(thd, mi, RELAY_MATCH_MULE_RUN, stat.st_size, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else if (stat.st_size > BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, MULE_VERIFY, stat.st_size, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else if (stat.st_size == BIN_LOG_HEADER_SIZE) { ++ mule = new ReplMule(thd, mi, MULE_BEHIND, BIN_LOG_HEADER_SIZE, ++ binlog_indexname, binlog, ++ sync_mirror_binlog_period); ++ } else { ++ char llbuf[22]; ++ valid_file_size = false; ++ sql_print_error("Mule binlog file(%s) invalid size: %s", ++ last_binlog_name, llstr(stat.st_size, llbuf)); ++ } ++ } else { ++ valid_file_size = false; ++ sql_print_error("Mule binlog file(%s): fstat failed.", ++ last_binlog_name); ++ } ++ ++ if (valid_file_size) { ++ if (mule == NULL) { ++ sql_print_error("Mule malloc operation failed."); ++ } else if (mule->status_ == MULE_ERROR) { ++ /* If mule creation fails, indicate the error. */ ++ delete mule; ++ mule = NULL; ++ } ++ } ++ } ++ } ++ ++ /* Clear the mule binlog mode if there are errors. */ ++ if (mule == NULL) { ++ binlog->clear_mule_mode(); ++ binlog->close_index_file(); ++ } ++ ++ /* Unlock binlog index */ ++ binlog->unlock_index(); ++ ++ DBUG_RETURN(mule); ++} +diff -r 66cc9e0a6768 sql/repl_mule.h +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/repl_mule.h Thu Dec 04 21:46:15 2008 -0800 +@@ -0,0 +1,166 @@ ++/* ++ Copyright (C) 2007 Google Inc. ++ ++This program is free software; you can redistribute it and/or ++modify it under the terms of the GNU General Public License ++as published by the Free Software Foundation; either version 2 ++of the License, or (at your option) any later version. 
++ ++This program is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with this program; if not, write to the Free Software ++Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ++*/ ++ ++#ifndef SQL_REPL_MULE_H__ ++#define SQL_REPL_MULE_H__ ++ ++/* Replication Mule is the class that is responsible for generating ++ * an exact copy of the binlog from a master database. We call this feature ++ * mirror binlog and it can be enabled by setting rpl_mirror_binlog. We ++ * need to keep the same copy for the following purposes: ++ * . The replica can serve the binlog transparently as if they are the ++ * master database. This can relieve master connection overhead. ++ * . During failover, the replica can become the new master and serve ++ * old binlogs transparently. ++ * (The Mule name comes from the popular P2P software eMule.) ++ * ++ * Internally, we call the mirrored binlog mule log. ++ */ ++ ++class THD; ++class Rotate_log_event; ++class Format_description_log_event; ++typedef struct st_master_info MASTER_INFO; ++ ++class ReplMule { ++ public: ++ /* Because I/O thread also creates relay-binlog, instead of an exact ++ * copy of the original master's binlog, we have two resources that ++ * might get out of sync. 
++ * This enum indicates the status: ++ * MULE_BEHIND - the mule's header is behind: ++ * (mule is activated for the first time) ++ * RELAY_MATCH_MULE - mule matches relay-log ++ * RELAY_MATCH_MULE_RUN - mule matches relay-log and it was not empty binlog ++ * MULE_VERIFY - mule has more events than the relay-log and needs ++ * verification; we can not verify based on relay-log ++ * events because events might get changed a little; ++ * verification starts with downloading all events in ++ * the last binlog from the master and compare with ++ * all events in the mule log; ++ * MULE_VERIFY_RELAY_BEHIND - mule has more events than the relay-log ++ * and relay-log needs to write events ++ * MULE_ERROR - mule detects errors in event duplicate ++ * ++ * When the mule mirrors binlogs, it writes an event into the mule log ++ * first. Then, I/O thread writes the event into the relay log. ++ */ ++ enum RelayStatus { ++ MULE_BEHIND = 1, ++ RELAY_MATCH_MULE = 2, ++ RELAY_MATCH_MULE_RUN = 7, ++ MULE_VERIFY = 3, ++ MULE_VERIFY_RELAY_BEHIND = 4, ++ MULE_ERROR = 5, ++ }; ++ ++ enum WriteStatus { ++ WRITE_RELAY = 1, ++ WRITE_ERROR = 2, ++ SKIP_RELAY = 3, ++ }; ++ ++ private: ++ const Format_description_log_event *desc_event_; ++ THD *io_thd_; ++ MASTER_INFO *mi_; ++ ++ /* ++ * I/O thread will write both mule log for mirror binlog and relay log ++ * for SQL thread. ++ * The variable indicates whether the two are in sync. ++ */ ++ RelayStatus status_; ++ ++ /* The starting event writing position. */ ++ my_off_t dump_position_; ++ ++ /* During the initial setup, the last mule log's file size. */ ++ my_off_t file_size_; ++ ++ /* Internally, we call the mirrored binlog mule log. */ ++ MYSQL_LOG *mule_log_; ++ ++ /* Sync the mule log to disk for every #N events. 
*/ ++ ulong mule_log_sync_period_; ++ ulong mule_log_event_counter_; ++ ++ /* mule log's index filename */ ++ char mule_indexname_[FN_REFLEN]; ++ ++ /* the current mule log's filename */ ++ char curr_log_filename_[FN_REFLEN]; ++ ++ ReplMule(THD* thd, MASTER_INFO *mi, RelayStatus status, ++ my_off_t file_size, const char *binlog_indexname, ++ MYSQL_LOG *binlog, ulong sync_period); ++ ++ /* ++ * Queue the event into the current mule log. If it is a rotation ++ * event, generate a new mule log file. ++ * Indicate whether the event is skipped because it is an fake event. ++ * A fake event is generated by the master to indicate the current ++ * reading position. ++ */ ++ int queueEvent(const char* buf, ulong event_len, bool *skip_event); ++ ++ /* Append the event to the current mule log. */ ++ int appendEvent(const char* buf, ulong event_len); ++ ++ bool IsFakeRotation(const char* buf, ulong event_len); ++ bool IsFakeRotation(const Rotate_log_event& rev); ++ ++ /* Seek to the specified position in the current open mule log. */ ++ void seekToPosition(my_off_t pos); ++ ++ public: ++ ++ ~ReplMule(); ++ ++ /* Dump the event into mule binlog. ++ * Input: ++ * buf (IN) - replication event buffer ++ * event_len (IN) - the event length ++ * ++ * Return: ++ * . WRITE_RELAY: the relay log needs to writing the event ++ * . WRITE_ERROR: the writing encountered errors ++ * . SKIP_RELAY: the relay log should skip the event ++ */ ++ WriteStatus writeEvent(const char* buf, ulong event_len); ++ ++ /* createReplicationMule: ++ * Create a mule that relays master's replication binlog and ++ * generate an exact same copy on the local filesystem. ++ * ++ * Input: ++ * thd (IN) - replication I/O thread ++ * mi (IN) - master info struct for I/O thread's progress ++ * binlog_indexname (IN) - filename for binlog's index ++ * binlog (IN) - replication binlog ++ * ++ * Return: ++ * . a replication mule if success ++ * . 
NULL if there are any errors ++ */ ++ static ReplMule *createReplicationMule(THD* thd, MASTER_INFO *mi, ++ const char *binlog_indexname, ++ MYSQL_LOG *binlog); ++}; ++ ++#endif /* SQL_REPL_MULE_H__ */ +diff -r 66cc9e0a6768 sql/set_var.cc +--- a/sql/set_var.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/set_var.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -345,6 +345,8 @@ + slog_verb); + sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank", + &rpl_recovery_rank); ++sys_var_bool_ptr sys_rpl_mirror_binlog_enabled("rpl_mirror_binlog_enabled", ++ &rpl_mirror_binlog_enabled); + sys_var_long_ptr sys_query_cache_size("query_cache_size", + &query_cache_size, + fix_query_cache_size); +@@ -364,6 +366,9 @@ + sys_var_thd_ulong sys_trans_prealloc_size("transaction_prealloc_size", + &SV::trans_prealloc_size, + 0, fix_trans_mem_root); ++sys_var_long_ptr sys_sync_mirror_binlog_period( ++ "sync_mirror_binlog_period", ++ &sync_mirror_binlog_period); + + #ifdef HAVE_QUERY_CACHE + sys_var_long_ptr sys_query_cache_limit("query_cache_limit", +@@ -774,6 +779,7 @@ + &sys_relay_log_purge, + #endif + &sys_rpl_recovery_rank, ++ &sys_rpl_mirror_binlog_enabled, + &sys_safe_updates, + &sys_secure_auth, + &sys_secure_file_priv, +@@ -1113,6 +1119,8 @@ + {"relay_log_space_limit", (char*) &relay_log_space_limit, SHOW_LONGLONG}, + #endif + {sys_rpl_recovery_rank.name,(char*) &sys_rpl_recovery_rank, SHOW_SYS}, ++ {sys_rpl_mirror_binlog_enabled.name, ++ (char *) &sys_rpl_mirror_binlog_enabled, SHOW_SYS}, + {"secure_auth", (char*) &sys_secure_auth, SHOW_SYS}, + {"secure_file_priv", (char*) &sys_secure_file_priv, SHOW_SYS}, + #ifdef HAVE_SMEM +diff -r 66cc9e0a6768 sql/slave.cc +--- a/sql/slave.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/slave.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -25,6 +25,7 @@ + #include <thr_alarm.h> + #include <my_dir.h> + #include <sql_common.h> ++#include "repl_mule.h" + #include <errmsg.h> + #include <mysys_err.h> + +@@ -3527,6 +3528,7 @@ + RELAY_LOG_INFO *rli= &mi->rli; + char 
llbuff[22]; + uint retry_count; ++ ReplMule *mule = NULL; + + // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff + my_thread_init(); +@@ -3609,6 +3611,23 @@ + if (get_master_version_and_clock(mysql, mi)) + goto err; + ++ if (rpl_mirror_binlog_enabled && !mule) { ++ if (opt_binlog_index_name == NULL) { ++ sql_print_error("\"log-bin-index\" must be set in mirror binlog."); ++ goto err; ++ } ++ ++ /* Create the mule to generate the exact copy of the binlog */ ++ mule = ReplMule::createReplicationMule( ++ thd, mi, opt_binlog_index_name, &mysql_bin_log); ++ ++ /* If we could not create the mule, we stop the I/O thread and report ++ * an error. ++ */ ++ if (mule == NULL) ++ goto err; ++ } ++ + if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1) + { + /* +@@ -3624,6 +3643,7 @@ + DBUG_PRINT("info",("Starting reading binary log from master")); + while (!io_slave_killed(thd,mi)) + { ++ const char* event_buf; + bool suppress_warnings= 0; + thd_proc_info(thd, "Requesting binlog dump"); + if (request_dump(mysql, mi, &suppress_warnings)) +@@ -3754,10 +3774,25 @@ + goto connected; + } // if (event_len == packet_error) + ++ event_buf = (const char*)mysql->net.read_pos + 1; ++ ++ if (mule) { ++ ReplMule::WriteStatus d_status = ++ mule->writeEvent(event_buf, event_len); ++ switch (d_status) { ++ case ReplMule::WRITE_RELAY: ++ break; ++ case ReplMule::SKIP_RELAY: ++ /* Skip writing relay event; go back to read the next event */ ++ continue; ++ case ReplMule::WRITE_ERROR: ++ goto err; ++ } ++ } ++ + retry_count=0; // ok event, reset retry counter + thd_proc_info(thd, "Queueing master event to the relay log"); +- if (queue_event(mi,(const char*)mysql->net.read_pos + 1, +- event_len)) ++ if (queue_event(mi, event_buf, event_len)) + { + sql_print_error("Slave I/O thread could not queue event from master"); + goto err; +@@ -3847,6 +3882,7 @@ + change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE); + DBUG_ASSERT(thd->net.buff != 0); + 
net_end(&thd->net); // destructor will not free it, because net.vio is 0 ++ delete mule; + close_thread_tables(thd, 0); + pthread_mutex_lock(&LOCK_thread_count); + THD_CHECK_SENTRY(thd); +diff -r 66cc9e0a6768 sql/sql_class.h +--- a/sql/sql_class.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_class.h Thu Dec 04 21:46:15 2008 -0800 +@@ -152,6 +152,12 @@ + #define LOG_INFO_FATAL -7 + #define LOG_INFO_IN_USE -8 + ++/* If the maximum size is equal to this value, binlog would not rotate on ++ * size limit. ++ */ ++#define BINLOG_NOSWITCH_SIZE ((ulong) -1) ++ ++ + /* bitmap to SQL_LOG::close() */ + #define LOG_CLOSE_INDEX 1 + #define LOG_CLOSE_TO_BE_OPENED 2 +@@ -245,6 +251,9 @@ + bool no_auto_events; + friend class Log_event; + ++ /* mule replication mode */ ++ bool mule_binlog_; ++ + public: + /* + These describe the log's format. This is used only for relay logs. +@@ -317,7 +326,8 @@ + } + bool open_index_file(const char *index_file_name_arg, + const char *log_name); +- void new_file(bool need_lock); ++ int close_index_file(); ++ void new_file(bool need_lock= 1, const char* log_filename= NULL); + bool write(THD *thd, enum enum_server_command command, + const char *format, ...) ATTRIBUTE_FORMAT(printf, 4, 5); + bool write(THD *thd, const char *query, uint query_length, +@@ -357,7 +367,27 @@ + int get_current_log(LOG_INFO* linfo); + int raw_get_current_log(LOG_INFO* linfo); + uint next_file_id(); +- inline bool is_open() { return log_type != LOG_CLOSED; } ++ ++ /* Because mysql use is_open() to check whether replication is on, ++ * we will let the check fail during binlog mule mode. Mule replication ++ * and normal master replication can not be on at the same time. ++ * ++ * is_log_open(): the binlog file is open for either purpose ++ * ++ * is_open(): the binlog is open for master replication. 
++ * is_mule_open(): the binlog is open for mirror binlog or for ++ * replication mule; refer repl_mule.h for details ++ */ ++ bool is_log_open() { ++ return log_type != LOG_CLOSED; ++ } ++ bool is_open() { ++ return (!mule_binlog_) && is_log_open(); ++ } ++ bool is_mule_open() { ++ return (mule_binlog_) && is_log_open(); ++ } ++ + inline char* get_index_fname() { return index_file_name;} + inline char* get_log_fname() { return log_file_name; } + inline char* get_name() { return name; } +@@ -366,8 +396,18 @@ + + inline void lock_index() { pthread_mutex_lock(&LOCK_index);} + inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} ++ inline void lock_log() { pthread_mutex_lock(&LOCK_log);} ++ inline void unlock_log() { pthread_mutex_unlock(&LOCK_log);} + inline IO_CACHE *get_index_file() { return &index_file;} + inline uint32 get_open_count() { return open_count; } ++ /* Look in file repl_mule.h for the definition of mule. */ ++ void set_mule_mode() { ++ mule_binlog_ = 1; ++ } ++ void clear_mule_mode() { ++ mule_binlog_ = 0; ++ } ++ int flush_log_file(); + }; + + /* +diff -r 66cc9e0a6768 sql/sql_lex.h +--- a/sql/sql_lex.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_lex.h Thu Dec 04 21:46:15 2008 -0800 +@@ -104,6 +104,7 @@ + // TODO(mcallaghan): update status_vars in mysqld to export these + SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, + SQLCOM_SHOW_CLIENT_STATS, ++ SQLCOM_MAKE_MASTER, + /* This should be the last !!! 
*/ + SQLCOM_END + }; +@@ -171,6 +172,12 @@ + char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher; + char *relay_log_name; + ulong relay_log_pos; ++ ++ /* the following fields are used for make master command */ ++ char *log_index_name; ++ bool in_failover; ++ bool kill_session; ++ bool with_old_binlog; + } LEX_MASTER_INFO; + + +diff -r 66cc9e0a6768 sql/sql_parse.cc +--- a/sql/sql_parse.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_parse.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -402,6 +402,15 @@ + passwd_len ? "yes": "no", + thd->main_security_ctx.master_access, + (thd->db ? thd->db : "*none*"))); ++ ++ /* If we are in failover mode, reject all non-super user connections. */ ++ if (is_in_failover() && ++ !(thd->main_security_ctx.master_access & SUPER_ACL)) { ++ net_send_error(thd, ER_SPECIFIC_ACCESS_DENIED_ERROR, ++ "super-user only during failover"); ++ DBUG_RETURN(-1); ++ } ++ + + if (check_count) + { +@@ -3470,6 +3479,22 @@ + else + res = load_master_data(thd); + break; ++ ++ case SQLCOM_MAKE_MASTER: ++ { ++ thd_proc_info(thd, "Making master"); ++ ++ if (check_global_access(thd, SUPER_ACL)) ++ goto error; ++ res = make_master(thd, NULL, NULL, &lex->mi); ++ if (res == 0) { ++ // TODO -- wei is this OK, setting it to NULL? 
++ thd_proc_info(thd, 0); ++ send_ok(thd); ++ } ++ break; ++ } ++ + #endif /* HAVE_REPLICATION */ + #ifdef HAVE_NDBCLUSTER_DB + case SQLCOM_SHOW_NDBCLUSTER_STATUS: +diff -r 66cc9e0a6768 sql/sql_repl.cc +--- a/sql/sql_repl.cc Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_repl.cc Thu Dec 04 21:46:15 2008 -0800 +@@ -20,11 +20,19 @@ + #include "log_event.h" + #include <my_dir.h> + ++extern pthread_mutex_t LOCK_failover_master; ++extern bool failover_deny_access; ++ + int max_binlog_dump_events = 0; // unlimited + my_bool opt_sporadic_binlog_dump_fail = 0; + #ifndef DBUG_OFF + static int binlog_dump_count = 0; + #endif ++ ++static int make_master_open_log(MYSQL_LOG *log, const char *opt_name, ++ bool no_auto_events, ulong max_size); ++static int set_in_failover(bool kill_session); ++static void clear_in_failover(void); + + /* + fake_rotate_event() builds a fake (=which does not exist physically in any +@@ -255,7 +263,7 @@ + bool purge_master_logs(THD* thd, const char* to_log) + { + char search_file_name[FN_REFLEN]; +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + send_ok(thd); + return FALSE; +@@ -308,6 +316,44 @@ + return error; + } + ++/* Show processlist command dump the binlog state. ++ * ++ * Input: ++ * output_info - (OUT) the output proc_info ++ * output_len - (IN) output proc_info's length ++ * thd - (IN) the thread ++ * input_msg - (IN) the input proc_info ++ * log_file_name - (IN) binlog file name ++ * log_pos - (IN) binlog position ++ */ ++static void processlist_show_binlog_state(char *output_info, ++ int output_len, ++ THD *thd, ++ const char *input_msg, ++ const char *log_file_name, ++ my_off_t log_pos) { ++ DBUG_ENTER("processlist_show_binlog_state"); ++ ++ /* Point to input_msg in case "show processlist" access it before the copy ++ * is finished. 
++ */ ++ thd_proc_info(thd, input_msg); ++ ++ if (snprintf(output_info, output_len, "%s :%s:%lld:", input_msg, ++ log_file_name + dirname_length(log_file_name), ++ log_pos) > 0) { ++ thd_proc_info(thd, output_info); ++ } ++ ++ DBUG_VOID_RETURN; ++} ++ ++static void repl_cleanup(ushort flags) { ++ if (flags & BINLOG_MIRROR_CLIENT) { ++ /* One less mirror binlog client. */ ++ thread_safe_sub(rpl_mirror_binlog_clients, 1, &LOCK_stats); ++ } ++} + + /* + TODO: Clean up loop to only have one call to send_file() +@@ -319,6 +365,11 @@ + LOG_INFO linfo; + char *log_file_name = linfo.log_file_name; + char search_file_name[FN_REFLEN], *name; ++ ++ /* This buffer should be enough for "comments + :file_name:file_pos:". */ ++ char binlog_state_msg[FN_REFLEN + 100]; ++ int binlog_state_msg_len = FN_REFLEN + 100; ++ + IO_CACHE log; + File file = -1; + String* packet = &thd->packet; +@@ -335,6 +386,15 @@ + + bzero((char*) &log,sizeof(log)); + ++ sql_print_information("Start %s binlog_dump to slave_server(%d), pos(%s, %lu)", ++ "asynchronous", ++ thd->server_id, log_ident, (ulong)pos); ++ ++ if (flags & BINLOG_MIRROR_CLIENT) { ++ /* One more mirror binlog clients. */ ++ thread_safe_increment(rpl_mirror_binlog_clients, &LOCK_stats); ++ } ++ + #ifndef DBUG_OFF + if (opt_sporadic_binlog_dump_fail && (binlog_dump_count++ % 2)) + { +@@ -344,7 +404,7 @@ + } + #endif + +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + errmsg = "Binary log is not open"; + my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; +@@ -529,6 +589,12 @@ + } + #endif + ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Send binlog events to slave", ++ log_file_name, pos); ++ + if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) + { + binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] & +@@ -634,6 +700,13 @@ + } + if (!thd->killed) + { ++ /* Update the binlog sending state. 
*/ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Has sent all binlog to slave; " ++ "waiting for binlog to be updated", ++ log_file_name, pos); ++ + /* Note that the following call unlocks lock_log */ + mysql_bin_log.wait_for_update(thd, 0); + } +@@ -650,7 +723,12 @@ + + if (read_packet) + { +- thd_proc_info(thd, "Sending binlog event to slave"); ++ // thd_proc_info(thd, "Sending binlog event to slave"); ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state(binlog_state_msg, ++ binlog_state_msg_len, thd, ++ "Sending binlog event to slave", ++ log_file_name, pos); + if (my_net_write(net, (char*)packet->ptr(), packet->length()) ) + { + errmsg = "Failed on my_net_write()"; +@@ -685,10 +763,21 @@ + } + else + { ++ char old_log_file_name[FN_REFLEN]; + bool loop_breaker = 0; + /* need this to break out of the for loop from switch */ + +- thd_proc_info(thd, "Finished reading one binlog; switching to next binlog"); ++ // thd_proc_info(thd, "Finished reading one binlog; switching to next binlog"); ++ /* Update the binlog sending state. */ ++ processlist_show_binlog_state( ++ binlog_state_msg, binlog_state_msg_len, thd, ++ "Finished reading one binlog; switching to next binlog", ++ log_file_name, pos); ++ ++ /* Keep the old fileename. */ ++ strmake(old_log_file_name, log_file_name, ++ sizeof(old_log_file_name) - 1); ++ + switch (mysql_bin_log.find_next_log(&linfo, 1)) { + case LOG_INFO_EOF: + loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK); +@@ -706,6 +795,16 @@ + + end_io_cache(&log); + (void) my_close(file, MYF(MY_WME)); ++ ++ /* A sanity check that we can not serve the same binlog twice because ++ * the filenames are stored in a .index file. 
++ */ ++ if (strcmp(old_log_file_name, log_file_name) >= 0) { ++ errmsg = "Re-serving an already served binlog file."; ++ my_errno = ER_MASTER_FATAL_ERROR_READING_BINLOG; ++ goto err; ++ } ++ + + /* + Call fake_rotate_event() in case the previous log (the one which +@@ -733,6 +832,8 @@ + end_io_cache(&log); + (void)my_close(file, MYF(MY_WME)); + ++ repl_cleanup(flags); ++ + send_eof(thd); + thd_proc_info(thd, "Waiting to finalize termination"); + pthread_mutex_lock(&LOCK_thread_count); +@@ -743,6 +844,7 @@ + err: + thd_proc_info(thd, "Waiting to finalize termination"); + end_io_cache(&log); ++ repl_cleanup(flags); + /* + Exclude iteration through thread list + this is needed for purge_logs() - it will iterate through +@@ -1316,7 +1418,7 @@ + Format_description_log_event *description_event= new + Format_description_log_event(3); /* MySQL 4.0 by default */ + +- if (mysql_bin_log.is_open()) ++ if (mysql_bin_log.is_log_open()) + { + LEX_MASTER_INFO *lex_mi= &thd->lex->mi; + SELECT_LEX_UNIT *unit= &thd->lex->unit; +@@ -1456,7 +1558,7 @@ + DBUG_RETURN(TRUE); + protocol->prepare_for_resend(); + +- if (mysql_bin_log.is_open()) ++ if (mysql_bin_log.is_log_open()) + { + LOG_INFO li; + mysql_bin_log.get_current_log(&li); +@@ -1497,7 +1599,7 @@ + Protocol *protocol= thd->protocol; + DBUG_ENTER("show_binlogs"); + +- if (!mysql_bin_log.is_open()) ++ if (!mysql_bin_log.is_log_open()) + { + my_message(ER_NO_BINARY_LOGGING, ER(ER_NO_BINARY_LOGGING), MYF(0)); + return 1; +@@ -1606,6 +1708,235 @@ + DBUG_RETURN(0); + } + ++ ++/* make_master: Make the current database a primary and starts the ++ * binlog logging for all updates. ++ * ++ * The function handles the following sql commands: ++ * . MAKE MASTER MASTER_LOG_FILE='replication_log', MASTER_SERVER_ID=1, ++ * [WITH BINLOG]; ++ * . MAKE MASTER MASTER_LOG_FILE='replication_log', MASTER_SERVER_ID=1, ++ * INDEX='replication_log.index' [WITH BINLOG]; ++ * . MAKE MASTER REVOKE SESSION; ++ * . MAKE MASTER REVOKE SESSION WITH KILL; ++ * . 
MAKE MASTER GRANT SESSION; ++ * ++ * Args: ++ * thd - the current thread ++ * binlog_name - binlog's filename ++ * binlog_indexname - binlog index's filename ++ * mi - master info struct containing binlog name ++ * (set when we enable master during runtime) ++ * ++ * Return: ++ * 0 : success ++ * -1 : failure ++ */ ++int make_master(THD* thd, ++ const char *binlog_name, ++ const char *binlog_indexname, ++ const LEX_MASTER_INFO* mi) { ++ int error = 0; ++ ++ DBUG_ENTER("make_master"); ++ /* In two mode, we enable the binlog: ++ * . !mi - LEX is not provided; this is called from startup time ++ * . mi->log_file_name - binlog is specified in the command ++ */ ++ if (!mi || mi->log_file_name) { ++ /* Get the mutex */ ++ VOID(pthread_mutex_lock(&LOCK_failover_master)); ++ ++ /* If the binlog is already opened, we issue an error. We reuse one ++ * existing error, which might not be fully accurate. ++ */ ++ if (mysql_bin_log.is_log_open()) { ++ my_error(ER_MASTER_INFO, MYF(0)); ++ sql_print_error("Replication master log is already open: cannot " ++ "make another master!"); ++ error = -1; ++ } else { ++ if (!mi) { ++ /* This opening happens at mysql startup time. */ ++ if (make_master_open_log(&mysql_bin_log, binlog_name, ++ 0, max_binlog_size) != 0) { ++ error = -1; ++ } ++ } else { ++ /* This opening happens during mysql runtime, which is mostly ++ * requested to do failover. ++ */ ++ ++ error = -1; ++ if (!is_in_failover()) { ++ sql_print_error( ++ "\"make master\" runs only in failover mode. " ++ "Please run \"make master revoke session (with kill)\""); ++ } else if (strlen(mi->log_file_name) == 0) { ++ sql_print_error("Master log filename is not specified correctly."); ++ } else if (!mi->server_id || mi->server_id == MASTER_INFO_SERVER_ID) { ++ sql_print_error("\"make master\": invalid server_id(%d)", ++ mi->server_id); ++ } else { ++ /* Open the new log files and delete all existing ones to avoid ++ * conflicts. 
++ */ ++ uint32 old_server_id = server_id; ++ char *binlog_name = NULL; ++ ++ /* Set the global master server id. ++ * We would not change server id for all connection threads. ++ * All non-super sessions should be blocked by revoke sessions. ++ * Super-user sessions are responsible for their own operations. ++ */ ++ server_id = mi->server_id; ++ thd->server_id = mi->server_id; ++ ++ if (!(binlog_name = my_strdup(mi->log_file_name, MYF(0))) || ++ make_master_open_index(&binlog_name, mi->log_index_name) != 0 || ++ make_master_open_log(&mysql_bin_log, binlog_name, ++ 0, max_binlog_size) != 0) { ++ sql_print_error("Open master logfile failed."); ++ thd->server_id = old_server_id; ++ server_id = old_server_id; ++ } else if (!mi->with_old_binlog && ++ mysql_bin_log.reset_logs(thd) != 0) { ++ sql_print_error("Cleanup existing master logfiles failed."); ++ thd->server_id = old_server_id; ++ server_id = old_server_id; ++ } else { ++ error = 0; ++ } ++ } ++ if (error == -1) ++ my_error(ER_MASTER_INFO, MYF(0)); ++ } ++ } ++ ++ if (error == 0) { ++ /* indicates that binlog is enabled now */ ++ using_update_log = 1; ++ } else if (mysql_bin_log.is_open()) { ++ mysql_bin_log.close(LOG_CLOSE_INDEX); ++ } ++ ++ /* Release the mutex */ ++ VOID(pthread_mutex_unlock(&LOCK_failover_master)); ++ } else { ++ /* The following actions are related to session management during ++ * failover operation. We do not want some sessions come in ++ * during failover and make updates. 
++ * This is invoked for command: MAKE MASTER GRANT/REVOKE SESSION; ++ */ ++ if (mi->in_failover) { ++ set_in_failover(mi->kill_session); ++ } else { ++ clear_in_failover(); ++ } ++ } ++ ++ DBUG_RETURN(error); ++} ++ ++static int make_master_open_log(MYSQL_LOG *log, ++ const char *opt_name, ++ bool no_auto_events, ++ ulong max_size) { ++ char tmp[FN_REFLEN]; ++ ++ // get rid of extension ++ char *p = fn_ext(opt_name); ++ uint length=(uint) (p-opt_name); ++ strmake(tmp,opt_name,min(length,FN_REFLEN)); ++ opt_name=tmp; ++ ++ return log->open(opt_name, LOG_BIN, NULL, WRITE_CACHE, 0, ++ max_size, 0); ++} ++ ++int make_master_open_index(char **binlog_name, ++ const char *binlog_indexname) { ++ char buf[FN_REFLEN]; ++ const char *ln; ++ DBUG_ENTER("make_master_open_index"); ++ ++ ln= mysql_bin_log.generate_name(*binlog_name, "-bin", 1, buf); ++ if (!(*binlog_name) && !binlog_indexname) { ++ /* ++ User didn't give us info to name the binlog index file. ++ Picking `hostname`-bin.index like did in 4.x, causes replication to ++ fail if the hostname is changed later. So, we would like to instead ++ require a name. But as we don't want to break many existing setups, we ++ only give warning, not error. ++ */ ++ sql_print_warning("No argument was provided to --log-bin, and " ++ "--log-bin-index was not used; so replication " ++ "may break when this MySQL server acts as a " ++ "master and has his hostname changed!! Please " ++ "use '--log-bin=%s' to avoid this problem.", ln); ++ } ++ if (ln == buf) { ++ my_free(*binlog_name, MYF(MY_ALLOW_ZERO_PTR)); ++ *binlog_name = my_strdup(buf, MYF(0)); ++ } ++ if (mysql_bin_log.open_index_file(binlog_indexname, ln) != 0) { ++ DBUG_RETURN(-1); ++ } ++ ++ /* ++ Used to specify which type of lock we need to use for queries of type ++ INSERT ... SELECT. This will change when we have row level logging. 
++ */ ++ using_update_log=1; ++ ++ DBUG_RETURN(0); ++} ++ ++/* Set the status indicating that we are in failover and deny all non-super ++ * user access. ++ * ++ * Args: ++ * kill_session - kill all non-super sessions if specified ++ * ++ * Return: ++ * 0 - success ++ * -1 - failure (caused by not killing all sessions) ++ */ ++static int set_in_failover(bool kill_session) { ++ failover_deny_access = 1; ++ ++ if (kill_session) { ++ /* If kill session option is specified, we need to kill all non-super ++ * user sessions. ++ */ ++ THD *kill_thd; ++ ++ uint error=ER_NO_SUCH_THREAD; ++ pthread_mutex_lock(&LOCK_thread_count); // For unlink from list ++ I_List_iterator<THD> it(threads); ++ while ((kill_thd=it++)) { ++ if (!(kill_thd->main_security_ctx.master_access & SUPER_ACL)) { ++ pthread_mutex_lock(&kill_thd->LOCK_delete); // Lock from delete ++ ++ /* ask the thread to die */ ++ kill_thd->awake(THD::KILL_CONNECTION); ++ pthread_mutex_unlock(&kill_thd->LOCK_delete); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_thread_count); ++ } ++ return 0; ++} ++ ++static void clear_in_failover(void) { ++ failover_deny_access = 0; ++} ++ ++bool is_in_failover(void) { ++ return failover_deny_access; ++} ++ ++ + #endif /* HAVE_REPLICATION */ + + +diff -r 66cc9e0a6768 sql/sql_repl.h +--- a/sql/sql_repl.h Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_repl.h Thu Dec 04 21:46:15 2008 -0800 +@@ -38,6 +38,10 @@ + int start_slave(THD* thd, MASTER_INFO* mi, bool net_report); + int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report); + bool change_master(THD* thd, MASTER_INFO* mi); ++int make_master(THD* thd, const char *binlog_name, ++ const char *binlog_indexname, const LEX_MASTER_INFO* mi); ++int make_master_open_index(char **binlog_name, const char *binlog_indexname); ++bool is_in_failover(void); + bool mysql_show_binlog_events(THD* thd); + int cmp_master_pos(const char* log_file_name1, ulonglong log_pos1, + const char* log_file_name2, ulonglong log_pos2); +diff -r 66cc9e0a6768 
sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Thu Dec 04 21:37:12 2008 -0800 ++++ b/sql/sql_yacc.yy Thu Dec 04 21:46:15 2008 -0800 +@@ -735,6 +735,7 @@ + %token LOOP_SYM + %token LOW_PRIORITY + %token LT ++%token MAKE_SYM + %token MAKE_SET_SYM + %token MASTER_CONNECT_RETRY_SYM + %token MASTER_HOST_SYM +@@ -1167,7 +1168,7 @@ + query verb_clause create change select do drop insert replace insert2 + insert_values update delete truncate rename + show describe load alter optimize keycache preload flush +- reset purge begin commit rollback savepoint release ++ make reset purge begin commit rollback savepoint release + slave master_def master_defs master_file_def slave_until_opts + repair restore backup analyze check start checksum + field_list field_list_item field_spec kill column_def key_def +@@ -1301,6 +1302,7 @@ + | kill + | load + | lock ++ | make + | optimize + | keycache + | preload +@@ -1428,6 +1430,56 @@ + master_defs + {} + ; ++ ++/* make master */ ++make: ++ MAKE_SYM MASTER_SYM ++ { ++ LEX *lex = Lex; ++ lex->sql_command = SQLCOM_MAKE_MASTER; ++ bzero((char*) &lex->mi, sizeof(lex->mi)); ++ } ++ make_master_defs ++ { ++ } ++ ; ++ ++make_master_defs: ++ MASTER_LOG_FILE_SYM EQ TEXT_STRING ',' MASTER_SERVER_ID_SYM EQ ulong_num ++ { ++ Lex->mi.log_file_name = $3.str; ++ Lex->mi.server_id = $7; ++ } ++ make_master_with_defs {} ++ | MASTER_LOG_FILE_SYM EQ TEXT_STRING ',' MASTER_SERVER_ID_SYM EQ ulong_num ',' INDEX_SYM EQ TEXT_STRING ++ { ++ Lex->mi.log_file_name = $3.str; ++ Lex->mi.server_id = $7; ++ Lex->mi.log_index_name = $11.str; ++ } ++ make_master_with_defs {} ++ | GRANT SESSION_SYM ++ { ++ Lex->mi.in_failover = 0; ++ } ++ | REVOKE SESSION_SYM ++ { ++ Lex->mi.in_failover = 1; ++ } ++ | REVOKE SESSION_SYM WITH KILL_SYM ++ { ++ Lex->mi.in_failover = 1; ++ Lex->mi.kill_session = 1; ++ } ++ ; ++ ++make_master_with_defs: ++ /* empty */ {} ++ | WITH BINLOG_SYM ++ { ++ /* All old binlogs will be kept after "make master" command. 
*/ ++ Lex->mi.with_old_binlog = 1; ++ } + + master_defs: + master_def +@@ -8396,6 +8448,7 @@ + | HANDLER_SYM {} + | HELP_SYM {} + | LANGUAGE_SYM {} ++ | MAKE_SYM {} + | NO_SYM {} + | OPEN_SYM {} + | PREPARE_SYM {} diff --git a/percona/5.0.77-b13/mysqld_safe_syslog.patch b/percona/5.0.77-b13/mysqld_safe_syslog.patch new file mode 100644 index 0000000..a493a29 --- /dev/null +++ b/percona/5.0.77-b13/mysqld_safe_syslog.patch @@ -0,0 +1,127 @@ +diff -r d91edeb58b50 patch_info/mysqld_safe_syslog.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/mysqld_safe_syslog.info Mon Sep 01 21:58:00 2008 -0700 +@@ -0,0 +1,6 @@ ++File=mysqld_safe_syslog.patch ++Name=Patch allows redirect output of error.log to syslog-ng ++Version=1.0 ++Author=Percona <info@percona.com> ++License=GPL ++Comment=Ported from Debian +diff -r d91edeb58b50 scripts/mysqld_safe.sh +--- a/scripts/mysqld_safe.sh Mon Sep 01 21:57:21 2008 -0700 ++++ b/scripts/mysqld_safe.sh Mon Sep 01 21:58:00 2008 -0700 +@@ -10,12 +10,16 @@ + # mysql.server works by first doing a cd to the base directory and from there + # executing mysqld_safe + +-KILL_MYSQLD=1; + MYSQLD= + + trap '' 1 2 3 15 # we shouldn't let anyone kill us + + umask 007 ++ ++KILL_MYSQLD=1; ++ ++# This command can be used as pipe to syslog. With "-s" it also logs to stderr. ++ERR_LOGGER="logger -p daemon.err -t mysqld_safe -i" + + defaults= + case "$1" in +@@ -177,7 +181,6 @@ + + # these rely on $DATADIR by default, so we'll set them later on + pid_file= +-err_log= + + # Get first arguments from the my.cnf file, groups [mysqld] and [mysqld_safe] + # and then merge with the command line arguments +@@ -245,7 +248,6 @@ + * ) pid_file="$DATADIR/$pid_file" ;; + esac + fi +-test -z "$err_log" && err_log=$DATADIR/`@HOSTNAME@`.err + + if test -n "$mysql_unix_port" + then +@@ -315,8 +317,6 @@ + then + USER_OPTION="--user=$user" + fi +- # If we are root, change the err log to the right user. 
+- touch $err_log; chown $user $err_log + if test -n "$open_files" + then + ulimit -n $open_files +@@ -341,18 +341,16 @@ + then + if @FIND_PROC@ + then # The pid contains a mysqld process +- echo "A mysqld process already exists" +- echo "A mysqld process already exists at " `date` >> $err_log ++ echo "A mysqld process already exists" | $ERR_LOGGER -s + exit 1 + fi + fi + rm -f $pid_file + if test -f $pid_file + then +- echo "Fatal error: Can't remove the pid file: $pid_file" +- echo "Fatal error: Can't remove the pid file: $pid_file at " `date` >> $err_log +- echo "Please remove it manually and start $0 again" +- echo "mysqld daemon not started" ++ echo "Fatal error: Can't remove the pid file: $pid_file" | $ERR_LOGGER -s ++ echo "Please remove it manually and start $0 again" | $ERR_LOGGER -s ++ echo "mysqld daemon not started" | $ERR_LOGGER -s + exit 1 + fi + fi +@@ -377,15 +375,15 @@ + # ulimit -n 256 > /dev/null 2>&1 # Fix for BSD and FreeBSD systems + #fi + +-echo "`date +'%y%m%d %H:%M:%S mysqld started'`" >> $err_log ++echo "started" | $ERR_LOGGER -s + while true + do + rm -f $safe_mysql_unix_port $pid_file # Some extra safety + if test -z "$args" + then +- $NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ >> $err_log 2>&1 ++ $NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ 2>&1 | $ERR_LOGGER -t mysqld + else +- eval "$NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ $args >> $err_log 2>&1" ++ eval "$NOHUP_NICENESS $ledir/$MYSQLD $defaults --basedir=$MY_BASEDIR_VERSION --datadir=$DATADIR $USER_OPTION --pid-file=$pid_file @MYSQLD_DEFAULT_SWITCHES@ $args 2>&1 | $ERR_LOGGER -t mysqld" + fi + if test ! 
-f $pid_file # This is removed if normal shutdown + then +@@ -402,7 +400,7 @@ + # kill -9 is used or the process won't react on the kill. + numofproces=`ps xaww | grep -v "grep" | grep "$ledir/$MYSQLD\>" | grep -c "pid-file=$pid_file"` + +- echo -e "\nNumber of processes running now: $numofproces" | tee -a $err_log ++ echo -e "\nNumber of processes running now: $numofproces" | $ERR_LOGGER -s + I=1 + while test "$I" -le "$numofproces" + do +@@ -415,16 +413,14 @@ + # echo "TEST $I - $T **" + if kill -9 $T + then +- echo "$MYSQLD process hanging, pid $T - killed" | tee -a $err_log ++ echo "$MYSQLD process hanging, pid $T - killed" | $ERR_LOGGER -s + else + break + fi + I=`expr $I + 1` + done + fi +- echo "`date +'%y%m%d %H:%M:%S'` mysqld restarted" | tee -a $err_log ++ echo "restarted" | $ERR_LOGGER -s + done + +-echo "`date +'%y%m%d %H:%M:%S'` mysqld ended" | tee -a $err_log +-echo "" | tee -a $err_log +- ++echo "ended" | $ERR_LOGGER -s diff --git a/percona/5.0.77-b13/show_patches.patch b/percona/5.0.77-b13/show_patches.patch new file mode 100644 index 0000000..7f1d431 --- /dev/null +++ b/percona/5.0.77-b13/show_patches.patch @@ -0,0 +1,288 @@ +diff -r c3e57b0c22c4 patch_info/show_patches.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/show_patches.info Mon Dec 22 00:25:06 2008 -0800 +@@ -0,0 +1,6 @@ ++File=show_patches.patch ++Name=SHOW PATCHES ++Version=1.0 ++Author=Jeremy Cole ++License=N/A ++Comment +diff -r c3e57b0c22c4 sql/Makefile.am +--- a/sql/Makefile.am Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/Makefile.am Mon Dec 22 00:25:06 2008 -0800 +@@ -118,7 +118,7 @@ + -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ + @DEFS@ + +-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h ++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h + EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \ + message.mc message.h message.rc MSG00001.bin \ + examples/CMakeLists.txt CMakeLists.txt \ +@@ -175,6 +175,8 @@ + udf_example_la_SOURCES= udf_example.c + 
udf_example_la_LDFLAGS= -module -rpath $(pkglibdir) + ++patch_info.h: patch_info.h.pl ++ $(PERL) $< > $@ + + # Don't update the files from bitkeeper + %::SCCS/s.% +diff -r c3e57b0c22c4 sql/Makefile.in +--- a/sql/Makefile.in Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/Makefile.in Mon Dec 22 00:25:06 2008 -0800 +@@ -561,7 +561,7 @@ + gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) + mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc + mysql_tzinfo_to_sql_LDADD = @MYSQLD_EXTRA_LDFLAGS@ $(LDADD) $(CXXLDFLAGS) +-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h ++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h + EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \ + message.mc message.h message.rc MSG00001.bin \ + examples/CMakeLists.txt CMakeLists.txt \ +@@ -1237,6 +1237,9 @@ + ./gen_lex_hash$(EXEEXT) > $@-t + $(MV) $@-t $@ + ++patch_info.h: patch_info.h.pl ++ $(PERL) $< > $@ ++ + # Don't update the files from bitkeeper + %::SCCS/s.% + # Tell versions [3.59,3.63) of GNU make to not export all variables. 
+diff -r c3e57b0c22c4 sql/lex.h +--- a/sql/lex.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/lex.h Mon Dec 22 00:25:06 2008 -0800 +@@ -367,6 +367,7 @@ + { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PARTIAL", SYM(PARTIAL)}, + { "PASSWORD", SYM(PASSWORD)}, ++ { "PATCHES", SYM(PATCHES)}, + { "PHASE", SYM(PHASE_SYM)}, + { "POINT", SYM(POINT_SYM)}, + { "POLYGON", SYM(POLYGON)}, +diff -r c3e57b0c22c4 sql/mysql_priv.h +--- a/sql/mysql_priv.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/mysql_priv.h Mon Dec 22 00:25:06 2008 -0800 +@@ -968,6 +968,7 @@ + int mysqld_show_status(THD *thd); + int mysqld_show_variables(THD *thd,const char *wild); + bool mysqld_show_storage_engines(THD *thd); ++bool mysqld_show_patches(THD *thd); + bool mysqld_show_privileges(THD *thd); + bool mysqld_show_column_types(THD *thd); + bool mysqld_help (THD *thd, const char *text); +diff -r c3e57b0c22c4 sql/patch_info.h.pl +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/sql/patch_info.h.pl Mon Dec 22 00:25:06 2008 -0800 +@@ -0,0 +1,65 @@ ++use strict; ++ ++my $patch_info_path = '../patch_info'; ++my $file = ''; ++my $output = ''; ++ ++ ++if (opendir(PATCH_DIR, $patch_info_path)) ++{ ++ while ((my $file = readdir(PATCH_DIR))) ++ { ++ open(PATCH_FILE, "<$patch_info_path/$file") || die("Unable to open $patch_info_path/$file ($!)"); ++ my %fields; ++ ++ if ($file =~ /^\./) ++ { ++ next; ++ } ++ ++ while (<PATCH_FILE>) ++ { ++ chomp; ++ ++ my ($key, $value) = split(/\s*=\s*/); ++ $fields{lc($key)} = $value; ++ } ++ ++ $output .= "{\"$fields{'file'}\", \"$fields{'name'}\", \"$fields{'version'}\", \"$fields{'author'}\", \"$fields{'license'}\",\"$fields{'comment'}\"},\n" ++ } ++} ++ ++print <<HEADER; ++ ++/* Copyright (C) 2002-2006 MySQL AB ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; version 2 of the License. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ ++ ++#ifdef USE_PRAGMA_INTERFACE ++#pragma interface /* gcc class implementation */ ++#endif ++ ++struct patch { ++ const char *file; ++ const char *name; ++ const char *version; ++ const char *author; ++ const char *license; ++ const char *comment; ++}patches[] = { ++$output ++{NULL, NULL, NULL, NULL} ++}; ++ ++HEADER +diff -r c3e57b0c22c4 sql/sp_head.cc +--- a/sql/sp_head.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sp_head.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -191,6 +191,7 @@ + case SQLCOM_SHOW_MUTEX_STATUS: + case SQLCOM_SHOW_NEW_MASTER: + case SQLCOM_SHOW_OPEN_TABLES: ++ case SQLCOM_SHOW_PATCHES: + case SQLCOM_SHOW_PRIVILEGES: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_SLAVE_HOSTS: +diff -r c3e57b0c22c4 sql/sql_lex.h +--- a/sql/sql_lex.h Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_lex.h Mon Dec 22 00:25:06 2008 -0800 +@@ -95,6 +95,7 @@ + SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, + SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES, ++ SQLCOM_SHOW_PATCHES, + + /* + When a command is added here, be sure it's also added in mysqld.cc +diff -r c3e57b0c22c4 sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -3947,6 +3947,9 @@ + break; + case SQLCOM_SHOW_STORAGE_ENGINES: + res= mysqld_show_storage_engines(thd); ++ break; ++ case SQLCOM_SHOW_PATCHES: ++ res= mysqld_show_patches(thd); + break; + case SQLCOM_SHOW_PRIVILEGES: + res= mysqld_show_privileges(thd); +diff -r 
c3e57b0c22c4 sql/sql_prepare.cc +--- a/sql/sql_prepare.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_prepare.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -1790,6 +1790,7 @@ + case SQLCOM_SHOW_DATABASES: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_STORAGE_ENGINES: ++ case SQLCOM_SHOW_PATCHES: + case SQLCOM_SHOW_PRIVILEGES: + case SQLCOM_SHOW_COLUMN_TYPES: + case SQLCOM_SHOW_STATUS: +diff -r c3e57b0c22c4 sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:25:06 2008 -0800 +@@ -22,6 +22,7 @@ + #include "sp.h" + #include "sp_head.h" + #include "sql_trigger.h" ++#include "patch_info.h" + #include <my_dir.h> + + #ifdef HAVE_BERKELEY_DB +@@ -45,6 +46,47 @@ + static int + view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); + bool schema_table_store_record(THD *thd, TABLE *table); ++ ++/*************************************************************************** ++** List patches built into this release ++***************************************************************************/ ++ ++bool mysqld_show_patches(THD *thd) ++{ ++ List<Item> field_list; ++ int i = 0; ++ Protocol *protocol= thd->protocol; ++ DBUG_ENTER("mysqld_show_patches"); ++ ++ field_list.push_back(new Item_empty_string("File", 255)); ++ field_list.push_back(new Item_empty_string("Name", 50)); ++ field_list.push_back(new Item_empty_string("Version", 10)); ++ field_list.push_back(new Item_empty_string("Author", 50)); ++ field_list.push_back(new Item_empty_string("License", 50)); ++ field_list.push_back(new Item_empty_string("Comment", 32)); ++ ++ if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) ++ DBUG_RETURN(TRUE); ++ ++ for (i = 0; patches[i].file; i++) ++ { ++ protocol->prepare_for_resend(); ++ protocol->store(patches[i].file, system_charset_info); ++ protocol->store(patches[i].name, system_charset_info); ++ protocol->store(patches[i].version, system_charset_info); ++ 
protocol->store(patches[i].author, system_charset_info); ++ protocol->store(patches[i].license, system_charset_info); ++ protocol->store(patches[i].comment, system_charset_info); ++ ++ if (protocol->write()) ++ DBUG_RETURN(TRUE); ++ } ++ ++ ++ send_eof(thd); ++ DBUG_RETURN(FALSE); ++ ++} + + + /*************************************************************************** +diff -r c3e57b0c22c4 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Mon Dec 22 00:20:06 2008 -0800 ++++ b/sql/sql_yacc.yy Mon Dec 22 00:25:06 2008 -0800 +@@ -824,6 +824,7 @@ + %token PAGE_SYM + %token PARTIAL + %token PASSWORD ++%token PATCHES + %token PARAM_MARKER + %token PHASE_SYM + %token POINTFROMTEXT +@@ -8019,7 +8020,7 @@ + ; + + show_param: +- DATABASES wild_and_where ++ DATABASES wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SELECT; +@@ -8119,6 +8120,10 @@ + LEX *lex=Lex; + lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES; + WARN_DEPRECATED("SHOW TABLE TYPES", "SHOW [STORAGE] ENGINES"); ++ } ++ | PATCHES ++ { ++ Lex->sql_command= SQLCOM_SHOW_PATCHES; + } + | opt_storage ENGINES_SYM + { +@@ -9554,6 +9559,7 @@ + | PAGE_SYM {} + | PARTIAL {} + | PASSWORD {} ++ | PATCHES {} + | PHASE_SYM {} + | POINT_SYM {} + | POLYGON {} diff --git a/percona/5.0.77-b13/split_buf_pool_mutex_fixed_optimistic_safe.patch b/percona/5.0.77-b13/split_buf_pool_mutex_fixed_optimistic_safe.patch new file mode 100644 index 0000000..885e893 --- /dev/null +++ b/percona/5.0.77-b13/split_buf_pool_mutex_fixed_optimistic_safe.patch @@ -0,0 +1,1314 @@ +diff -r 2e0c46e78b50 innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:59 2008 -0800 +@@ -548,6 +548,19 @@ + mutex_create(&(buf_pool->mutex)); + mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL); + ++ mutex_create(&(buf_pool->flush_list_mutex)); ++ mutex_create(&(buf_pool->LRU_mutex)); ++ mutex_create(&(buf_pool->free_mutex)); ++ rw_lock_create(&(buf_pool->hash_latch)); ++ 
mutex_set_level(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK); ++ mutex_set_level(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK); ++ mutex_set_level(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK); ++ rw_lock_set_level(&(buf_pool->hash_latch), SYNC_NO_ORDER_CHECK); ++ ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ rw_lock_x_lock(&(buf_pool->hash_latch)); + mutex_enter(&(buf_pool->mutex)); + + if (srv_use_awe) { +@@ -723,6 +736,10 @@ + block->in_free_list = TRUE; + } + ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + mutex_exit(&(buf_pool->mutex)); + + if (srv_use_adaptive_hash_indexes) { +@@ -859,12 +876,12 @@ + if (buf_pool->freed_page_clock >= block->freed_page_clock + + 1 + (buf_pool->curr_size / 4)) { + +- mutex_enter(&buf_pool->mutex); ++ mutex_enter(&(buf_pool->LRU_mutex)); + /* There has been freeing activity in the LRU list: + best to move to the head of the LRU list */ + + buf_LRU_make_block_young(block); +- mutex_exit(&buf_pool->mutex); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + } + +@@ -880,7 +897,7 @@ + { + buf_block_t* block; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + block = buf_block_align(frame); + +@@ -888,7 +905,7 @@ + + buf_LRU_make_block_young(block); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + + /************************************************************************ +@@ -899,7 +916,7 @@ + /*===========*/ + buf_block_t* block) /* in, own: block to be freed */ + { +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + mutex_enter(&block->mutex); + +@@ -909,7 +926,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + } + + 
/************************************************************************* +@@ -950,11 +967,11 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(block); + } +@@ -971,7 +988,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +@@ -979,7 +996,7 @@ + block->check_index_page_at_flush = FALSE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + } + + /************************************************************************ +@@ -998,7 +1015,7 @@ + buf_block_t* block; + ibool is_hashed; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +@@ -1008,7 +1025,7 @@ + is_hashed = block->is_hashed; + } + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(is_hashed); + } +@@ -1050,7 +1067,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +@@ -1058,7 +1075,7 @@ + block->file_page_was_freed = TRUE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(block); + } +@@ -1079,7 +1096,7 @@ + { + buf_block_t* block; + +- mutex_enter_fast(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +@@ -1087,7 +1104,7 @@ + block->file_page_was_freed = FALSE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(block); + } +@@ -1166,26 +1183,33 @@ + buf_pool->n_page_gets++; + loop: + block = NULL; +- mutex_enter_fast(&(buf_pool->mutex)); ++ // 
mutex_enter_fast(&(buf_pool->mutex)); + + if (guess) { + block = buf_block_align(guess); + ++ mutex_enter(&block->mutex); + if ((offset != block->offset) || (space != block->space) + || (block->state != BUF_BLOCK_FILE_PAGE)) { + ++ mutex_exit(&block->mutex); + block = NULL; + } + } + + if (block == NULL) { ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + block = buf_page_hash_get(space, offset); ++ if(block) { ++ mutex_enter(&block->mutex); ++ } ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + } + + if (block == NULL) { + /* Page not in buf_pool: needs to be read from file */ + +- mutex_exit(&(buf_pool->mutex)); ++ // mutex_exit(&(buf_pool->mutex)); + + if (mode == BUF_GET_IF_IN_POOL) { + +@@ -1204,7 +1228,7 @@ + goto loop; + } + +- mutex_enter(&block->mutex); ++ // mutex_enter(&block->mutex); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + +@@ -1216,7 +1240,7 @@ + + if (mode == BUF_GET_IF_IN_POOL) { + /* The page is only being read to buffer */ +- mutex_exit(&buf_pool->mutex); ++ // mutex_exit(&buf_pool->mutex); + mutex_exit(&block->mutex); + + return(NULL); +@@ -1233,7 +1257,9 @@ + LRU list and we must put it to awe_LRU_free_mapped list once + mapped to a frame */ + ++ mutex_enter_fast(&(buf_pool->mutex)); + buf_awe_map_page_to_frame(block, TRUE); ++ mutex_exit(&buf_pool->mutex); + } + + #ifdef UNIV_SYNC_DEBUG +@@ -1241,7 +1267,7 @@ + #else + buf_block_buf_fix_inc(block); + #endif +- mutex_exit(&buf_pool->mutex); ++ // mutex_exit(&buf_pool->mutex); + + /* Check if this is the first access to the page */ + +@@ -1791,7 +1817,8 @@ + + ut_a(block); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ rw_lock_x_lock(&(buf_pool->hash_latch)); + mutex_enter(&block->mutex); + + if (fil_tablespace_deleted_or_being_deleted_in_mem(space, +@@ -1806,7 +1833,8 @@ + being deleted, or the page is already in buf_pool, return */ + + mutex_exit(&block->mutex); +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ 
rw_lock_x_unlock(&(buf_pool->hash_latch)); + + buf_block_free(block); + +@@ -1821,10 +1849,14 @@ + ut_ad(block); + + buf_page_init(space, offset, block); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + /* The block must be put to the LRU list, to the old blocks */ + + buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ ++ mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */ + + block->io_fix = BUF_IO_READ; + +@@ -1873,7 +1905,8 @@ + + free_block = buf_LRU_get_free_block(); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ rw_lock_x_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + +@@ -1884,7 +1917,8 @@ + block->file_page_was_freed = FALSE; + + /* Page can be found in buf_pool */ +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + buf_block_free(free_block); + +@@ -1907,6 +1941,7 @@ + mutex_enter(&block->mutex); + + buf_page_init(space, offset, block); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + /* The block must be put to the LRU list */ + buf_LRU_add_block(block, FALSE); +@@ -1918,7 +1953,7 @@ + #endif + buf_pool->n_pages_created++; + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); + +@@ -1932,7 +1967,7 @@ + ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + frame = block->frame; + +@@ -1968,6 +2003,7 @@ + { + ulint io_type; + ulint read_page_no; ++ ulint flush_type; + + buf_io_counter_t* io_counter; + ulint fold; +@@ -2050,9 +2086,6 @@ + } + } + +- mutex_enter(&(buf_pool->mutex)); +- mutex_enter(&block->mutex); +- + #ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); + #endif +@@ -2061,9 +2094,12 @@ + removes the newest lock 
debug record, without checking the thread + id. */ + +- block->io_fix = 0; +- + if (io_type == BUF_IO_READ) { ++ mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ ++ block->io_fix = 0; ++ + /* NOTE that the call to ibuf may have moved the ownership of + the x-latch to this OS thread: do not let this confuse you in + debugging! */ +@@ -2094,6 +2130,8 @@ + } + } + ++ mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fputs("Has read ", stderr); +@@ -2102,10 +2140,25 @@ + } else { + ut_ad(io_type == BUF_IO_WRITE); + ++ flush_type = block->flush_type; ++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */ ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ ++ block->io_fix = 0; ++ + /* Write means a flush operation: call the completion + routine in the flush system */ + + buf_flush_write_complete(block); ++ ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! 
*/ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); + /* io_counter here */ +@@ -2131,6 +2184,9 @@ + + buf_pool->n_pages_written++; + ++ mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); ++ + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fputs("Has written ", stderr); +@@ -2138,9 +2194,6 @@ + #endif /* UNIV_DEBUG */ + } + +- mutex_exit(&block->mutex); +- mutex_exit(&(buf_pool->mutex)); +- + #ifdef UNIV_DEBUG + if (buf_debug_prints) { + fprintf(stderr, "page space %lu page no %lu\n", +@@ -2168,11 +2221,11 @@ + freed = buf_LRU_search_and_free_block(100); + } + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } + + /************************************************************************* +@@ -2191,10 +2244,22 @@ + ulint n_flush = 0; + ulint n_free = 0; + ulint n_page = 0; ++ ulint n_single_flush_tmp = 0; ++ ulint n_lru_flush_tmp = 0; ++ ulint n_list_flush_tmp = 0; + + ut_ad(buf_pool); + ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ rw_lock_x_lock(&(buf_pool->hash_latch)); ++ + mutex_enter(&(buf_pool->mutex)); ++ n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]; ++ n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST]; ++ n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU]; ++ mutex_exit(&(buf_pool->mutex)); + + for (i = 0; i < buf_pool->curr_size; i++) { + +@@ -2262,11 +2327,14 @@ + } + ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); + +- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); +- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); +- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); ++ ut_a(n_single_flush_tmp == n_single_flush); ++ ut_a(n_list_flush_tmp == n_list_flush); ++ ut_a(n_lru_flush_tmp == n_lru_flush); + +- 
mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + ut_a(buf_LRU_validate()); + ut_a(buf_flush_validate()); +@@ -2298,7 +2366,9 @@ + index_ids = mem_alloc(sizeof(dulint) * size); + counts = mem_alloc(sizeof(ulint) * size); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + fprintf(stderr, + "buf_pool size %lu\n" +@@ -2351,7 +2421,9 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + for (i = 0; i < n_found; i++) { + index = dict_index_get_if_in_cache(index_ids[i]); +@@ -2386,8 +2458,6 @@ + ulint i; + ulint fixed_pages_number = 0; + +- mutex_enter(&(buf_pool->mutex)); +- + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); +@@ -2403,7 +2473,6 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); + return fixed_pages_number; + } + #endif /* UNIV_DEBUG */ +@@ -2431,7 +2500,9 @@ + { + ulint ratio; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list)) + / (1 + UT_LIST_GET_LEN(buf_pool->LRU) +@@ -2439,7 +2510,9 @@ + + /* 1 + is there to avoid division by zero */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(ratio); + } +@@ -2459,6 +2532,9 @@ + ut_ad(buf_pool); + size = buf_pool->curr_size; + ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + mutex_enter(&(buf_pool->mutex)); + 
+ if (srv_use_awe) { +@@ -2532,6 +2608,9 @@ + buf_pool->n_pages_written_old = buf_pool->n_pages_written; + buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped; + ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + mutex_exit(&(buf_pool->mutex)); + } + +@@ -2562,8 +2641,6 @@ + + ut_ad(buf_pool); + +- mutex_enter(&(buf_pool->mutex)); +- + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); +@@ -2584,8 +2661,6 @@ + + mutex_exit(&block->mutex); + } +- +- mutex_exit(&(buf_pool->mutex)); + + return(TRUE); + } +@@ -2625,11 +2700,11 @@ + { + ulint len; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + len = UT_LIST_GET_LEN(buf_pool->free); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(len); + } +diff -r 2e0c46e78b50 innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0flu.c Mon Dec 22 00:33:59 2008 -0800 +@@ -117,12 +117,14 @@ + ut_ad(mutex_own(&block->mutex)); + #endif /* UNIV_SYNC_DEBUG */ + if (block->state != BUF_BLOCK_FILE_PAGE) { ++ /* I permited not to own LRU_mutex.. 
*/ ++/* + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: buffer block state %lu in the LRU list!\n", + (ulong)block->state); + ut_print_buf(stderr, (byte*)block, sizeof(buf_block_t)); +- ++*/ + return(FALSE); + } + +@@ -536,18 +538,20 @@ + ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST + || flush_type == BUF_FLUSH_SINGLE_PAGE); + +- mutex_enter(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + block = buf_page_hash_get(space, offset); + + ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); + + if (!block) { +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + return(0); + } + + mutex_enter(&block->mutex); ++ mutex_enter(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + if (flush_type == BUF_FLUSH_LIST + && buf_flush_ready_for_flush(block, flush_type)) { +@@ -744,7 +748,7 @@ + high = fil_space_get_size(space); + } + +- mutex_enter(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + for (i = low; i < high; i++) { + +@@ -778,7 +782,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + /* Note: as we release the buf_pool mutex + above, in buf_flush_try_page we cannot be sure +@@ -789,14 +793,14 @@ + count += buf_flush_try_page(space, i, + flush_type); + +- mutex_enter(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + } else { + mutex_exit(&block->mutex); + } + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(count); + } +@@ -849,7 +853,14 @@ + } + + (buf_pool->init_flush)[flush_type] = TRUE; ++ ++ mutex_exit(&(buf_pool->mutex)); + ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ + for (;;) { + /* If we have flushed enough, leave the loop */ + if (page_count >= min_n) { +@@ -895,7 +906,10 @@ + offset = block->offset; + + 
mutex_exit(&block->mutex); +- mutex_exit(&(buf_pool->mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + old_page_count = page_count; + +@@ -908,7 +922,10 @@ + flush_type, offset, + page_count - old_page_count); */ + +- mutex_enter(&(buf_pool->mutex)); ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ } ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + } else if (flush_type == BUF_FLUSH_LRU) { + +@@ -930,6 +947,13 @@ + break; + } + } ++ ++ if (flush_type == BUF_FLUSH_LRU) { ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ ++ mutex_enter(&(buf_pool->mutex)); + + (buf_pool->init_flush)[flush_type] = FALSE; + +@@ -989,10 +1013,14 @@ + buf_block_t* block; + ulint n_replaceable; + ulint distance = 0; +- +- mutex_enter(&(buf_pool->mutex)); ++ ++ /* optimistic search... */ ++ //mutex_enter(&(buf_pool->LRU_mutex)); ++ //mutex_enter(&(buf_pool->free_mutex)); + + n_replaceable = UT_LIST_GET_LEN(buf_pool->free); ++ ++ //mutex_exit(&(buf_pool->free_mutex)); + + block = UT_LIST_GET_LAST(buf_pool->LRU); + +@@ -1014,7 +1042,7 @@ + block = UT_LIST_GET_PREV(LRU, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->LRU_mutex)); + + if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) { + +@@ -1033,8 +1061,9 @@ + immediately, without waiting. 
*/ + + void +-buf_flush_free_margin(void) ++buf_flush_free_margin( + /*=======================*/ ++ ibool wait) + { + ulint n_to_flush; + ulint n_flushed; +@@ -1044,7 +1073,7 @@ + if (n_to_flush > 0) { + n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, + ut_dulint_zero); +- if (n_flushed == ULINT_UNDEFINED) { ++ if (wait && n_flushed == ULINT_UNDEFINED) { + /* There was an LRU type flush batch already running; + let us wait for it to end */ + +@@ -1094,11 +1123,11 @@ + { + ibool ret; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + ret = buf_flush_validate_low(); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + return(ret); + } +diff -r 2e0c46e78b50 innobase/buf/buf0lru.c +--- a/innobase/buf/buf0lru.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0lru.c Mon Dec 22 00:33:59 2008 -0800 +@@ -79,7 +79,10 @@ + ibool all_freed; + + scan_again: +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); ++ rw_lock_x_lock(&(buf_pool->hash_latch)); + + all_freed = TRUE; + +@@ -117,7 +120,10 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + /* Note that the following call will acquire + an S-latch on the page */ +@@ -147,7 +153,10 @@ + block = UT_LIST_GET_PREV(LRU, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + + if (!all_freed) { + os_thread_sleep(20000); +@@ -170,14 +179,14 @@ + ulint len; + ulint limit; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + len = 
UT_LIST_GET_LEN(buf_pool->LRU); + + if (len < BUF_LRU_OLD_MIN_LEN) { + /* The LRU list is too short to do read-ahead */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + return(0); + } +@@ -186,7 +195,7 @@ + + limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO; + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + + return(limit); + } +@@ -210,13 +219,15 @@ + ulint distance = 0; + ibool freed; + +- mutex_enter(&(buf_pool->mutex)); ++ /* optimistic search... */ ++ //mutex_enter(&(buf_pool->LRU_mutex)); + ++retry: + freed = FALSE; + block = UT_LIST_GET_LAST(buf_pool->LRU); + + while (block != NULL) { +- ut_a(block->in_LRU_list); ++ //ut_a(block->in_LRU_list); /* optimistic */ + + mutex_enter(&block->mutex); + +@@ -231,9 +242,17 @@ + } + #endif /* UNIV_DEBUG */ + ++ mutex_exit(&block->mutex); ++ ++ mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */ ++ ++ rw_lock_x_lock(&(buf_pool->hash_latch)); ++ mutex_enter(&block->mutex); ++ if(block->in_LRU_list && buf_flush_ready_for_replace(block)) { + buf_LRU_block_remove_hashed_page(block); ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + mutex_exit(&block->mutex); + + /* Remove possible adaptive hash index built on the +@@ -246,14 +265,25 @@ + + ut_a(block->buf_fix_count == 0); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + mutex_enter(&block->mutex); + + buf_LRU_block_free_hashed_page(block); + freed = TRUE; ++ mutex_exit(&(buf_pool->free_mutex)); + mutex_exit(&block->mutex); + + break; ++ } else { /* someone may interrupt...??? 
*/ ++ mutex_exit(&(buf_pool->LRU_mutex));/* optimistic */ ++ ++ rw_lock_x_unlock(&(buf_pool->hash_latch)); ++ ++ if (!(block->in_LRU_list)) { ++ mutex_exit(&block->mutex); ++ goto retry; ++ } ++ } + } + + mutex_exit(&block->mutex); +@@ -264,13 +294,21 @@ + if (!freed && n_iterations <= 10 + && distance > 100 + (n_iterations * buf_pool->curr_size) + / 10) { ++ ++ mutex_enter(&(buf_pool->mutex)); + buf_pool->LRU_flush_ended = 0; ++ mutex_exit(&(buf_pool->mutex)); + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->LRU_mutex)); + + return(FALSE); + } + } ++ if (!freed) { ++ //mutex_exit(&(buf_pool->LRU_mutex)); ++ } ++ ++ mutex_enter(&(buf_pool->mutex)); + if (buf_pool->LRU_flush_ended > 0) { + buf_pool->LRU_flush_ended--; + } +@@ -322,7 +360,8 @@ + { + ibool ret = FALSE; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) { +@@ -330,7 +369,8 @@ + ret = TRUE; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + return(ret); + } +@@ -353,7 +393,7 @@ + ibool mon_value_was = FALSE; + ibool started_monitor = FALSE; + loop: +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); /* LRU info:optimistic */ + + if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) { +@@ -409,7 +449,7 @@ + /* If there is a block in the free list, take it */ + if (UT_LIST_GET_LEN(buf_pool->free) > 0) { + +- block = UT_LIST_GET_FIRST(buf_pool->free); ++ block = UT_LIST_GET_LAST(buf_pool->free); + ut_a(block->in_free_list); + UT_LIST_REMOVE(free, buf_pool->free, block); + block->in_free_list = FALSE; +@@ -437,7 +477,7 @@ + + mutex_exit(&block->mutex); + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + if 
(started_monitor) { + srv_print_innodb_monitor = mon_value_was; +@@ -449,7 +489,7 @@ + /* If no block was in the free list, search from the end of the LRU + list and try to free a block there */ + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + + freed = buf_LRU_search_and_free_block(n_iterations); + +@@ -486,7 +526,7 @@ + + /* No free block was found: try to flush the LRU list */ + +- buf_flush_free_margin(); ++ buf_flush_free_margin(TRUE); + ++srv_buf_pool_wait_free; + + os_aio_simulated_wake_handler_threads(); +@@ -958,7 +998,7 @@ + ulint LRU_pos; + + ut_ad(buf_pool); +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { + +@@ -1001,7 +1041,10 @@ + + if (buf_pool->LRU_old) { + ut_a(buf_pool->LRU_old_len == old_len); +- } ++ } ++ ++ mutex_exit(&(buf_pool->LRU_mutex)); ++ mutex_enter(&(buf_pool->free_mutex)); + + UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free); + +@@ -1013,7 +1056,7 @@ + block = UT_LIST_GET_NEXT(free, block); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->free_mutex)); + return(TRUE); + } + +@@ -1029,7 +1072,7 @@ + ulint len; + + ut_ad(buf_pool); +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->LRU_mutex)); + + fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock); + +@@ -1073,5 +1116,5 @@ + } + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->LRU_mutex)); + } +diff -r 2e0c46e78b50 innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/buf/buf0rea.c Mon Dec 22 00:33:59 2008 -0800 +@@ -236,10 +236,12 @@ + + return(0); + } ++ mutex_exit(&(buf_pool->mutex)); + + /* Count how many blocks in the area have been recently accessed, + that is, reside near the start of the LRU list. 
*/ + ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + for (i = low; i < high; i++) { + block = buf_page_hash_get(space, i); + +@@ -250,8 +252,9 @@ + recent_blocks++; + } + } ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + +- mutex_exit(&(buf_pool->mutex)); ++ // mutex_exit(&(buf_pool->mutex)); + + if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) { + /* Do nothing */ +@@ -347,7 +350,7 @@ + } + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + return(count + count2); + } +@@ -450,6 +453,7 @@ + + return(0); + } ++ mutex_exit(&(buf_pool->mutex)); + + /* Check that almost all pages in the area have been accessed; if + offset == low, the accesses must be in a descending order, otherwise, +@@ -463,6 +467,7 @@ + + fail_count = 0; + ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + for (i = low; i < high; i++) { + block = buf_page_hash_get(space, i); + +@@ -479,12 +484,13 @@ + pred_block = block; + } + } ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + if (fail_count > BUF_READ_AHEAD_LINEAR_AREA - + BUF_READ_AHEAD_LINEAR_THRESHOLD) { + /* Too many failures: return */ + +- mutex_exit(&(buf_pool->mutex)); ++ //mutex_exit(&(buf_pool->mutex)); + + return(0); + } +@@ -492,10 +498,11 @@ + /* If we got this far, we know that enough pages in the area have + been accessed in the right order: linear read-ahead can be sensible */ + ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + block = buf_page_hash_get(space, offset); + + if (block == NULL) { +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + return(0); + } +@@ -511,7 +518,7 @@ + pred_offset = fil_page_get_prev(frame); + succ_offset = fil_page_get_next(frame); + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + if ((offset == low) && (succ_offset == offset + 1)) { + +@@ -587,7 +594,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ 
+- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints && (count > 0)) { +@@ -655,7 +662,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +@@ -727,7 +734,7 @@ + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ +- buf_flush_free_margin(); ++ buf_flush_free_margin(FALSE); + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +diff -r 2e0c46e78b50 innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:59 2008 -0800 +@@ -946,6 +946,7 @@ + mem_heap_t* io_counter_heap; + ulint io_counters; + hash_table_t* page_hash; /* hash table of the file pages */ ++ rw_lock_t hash_latch; + + ulint n_pend_reads; /* number of pending read operations */ + +@@ -978,6 +979,7 @@ + UT_LIST_BASE_NODE_T(buf_block_t) flush_list; + /* base node of the modified block + list */ ++ mutex_t flush_list_mutex; + ibool init_flush[BUF_FLUSH_LIST + 1]; + /* this is TRUE when a flush of the + given type is being initialized */ +@@ -1011,8 +1013,10 @@ + in the case of AWE, at the start are + always free blocks for which the + physical memory is mapped to a frame */ ++ mutex_t free_mutex; + UT_LIST_BASE_NODE_T(buf_block_t) LRU; + /* base node of the LRU list */ ++ mutex_t LRU_mutex; + buf_block_t* LRU_old; /* pointer to the about 3/8 oldest + blocks in the LRU list; NULL if LRU + length less than BUF_LRU_OLD_MIN_LEN */ +diff -r 2e0c46e78b50 innobase/include/buf0buf.ic +--- a/innobase/include/buf0buf.ic Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0buf.ic Mon Dec 22 00:33:59 2008 -0800 +@@ -112,7 +112,7 @@ + buf_block_t* block; + dulint lsn; + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + block = 
UT_LIST_GET_LAST(buf_pool->flush_list); + +@@ -122,7 +122,7 @@ + lsn = block->oldest_modification; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + + return(lsn); + } +@@ -392,18 +392,18 @@ + /* out: TRUE if io going on */ + buf_block_t* block) /* in: buf_pool block, must be bufferfixed */ + { +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&block->mutex); + + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + ut_ad(block->buf_fix_count > 0); + + if (block->io_fix != 0) { +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(TRUE); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(FALSE); + } +@@ -425,7 +425,7 @@ + + block = buf_block_align(frame); + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&block->mutex); + + if (block->state == BUF_BLOCK_FILE_PAGE) { + lsn = block->newest_modification; +@@ -433,7 +433,7 @@ + lsn = ut_dulint_zero; + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&block->mutex); + + return(lsn); + } +@@ -632,9 +632,9 @@ + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + + if (rw_latch == RW_X_LATCH && mtr->modifications) { +- mutex_enter(&buf_pool->mutex); ++ mutex_enter(&buf_pool->flush_list_mutex); + buf_flush_note_modification(block, mtr); +- mutex_exit(&buf_pool->mutex); ++ mutex_exit(&buf_pool->flush_list_mutex); + } + + mutex_enter(&block->mutex); +diff -r 2e0c46e78b50 innobase/include/buf0flu.h +--- a/innobase/include/buf0flu.h Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0flu.h Mon Dec 22 00:33:59 2008 -0800 +@@ -26,8 +26,9 @@ + a margin of replaceable pages there. */ + + void +-buf_flush_free_margin(void); ++buf_flush_free_margin( + /*=======================*/ ++ ibool wait); + /************************************************************************ + Initializes a page for writing to the tablespace. 
*/ + +diff -r 2e0c46e78b50 innobase/include/buf0flu.ic +--- a/innobase/include/buf0flu.ic Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/include/buf0flu.ic Mon Dec 22 00:33:59 2008 -0800 +@@ -84,7 +84,7 @@ + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); + #endif /* UNIV_SYNC_DEBUG */ + +- mutex_enter(&(buf_pool->mutex)); ++ mutex_enter(&(buf_pool->flush_list_mutex)); + + ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0); + +@@ -102,5 +102,5 @@ + start_lsn) <= 0); + } + +- mutex_exit(&(buf_pool->mutex)); ++ mutex_exit(&(buf_pool->flush_list_mutex)); + } +diff -r 2e0c46e78b50 innobase/log/log0recv.c +--- a/innobase/log/log0recv.c Mon Dec 22 00:33:53 2008 -0800 ++++ b/innobase/log/log0recv.c Mon Dec 22 00:33:59 2008 -0800 +@@ -1693,11 +1693,11 @@ + + mtr_start(&mtr); + +- mutex_enter(&(buf_pool->mutex)); ++ rw_lock_s_lock(&(buf_pool->hash_latch)); + + page = buf_page_hash_get(space, page_no)->frame; + +- mutex_exit(&(buf_pool->mutex)); ++ rw_lock_s_unlock(&(buf_pool->hash_latch)); + + replica = buf_page_get(space + RECV_REPLICA_SPACE_ADD, page_no, + RW_X_LATCH, &mtr); +diff -r 2e0c46e78b50 patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info Mon Dec 22 00:33:59 2008 -0800 +@@ -0,0 +1,6 @@ ++File=split_buf_pool_mutex_fixed_optimistic_safe.patch ++Name=InnoDB patch to fix buffer pool scalability ++Version=1.0 ++Author=Yasufumi Kinoshita ++License=BSD ++Comment= diff --git a/percona/5.0.77-b13/userstatv2.patch b/percona/5.0.77-b13/userstatv2.patch new file mode 100644 index 0000000..a5efe3e --- /dev/null +++ b/percona/5.0.77-b13/userstatv2.patch @@ -0,0 +1,4441 @@ +diff -r 23e5576aa59a BUILD/Makefile.in +--- a/BUILD/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/BUILD/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -146,6 +146,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = 
@LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a Docs/Makefile.in +--- a/Docs/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/Docs/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -144,6 +144,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a Makefile.in +--- a/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -171,6 +171,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a SSL/Makefile.in +--- a/SSL/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/SSL/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -144,6 +144,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a aclocal.m4 +--- a/aclocal.m4 Tue Feb 17 22:33:00 2009 -0800 ++++ b/aclocal.m4 Tue Feb 17 22:33:23 2009 -0800 +@@ -4412,6 +4412,9 @@ + # Is the compiler the GNU C compiler? + with_gcc=$_LT_AC_TAGVAR(GCC, $1) + ++gcc_dir=\`gcc -print-file-name=. | $SED 's,/\.$,,'\` ++gcc_ver=\`gcc -dumpversion\` ++ + # An ERE matcher. + EGREP=$lt_EGREP + +@@ -4545,11 +4548,11 @@ + + # Dependencies to place before the objects being linked to create a + # shared library. +-predep_objects=$lt_[]_LT_AC_TAGVAR(predep_objects, $1) ++predep_objects=\`echo $lt_[]_LT_AC_TAGVAR(predep_objects, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Dependencies to place after the objects being linked to create a + # shared library. 
+-postdep_objects=$lt_[]_LT_AC_TAGVAR(postdep_objects, $1) ++postdep_objects=\`echo $lt_[]_LT_AC_TAGVAR(postdep_objects, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Dependencies to place before the objects being linked to create a + # shared library. +@@ -4561,7 +4564,7 @@ + + # The library search path used internally by the compiler when linking + # a shared library. +-compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) ++compiler_lib_search_path=\`echo $lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Method to check whether dependent libraries are shared objects. + deplibs_check_method=$lt_deplibs_check_method +@@ -4641,7 +4644,7 @@ + link_all_deplibs=$_LT_AC_TAGVAR(link_all_deplibs, $1) + + # Compile-time system search path for libraries +-sys_lib_search_path_spec=$lt_sys_lib_search_path_spec ++sys_lib_search_path_spec=\`echo $lt_sys_lib_search_path_spec | \$SED -e "s@\${gcc_dir}@\\\${gcc_dir}@g;s@\${gcc_ver}@\\\${gcc_ver}@g"\` + + # Run-time system search path for libraries + sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec +diff -r 23e5576aa59a client/Makefile.in +--- a/client/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/client/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -247,6 +247,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a cmd-line-utils/Makefile.in +--- a/cmd-line-utils/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/cmd-line-utils/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -157,6 +157,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a cmd-line-utils/libedit/Makefile.in +--- 
a/cmd-line-utils/libedit/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/cmd-line-utils/libedit/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -175,6 +175,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a cmd-line-utils/readline/Makefile.in +--- a/cmd-line-utils/readline/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/cmd-line-utils/readline/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -173,6 +173,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a configure +--- a/configure Tue Feb 17 22:33:00 2009 -0800 ++++ b/configure Tue Feb 17 22:33:23 2009 -0800 +@@ -39583,7 +39583,91 @@ + # We also disable for SCO for the time being, the headers for the + # thread library we use conflicts with other headers. + ;; +- *) ++*) ++ # most systems require the program be linked with librt library to use ++ # the function clock_gettime ++ my_save_LIBS="$LIBS" ++ LIBS="" ++ ++echo "$as_me:$LINENO: checking for clock_gettime in -lrt" >&5 ++echo $ECHO_N "checking for clock_gettime in -lrt... $ECHO_C" >&6 ++if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then ++ echo $ECHO_N "(cached) $ECHO_C" >&6 ++else ++ ac_check_lib_save_LIBS=$LIBS ++LIBS="-lrt $LIBS" ++cat >conftest.$ac_ext <<_ACEOF ++/* confdefs.h. */ ++_ACEOF ++cat confdefs.h >>conftest.$ac_ext ++cat >>conftest.$ac_ext <<_ACEOF ++/* end confdefs.h. */ ++ ++/* Override any gcc2 internal prototype to avoid an error. */ ++#ifdef __cplusplus ++extern "C" ++#endif ++/* We use char because int might match the return type of a gcc2 ++ builtin and then its argument prototype would still apply. 
*/ ++char clock_gettime (); ++int ++main () ++{ ++clock_gettime (); ++ ; ++ return 0; ++} ++_ACEOF ++rm -f conftest.$ac_objext conftest$ac_exeext ++if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 ++ (eval $ac_link) 2>conftest.er1 ++ ac_status=$? ++ grep -v '^ *+' conftest.er1 >conftest.err ++ rm -f conftest.er1 ++ cat conftest.err >&5 ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); } && ++ { ac_try='test -z "$ac_c_werror_flag" ++ || test ! -s conftest.err' ++ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; } && ++ { ac_try='test -s conftest$ac_exeext' ++ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; }; then ++ ac_cv_lib_rt_clock_gettime=yes ++else ++ echo "$as_me: failed program was:" >&5 ++sed 's/^/| /' conftest.$ac_ext >&5 ++ ++ac_cv_lib_rt_clock_gettime=no ++fi ++rm -f conftest.err conftest.$ac_objext \ ++ conftest$ac_exeext conftest.$ac_ext ++LIBS=$ac_check_lib_save_LIBS ++fi ++echo "$as_me:$LINENO: result: $ac_cv_lib_rt_clock_gettime" >&5 ++echo "${ECHO_T}$ac_cv_lib_rt_clock_gettime" >&6 ++if test $ac_cv_lib_rt_clock_gettime = yes; then ++ cat >>confdefs.h <<_ACEOF ++#define HAVE_LIBRT 1 ++_ACEOF ++ ++ LIBS="-lrt $LIBS" ++ ++fi ++ ++ LIBRT=$LIBS ++ LIBS="$my_save_LIBS" ++ ++ ++ LIBS="$LIBS $LIBRT" ++ + for ac_func in clock_gettime + do + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +@@ -42956,7 +43040,7 @@ + + fi + +-CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS" ++CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS $LIBRT" + + + +diff -r 23e5576aa59a configure.in +--- a/configure.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/configure.in Tue Feb 17 22:33:23 2009 -0800 +@@ -2098,7 +2098,18 @@ + # We also disable for SCO for the time being, the headers for the 
+ # thread library we use conflicts with other headers. + ;; +- *) AC_CHECK_FUNCS(clock_gettime) ++*) ++ # most systems require the program be linked with librt library to use ++ # the function clock_gettime ++ my_save_LIBS="$LIBS" ++ LIBS="" ++ AC_CHECK_LIB(rt,clock_gettime) ++ LIBRT=$LIBS ++ LIBS="$my_save_LIBS" ++ AC_SUBST(LIBRT) ++ ++ LIBS="$LIBS $LIBRT" ++ AC_CHECK_FUNCS(clock_gettime) + ;; + esac + +@@ -2713,7 +2724,7 @@ + AC_DEFINE([THREAD_SAFE_CLIENT], [1], [Should be client be thread safe]) + fi + +-CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS" ++CLIENT_LIBS="$NON_THREADED_LIBS $openssl_libs $ZLIB_LIBS $STATIC_NSS_FLAGS $LIBRT" + + AC_SUBST(CLIENT_LIBS) + AC_SUBST(NON_THREADED_LIBS) +diff -r 23e5576aa59a dbug/Makefile.in +--- a/dbug/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/dbug/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -192,6 +192,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/Makefile.in +--- a/extra/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -240,6 +240,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/Makefile.in +--- a/extra/yassl/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -142,6 +142,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/src/Makefile.in +--- a/extra/yassl/src/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/src/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -151,6 +151,7 
@@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/taocrypt/Makefile.in +--- a/extra/yassl/taocrypt/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/taocrypt/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -142,6 +142,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/taocrypt/benchmark/Makefile.in +--- a/extra/yassl/taocrypt/benchmark/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/taocrypt/benchmark/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -153,6 +153,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/taocrypt/src/Makefile.in +--- a/extra/yassl/taocrypt/src/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/taocrypt/src/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -164,6 +164,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/taocrypt/test/Makefile.in +--- a/extra/yassl/taocrypt/test/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/taocrypt/test/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -153,6 +153,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a extra/yassl/testsuite/Makefile.in +--- a/extra/yassl/testsuite/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/extra/yassl/testsuite/Makefile.in Tue Feb 
17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a heap/Makefile.in +--- a/heap/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/heap/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -202,6 +202,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a include/Makefile.in +--- a/include/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/include/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -160,6 +160,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a include/mysql_com.h +--- a/include/mysql_com.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/include/mysql_com.h Tue Feb 17 22:33:23 2009 -0800 +@@ -106,6 +106,11 @@ + thread */ + #define REFRESH_MASTER 128 /* Remove all bin logs in the index + and truncate the index */ ++#define REFRESH_TABLE_STATS 256 /* Refresh table stats hash table */ ++#define REFRESH_INDEX_STATS 512 /* Refresh index stats hash table */ ++#define REFRESH_USER_STATS 1024 /* Refresh user stats hash table */ ++#define REFRESH_SLOW_QUERY_LOG 4096 /* Flush slow query log and rotate*/ ++#define REFRESH_CLIENT_STATS 8192 /* Refresh client stats hash table */ + + /* The following can't be set with mysql_refresh() */ + #define REFRESH_READ_LOCK 16384 /* Lock tables for read */ +diff -r 23e5576aa59a libmysql/Makefile.in +--- a/libmysql/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/libmysql/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -224,6 +224,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + 
LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a libmysql_r/Makefile.in +--- a/libmysql_r/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/libmysql_r/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -221,6 +221,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ @ZLIB_LIBS@ @openssl_libs@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a libmysqld/Makefile.in +--- a/libmysqld/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/libmysqld/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -246,6 +246,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a libmysqld/examples/Makefile.in +--- a/libmysqld/examples/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/libmysqld/examples/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -192,6 +192,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ @WRAPLIBS@ @CLIENT_LIBS@ $(yassl_libs) + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a man/Makefile.in +--- a/man/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/man/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -151,6 +151,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a myisam/Makefile.in +--- a/myisam/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/myisam/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -235,6 +235,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a myisammrg/Makefile.in +--- 
a/myisammrg/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/myisammrg/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -183,6 +183,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a mysql-test/Makefile.in +--- a/mysql-test/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysql-test/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -161,6 +161,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a mysql-test/ndb/Makefile.in +--- a/mysql-test/ndb/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysql-test/ndb/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -147,6 +147,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysql-test/r/information_schema.result Tue Feb 17 22:33:23 2009 -0800 +@@ -37,10 +37,12 @@ + select * from v1; + c + CHARACTER_SETS ++CLIENT_STATISTICS + COLLATIONS + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INDEX_STATISTICS + KEY_COLUMN_USAGE + PROFILING + ROUTINES +@@ -50,8 +52,10 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + USER_PRIVILEGES ++USER_STATISTICS + VIEWS + columns_priv + db +@@ -83,6 +87,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES ++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + tables_priv tables_priv + time_zone time_zone +@@ -102,6 +107,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES 
++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + tables_priv tables_priv + time_zone time_zone +@@ -121,6 +127,7 @@ + TABLES TABLES + TABLE_CONSTRAINTS TABLE_CONSTRAINTS + TABLE_PRIVILEGES TABLE_PRIVILEGES ++TABLE_STATISTICS TABLE_STATISTICS + TRIGGERS TRIGGERS + tables_priv tables_priv + time_zone time_zone +@@ -594,12 +601,13 @@ + where table_schema='information_schema' limit 2; + TABLE_NAME TABLE_TYPE ENGINE + CHARACTER_SETS SYSTEM VIEW MEMORY +-COLLATIONS SYSTEM VIEW MEMORY ++CLIENT_STATISTICS SYSTEM VIEW MEMORY + show tables from information_schema like "T%"; + Tables_in_information_schema (T%) + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + create database information_schema; + ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema' +@@ -609,6 +617,7 @@ + TABLES SYSTEM VIEW + TABLE_CONSTRAINTS SYSTEM VIEW + TABLE_PRIVILEGES SYSTEM VIEW ++TABLE_STATISTICS SYSTEM VIEW + TRIGGERS SYSTEM VIEW + create table t1(a int); + ERROR 42S02: Unknown table 't1' in information_schema +@@ -621,6 +630,7 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + select table_name from tables where table_name='user'; + table_name +@@ -730,7 +740,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-102 ++106 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -790,18 +800,20 @@ + TABLE_NAME COLUMN_NAME PRIVILEGES + COLUMNS TABLE_NAME select + COLUMN_PRIVILEGES TABLE_NAME select ++INDEX_STATISTICS TABLE_NAME select + KEY_COLUMN_USAGE TABLE_NAME select + STATISTICS TABLE_NAME select + TABLES TABLE_NAME select + TABLE_CONSTRAINTS TABLE_NAME select + TABLE_PRIVILEGES TABLE_NAME select ++TABLE_STATISTICS TABLE_NAME select + VIEWS TABLE_NAME select + delete from mysql.user where user='mysqltest_4'; + delete from mysql.db where user='mysqltest_4'; + flush 
privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 17 ++information_schema 21 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1187,10 +1199,12 @@ + ); + table_name column_name + CHARACTER_SETS CHARACTER_SET_NAME ++CLIENT_STATISTICS CLIENT + COLLATIONS COLLATION_NAME + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA +@@ -1200,8 +1214,10 @@ + TABLES TABLE_SCHEMA + TABLE_CONSTRAINTS CONSTRAINT_SCHEMA + TABLE_PRIVILEGES TABLE_SCHEMA ++TABLE_STATISTICS TABLE_SCHEMA + TRIGGERS TRIGGER_SCHEMA + USER_PRIVILEGES GRANTEE ++USER_STATISTICS USER + VIEWS TABLE_SCHEMA + SELECT t.table_name, c1.column_name + FROM information_schema.tables t +@@ -1219,10 +1235,12 @@ + ); + table_name column_name + CHARACTER_SETS CHARACTER_SET_NAME ++CLIENT_STATISTICS CLIENT + COLLATIONS COLLATION_NAME + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA +@@ -1232,8 +1250,10 @@ + TABLES TABLE_SCHEMA + TABLE_CONSTRAINTS CONSTRAINT_SCHEMA + TABLE_PRIVILEGES TABLE_SCHEMA ++TABLE_STATISTICS TABLE_SCHEMA + TRIGGERS TRIGGER_SCHEMA + USER_PRIVILEGES GRANTEE ++USER_STATISTICS USER + VIEWS TABLE_SCHEMA + SELECT MAX(table_name) FROM information_schema.tables; + MAX(table_name) +@@ -1302,10 +1322,12 @@ + group by t.table_name order by num1, t.table_name; + table_name group_concat(t.table_schema, '.', t.table_name) num1 + CHARACTER_SETS information_schema.CHARACTER_SETS 1 ++CLIENT_STATISTICS information_schema.CLIENT_STATISTICS 1 + COLLATIONS information_schema.COLLATIONS 1 + COLLATION_CHARACTER_SET_APPLICABILITY 
information_schema.COLLATION_CHARACTER_SET_APPLICABILITY 1 + COLUMNS information_schema.COLUMNS 1 + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 ++INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROFILING information_schema.PROFILING 1 + ROUTINES information_schema.ROUTINES 1 +@@ -1315,8 +1337,10 @@ + TABLES information_schema.TABLES 1 + TABLE_CONSTRAINTS information_schema.TABLE_CONSTRAINTS 1 + TABLE_PRIVILEGES information_schema.TABLE_PRIVILEGES 1 ++TABLE_STATISTICS information_schema.TABLE_STATISTICS 1 + TRIGGERS information_schema.TRIGGERS 1 + USER_PRIVILEGES information_schema.USER_PRIVILEGES 1 ++USER_STATISTICS information_schema.USER_STATISTICS 1 + VIEWS information_schema.VIEWS 1 + show global status like "Uptime_%"; + Variable_name Value +diff -r 23e5576aa59a mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysql-test/r/information_schema_db.result Tue Feb 17 22:33:23 2009 -0800 +@@ -6,10 +6,12 @@ + show tables; + Tables_in_information_schema + CHARACTER_SETS ++CLIENT_STATISTICS + COLLATIONS + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INDEX_STATISTICS + KEY_COLUMN_USAGE + PROFILING + ROUTINES +@@ -19,14 +21,17 @@ + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + USER_PRIVILEGES ++USER_STATISTICS + VIEWS + show tables from INFORMATION_SCHEMA like 'T%'; + Tables_in_information_schema (T%) + TABLES + TABLE_CONSTRAINTS + TABLE_PRIVILEGES ++TABLE_STATISTICS + TRIGGERS + create database `inf%`; + create database mbase; +diff -r 23e5576aa59a mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysql-test/r/mysqlshow.result Tue Feb 17 22:33:23 2009 -0800 +@@ -80,10 +80,12 @@ + | Tables | + +---------------------------------------+ + | CHARACTER_SETS | ++| CLIENT_STATISTICS | + | COLLATIONS | + | 
COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROFILING | + | ROUTINES | +@@ -93,8 +95,10 @@ + | TABLES | + | TABLE_CONSTRAINTS | + | TABLE_PRIVILEGES | ++| TABLE_STATISTICS | + | TRIGGERS | + | USER_PRIVILEGES | ++| USER_STATISTICS | + | VIEWS | + +---------------------------------------+ + Database: INFORMATION_SCHEMA +@@ -102,10 +106,12 @@ + | Tables | + +---------------------------------------+ + | CHARACTER_SETS | ++| CLIENT_STATISTICS | + | COLLATIONS | + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROFILING | + | ROUTINES | +@@ -115,8 +121,10 @@ + | TABLES | + | TABLE_CONSTRAINTS | + | TABLE_PRIVILEGES | ++| TABLE_STATISTICS | + | TRIGGERS | + | USER_PRIVILEGES | ++| USER_STATISTICS | + | VIEWS | + +---------------------------------------+ + Wildcard: inf_rmation_schema +diff -r 23e5576aa59a mysys/Makefile.in +--- a/mysys/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/mysys/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -228,6 +228,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/Makefile.in +--- a/ndb/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -171,6 +171,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/docs/Makefile.in +--- a/ndb/docs/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/docs/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -149,6 +149,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = 
@LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/include/Makefile.in +--- a/ndb/include/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/include/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -179,6 +179,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/Makefile.in +--- a/ndb/src/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -204,6 +204,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/Makefile.in +--- a/ndb/src/common/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -174,6 +174,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/debugger/Makefile.in +--- a/ndb/src/common/debugger/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/debugger/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -206,6 +206,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/debugger/signaldata/Makefile.in +--- a/ndb/src/common/debugger/signaldata/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/debugger/signaldata/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -211,6 +211,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a 
ndb/src/common/logger/Makefile.in +--- a/ndb/src/common/logger/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/logger/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -197,6 +197,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/mgmcommon/Makefile.in +--- a/ndb/src/common/mgmcommon/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/mgmcommon/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -211,6 +211,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/portlib/Makefile.in +--- a/ndb/src/common/portlib/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/portlib/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -222,6 +222,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/transporter/Makefile.in +--- a/ndb/src/common/transporter/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/transporter/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -197,6 +197,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/common/util/Makefile.in +--- a/ndb/src/common/util/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/common/util/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -217,6 +217,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff 
-r 23e5576aa59a ndb/src/cw/Makefile.in +--- a/ndb/src/cw/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/cw/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/cw/cpcd/Makefile.in +--- a/ndb/src/cw/cpcd/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/cw/cpcd/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -207,6 +207,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/Makefile.in +--- a/ndb/src/kernel/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -227,6 +227,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/Makefile.in +--- a/ndb/src/kernel/blocks/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/backup/Makefile.in +--- a/ndb/src/kernel/blocks/backup/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/backup/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/cmvmi/Makefile.in 
+--- a/ndb/src/kernel/blocks/cmvmi/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/cmvmi/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbacc/Makefile.in +--- a/ndb/src/kernel/blocks/dbacc/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbacc/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbdict/Makefile.in +--- a/ndb/src/kernel/blocks/dbdict/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbdict/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -206,6 +206,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbdih/Makefile.in +--- a/ndb/src/kernel/blocks/dbdih/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbdih/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -203,6 +203,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dblqh/Makefile.in +--- a/ndb/src/kernel/blocks/dblqh/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dblqh/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -204,6 +204,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + 
LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbtc/Makefile.in +--- a/ndb/src/kernel/blocks/dbtc/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbtc/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbtup/Makefile.in +--- a/ndb/src/kernel/blocks/dbtup/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbtup/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -204,6 +204,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbtux/Makefile.in +--- a/ndb/src/kernel/blocks/dbtux/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbtux/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -199,6 +199,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/dbutil/Makefile.in +--- a/ndb/src/kernel/blocks/dbutil/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/dbutil/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/ndbcntr/Makefile.in +--- a/ndb/src/kernel/blocks/ndbcntr/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/ndbcntr/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -197,6 +197,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = 
@LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/ndbfs/Makefile.in +--- a/ndb/src/kernel/blocks/ndbfs/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/ndbfs/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -197,6 +197,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/qmgr/Makefile.in +--- a/ndb/src/kernel/blocks/qmgr/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/qmgr/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/suma/Makefile.in +--- a/ndb/src/kernel/blocks/suma/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/suma/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/blocks/trix/Makefile.in +--- a/ndb/src/kernel/blocks/trix/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/blocks/trix/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/error/Makefile.in +--- a/ndb/src/kernel/error/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/error/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ 
-206,6 +206,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/kernel/vm/Makefile.in +--- a/ndb/src/kernel/vm/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/kernel/vm/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -207,6 +207,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/mgmapi/Makefile.in +--- a/ndb/src/mgmapi/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/mgmapi/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -205,6 +205,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/mgmclient/Makefile.in +--- a/ndb/src/mgmclient/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/mgmclient/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -216,6 +216,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/mgmsrv/Makefile.in +--- a/ndb/src/mgmsrv/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/mgmsrv/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -213,6 +213,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/src/ndbapi/Makefile.in +--- a/ndb/src/ndbapi/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/src/ndbapi/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -215,6 +215,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = 
@LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/Makefile.in +--- a/ndb/test/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/ndbapi/Makefile.in +--- a/ndb/test/ndbapi/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/ndbapi/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -595,6 +595,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/ndbapi/bank/Makefile.in +--- a/ndb/test/ndbapi/bank/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/ndbapi/bank/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -282,6 +282,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/run-test/Makefile.in +--- a/ndb/test/run-test/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/run-test/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -243,6 +243,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/src/Makefile.in +--- a/ndb/test/src/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/src/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -213,6 +213,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = 
@LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/test/tools/Makefile.in +--- a/ndb/test/tools/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/test/tools/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -325,6 +325,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a ndb/tools/Makefile.in +--- a/ndb/tools/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/ndb/tools/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -344,6 +344,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a netware/Makefile.in +--- a/netware/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/netware/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -199,6 +199,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a os2/Makefile.in +--- a/os2/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/os2/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a os2/include/Makefile.in +--- a/os2/include/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/os2/include/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -156,6 +156,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a os2/include/sys/Makefile.in +--- a/os2/include/sys/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/os2/include/sys/Makefile.in Tue Feb 17 
22:33:23 2009 -0800 +@@ -144,6 +144,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a patch_info/userstats.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/userstats.info Tue Feb 17 22:33:23 2009 -0800 +@@ -0,0 +1,14 @@ ++File=userstatsv2.patch ++Name=SHOW USER/TABLE/INDEX statistics ++Version=V2 ++Author=Google ++License=GPL ++Comment=Added INFORMATION_SCHEMA.*_STATISTICS ++2008-12-01 ++YK: fix behavior for prepared statements ++ ++2008-11-26 ++YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF) ++ ++2008-12-09 ++YK: fixed "Row_sent: 0" problem at microslow_innodb.patch +diff -r 23e5576aa59a pstack/Makefile.in +--- a/pstack/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/pstack/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -196,6 +196,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a pstack/aout/Makefile.in +--- a/pstack/aout/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/pstack/aout/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -134,6 +134,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a regex/Makefile.in +--- a/regex/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/regex/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -180,6 +180,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a scripts/Makefile.in +--- a/scripts/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ 
b/scripts/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a server-tools/Makefile.in +--- a/server-tools/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/server-tools/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -155,6 +155,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a server-tools/instance-manager/Makefile.in +--- a/server-tools/instance-manager/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/server-tools/instance-manager/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -205,6 +205,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a sql/Makefile.in +--- a/sql/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -274,6 +274,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/ha_innodb.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -3324,6 +3324,8 @@ + + error = row_insert_for_mysql((byte*) record, prebuilt); + ++ if (error == DB_SUCCESS) rows_changed++; ++ + if (error == DB_SUCCESS && auto_inc_used) { + + /* Fetch the value that was set in the autoincrement field */ +@@ -3596,6 +3598,8 @@ + } + } + ++ if (error == DB_SUCCESS) rows_changed++; ++ + innodb_srv_conc_exit_innodb(prebuilt->trx); + + error = convert_error_code_to_mysql(error, user_thd); +@@ -3643,6 +3647,8 @@ + 
innodb_srv_conc_enter_innodb(prebuilt->trx); + + error = row_update_for_mysql((byte*) record, prebuilt); ++ ++ if (error == DB_SUCCESS) rows_changed++; + + innodb_srv_conc_exit_innodb(prebuilt->trx); + +@@ -4076,6 +4082,9 @@ + if (ret == DB_SUCCESS) { + error = 0; + table->status = 0; ++ rows_read++; ++ if (active_index >= 0 && active_index < MAX_KEY) ++ index_rows_read[active_index]++; + + } else if (ret == DB_RECORD_NOT_FOUND) { + error = HA_ERR_END_OF_FILE; +diff -r 23e5576aa59a sql/ha_myisam.cc +--- a/sql/ha_myisam.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/ha_myisam.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -670,7 +670,9 @@ + if ((error= update_auto_increment())) + return error; + } +- return mi_write(file,buf); ++ int error=mi_write(file,buf); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) +@@ -1518,13 +1520,17 @@ + statistic_increment(table->in_use->status_var.ha_update_count,&LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); +- return mi_update(file,old_data,new_data); ++ int error=mi_update(file,old_data,new_data); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::delete_row(const byte * buf) + { + statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); +- return mi_delete(file,buf); ++ int error=mi_delete(file,buf); ++ if (!error) rows_changed++; ++ return error; + } + + int ha_myisam::index_read(byte * buf, const byte * key, +@@ -1535,6 +1541,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? 
file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1545,6 +1558,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,index, key, key_len, find_flag); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1555,6 +1575,13 @@ + &LOCK_status); + int error=mi_rkey(file,buf,active_index, key, key_len, HA_READ_PREFIX_LAST); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1565,6 +1592,13 @@ + &LOCK_status); + int error=mi_rnext(file,buf,active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1575,6 +1609,13 @@ + &LOCK_status); + int error=mi_rprev(file,buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1585,6 +1626,13 @@ + &LOCK_status); + int error=mi_rfirst(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1595,6 +1643,13 @@ + &LOCK_status); + int error=mi_rlast(file, buf, active_index); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? 
file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1611,6 +1666,13 @@ + error= mi_rnext_same(file,buf); + } while (error == HA_ERR_RECORD_DELETED); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) { ++ rows_read++; ++ ++ int inx = (active_index == -1) ? file->lastinx : active_index; ++ if (inx >= 0 && inx < MAX_KEY) ++ index_rows_read[inx]++; ++ } + return error; + } + +@@ -1628,6 +1690,7 @@ + &LOCK_status); + int error=mi_scan(file, buf); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) rows_read++; + return error; + } + +@@ -1642,6 +1705,7 @@ + &LOCK_status); + int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length)); + table->status=error ? STATUS_NOT_FOUND: 0; ++ if (!error) rows_read++; + return error; + } + +diff -r 23e5576aa59a sql/handler.cc +--- a/sql/handler.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/handler.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -725,6 +725,8 @@ + if (cookie) + tc_log->unlog(cookie, xid); + DBUG_EXECUTE_IF("crash_commit_after", abort();); ++ if (is_real_trans) ++ thd->diff_commit_trans++; + end: + if (is_real_trans) + start_waiting_global_read_lock(thd); +@@ -829,6 +831,7 @@ + thd->transaction.cleanup(); + } + } ++ thd->diff_rollback_trans++; + #endif /* USING_TRANSACTIONS */ + if (all) + thd->transaction_rollback_request= FALSE; +@@ -1212,6 +1215,7 @@ + statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); + *ht=0; // keep it conveniently zero-filled + } ++ thd->diff_rollback_trans++; + DBUG_RETURN(error); + } + +@@ -1442,6 +1446,8 @@ + else + dupp_ref=ref+ALIGN_SIZE(ref_length); + } ++ rows_read = rows_changed = 0; ++ memset(index_rows_read, 0, sizeof(index_rows_read)); + DBUG_RETURN(error); + } + +@@ -2276,6 +2282,111 @@ + return error; + } + ++// Updates the global table stats with the TABLE this handler represents. 
++void handler::update_global_table_stats() { ++ if (!opt_userstat_running) { ++ rows_read = rows_changed = 0; ++ return; ++ } ++ ++ if (!rows_read && !rows_changed) return; // Nothing to update. ++ // table_cache_key is db_name + '\0' + table_name + '\0'. ++ if (!table->s || !table->s->table_cache_key || !table->s->table_name) return; ++ ++ TABLE_STATS* table_stats; ++ char key[NAME_LEN * 2 + 2]; ++ // [db] + '.' + [table] ++ sprintf(key, "%s.%s", table->s->table_cache_key, table->s->table_name); ++ ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ // Gets the global table stats, creating one if necessary. ++ if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats, ++ (byte*)key, ++ strlen(key)))) { ++ if (!(table_stats = ((TABLE_STATS*) ++ my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) { ++ // Out of memory. ++ sql_print_error("Allocating table stats failed."); ++ goto end; ++ } ++ strncpy(table_stats->table, key, sizeof(table_stats->table)); ++ table_stats->rows_read = 0; ++ table_stats->rows_changed = 0; ++ table_stats->rows_changed_x_indexes = 0; ++ table_stats->engine_type = (int) ht->db_type; ++ ++ if (my_hash_insert(&global_table_stats, (byte*)table_stats)) { ++ // Out of memory. ++ sql_print_error("Inserting table stats failed."); ++ my_free((char*)table_stats, 0); ++ goto end; ++ } ++ } ++ // Updates the global table stats. ++ table_stats->rows_read += rows_read; ++ table_stats->rows_changed += rows_changed; ++ table_stats->rows_changed_x_indexes += ++ rows_changed * (table->s->keys ? table->s->keys : 1); ++ current_thd->diff_total_read_rows += rows_read; ++ rows_read = rows_changed = 0; ++end: ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++} ++ ++// Updates the global index stats with this handler's accumulated index reads. ++void handler::update_global_index_stats() { ++ // table_cache_key is db_name + '\0' + table_name + '\0'. 
++ if (!table->s || !table->s->table_cache_key || !table->s->table_name) return; ++ ++ if (!opt_userstat_running) { ++ for (int x = 0; x < table->s->keys; x++) { ++ index_rows_read[x] = 0; ++ } ++ return; ++ } ++ ++ for (int x = 0; x < table->s->keys; x++) { ++ if (index_rows_read[x]) { ++ // Rows were read using this index. ++ KEY* key_info = &table->key_info[x]; ++ ++ if (!key_info->name) continue; ++ ++ INDEX_STATS* index_stats; ++ char key[NAME_LEN * 3 + 3]; ++ // [db] + '.' + [table] + '.' + [index] ++ sprintf(key, "%s.%s.%s", table->s->table_cache_key, ++ table->s->table_name, key_info->name); ++ ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ // Gets the global index stats, creating one if necessary. ++ if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats, ++ (byte*)key, ++ strlen(key)))) { ++ if (!(index_stats = ((INDEX_STATS*) ++ my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) { ++ // Out of memory. ++ sql_print_error("Allocating index stats failed."); ++ goto end; ++ } ++ strncpy(index_stats->index, key, sizeof(index_stats->index)); ++ index_stats->rows_read = 0; ++ ++ if (my_hash_insert(&global_index_stats, (byte*)index_stats)) { ++ // Out of memory. ++ sql_print_error("Inserting index stats failed."); ++ my_free((char*)index_stats, 0); ++ goto end; ++ } ++ } ++ // Updates the global index stats. ++ index_stats->rows_read += index_rows_read[x]; ++ index_rows_read[x] = 0; ++end: ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ } ++ } ++} + + /**************************************************************************** + ** Some general functions that isn't in the handler class +diff -r 23e5576aa59a sql/handler.h +--- a/sql/handler.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/handler.h Tue Feb 17 22:33:23 2009 -0800 +@@ -30,6 +30,10 @@ + #if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || \ + defined(HAVE_NDBCLUSTER_DB) + #define USING_TRANSACTIONS ++#endif ++ ++#if MAX_KEY > 128 ++#error MAX_KEY is too large. 
Values up to 128 are supported. + #endif + + // the following is for checking tables +@@ -604,6 +608,9 @@ + bool auto_increment_column_changed; + bool implicit_emptied; /* Can be !=0 only if HEAP */ + const COND *pushed_cond; ++ ulonglong rows_read; ++ ulonglong rows_changed; ++ ulonglong index_rows_read[MAX_KEY]; + + handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg), + ht(ht_arg), +@@ -615,8 +622,10 @@ + ref_length(sizeof(my_off_t)), block_size(0), + raid_type(0), ft_handler(0), inited(NONE), + locked(FALSE), implicit_emptied(0), +- pushed_cond(NULL) +- {} ++ pushed_cond(NULL), rows_read(0), rows_changed(0) ++ { ++ memset(index_rows_read, 0, sizeof(index_rows_read)); ++ } + virtual ~handler(void) { DBUG_ASSERT(locked == FALSE); /* TODO: DBUG_ASSERT(inited == NONE); */ } + virtual handler *clone(MEM_ROOT *mem_root); + int ha_open(const char *name, int mode, int test_if_locked); +@@ -625,7 +634,11 @@ + virtual void print_error(int error, myf errflag); + virtual bool get_error_message(int error, String *buf); + uint get_dup_key(int error); +- void change_table_ptr(TABLE *table_arg) { table=table_arg; } ++ void change_table_ptr(TABLE *table_arg) { ++ table=table_arg; ++ rows_read = rows_changed = 0; ++ memset(index_rows_read, 0, sizeof(index_rows_read)); ++ } + virtual double scan_time() + { return ulonglong2double(data_file_length) / IO_SIZE + 2; } + virtual double read_time(uint index, uint ranges, ha_rows rows) +@@ -886,6 +899,9 @@ + virtual bool is_crashed() const { return 0; } + virtual bool auto_repair() const { return 0; } + ++ void update_global_table_stats(); ++ void update_global_index_stats(); ++ + /* + default rename_table() and delete_table() rename/delete files with a + given name and extensions from bas_ext() +diff -r 23e5576aa59a sql/lex.h +--- a/sql/lex.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/lex.h Tue Feb 17 22:33:23 2009 -0800 +@@ -109,6 +109,7 @@ + { "CHECKSUM", SYM(CHECKSUM_SYM)}, + { "CIPHER", SYM(CIPHER_SYM)}, + { 
"CLIENT", SYM(CLIENT_SYM)}, ++ { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)}, + { "CLOSE", SYM(CLOSE_SYM)}, + { "CODE", SYM(CODE_SYM)}, + { "COLLATE", SYM(COLLATE_SYM)}, +@@ -238,6 +239,7 @@ + { "IN", SYM(IN_SYM)}, + { "INDEX", SYM(INDEX_SYM)}, + { "INDEXES", SYM(INDEXES)}, ++ { "INDEX_STATISTICS", SYM(INDEX_STATS_SYM)}, + { "INFILE", SYM(INFILE)}, + { "INNER", SYM(INNER_SYM)}, + { "INNOBASE", SYM(INNOBASE_SYM)}, +@@ -443,6 +445,7 @@ + { "SIGNED", SYM(SIGNED_SYM)}, + { "SIMPLE", SYM(SIMPLE_SYM)}, + { "SLAVE", SYM(SLAVE)}, ++ { "SLOW", SYM(SLOW_SYM)}, + { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, + { "SMALLINT", SYM(SMALLINT)}, + { "SOME", SYM(ANY_SYM)}, +@@ -488,6 +491,7 @@ + { "TABLE", SYM(TABLE_SYM)}, + { "TABLES", SYM(TABLES)}, + { "TABLESPACE", SYM(TABLESPACE)}, ++ { "TABLE_STATISTICS", SYM(TABLE_STATS_SYM)}, + { "TEMPORARY", SYM(TEMPORARY)}, + { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, + { "TERMINATED", SYM(TERMINATED)}, +@@ -525,6 +529,7 @@ + { "USE", SYM(USE_SYM)}, + { "USER", SYM(USER)}, + { "USER_RESOURCES", SYM(RESOURCES)}, ++ { "USER_STATISTICS", SYM(USER_STATS_SYM)}, + { "USE_FRM", SYM(USE_FRM)}, + { "USING", SYM(USING)}, + { "UTC_DATE", SYM(UTC_DATE_SYM)}, +diff -r 23e5576aa59a sql/log.cc +--- a/sql/log.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/log.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -1954,18 +1954,24 @@ + thd->current_insert_id); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->insert_id_used) + { + Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->rand_used) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); + if (e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + if (thd->user_var_events.elements) + { +@@ -1981,6 +1987,8 @@ + user_var_event->charset_number); + if 
(e.write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += e.data_written; + } + } + } +@@ -1991,6 +1999,8 @@ + + if (event_info->write(file)) + goto err; ++ if (file == &log_file) ++ thd->binlog_bytes_written += event_info->data_written; + + if (file == &log_file) // we are writing to the real log (disk) + { +@@ -2112,6 +2122,7 @@ + */ + if (qinfo.write(&log_file)) + goto err; ++ thd->binlog_bytes_written += qinfo.data_written; + + /* Read from the file used to cache the queries .*/ + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) +@@ -2158,6 +2169,7 @@ + /* write the first half of the split header */ + if (my_b_write(&log_file, header, carry)) + goto err; ++ thd->binlog_bytes_written += carry; + + /* + copy fixed second half of header to cache so the correct +@@ -2226,6 +2238,8 @@ + /* Write data to the binary log file */ + if (my_b_write(&log_file, cache->read_pos, length)) + goto err; ++ thd->binlog_bytes_written += length; ++ + cache->read_pos=cache->read_end; // Mark buffer used up + DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;); + } while ((length=my_b_fill(cache))); +@@ -2234,6 +2248,8 @@ + + if (commit_event->write(&log_file)) + goto err; ++ thd->binlog_bytes_written += commit_event->data_written; ++ + #ifndef DBUG_OFF + DBUG_skip_commit: + #endif +diff -r 23e5576aa59a sql/mysql_priv.h +--- a/sql/mysql_priv.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/mysql_priv.h Tue Feb 17 22:33:23 2009 -0800 +@@ -823,7 +823,15 @@ + bool multi_delete_set_locks_and_link_aux_tables(LEX *lex); + void init_max_user_conn(void); + void init_update_queries(void); ++void init_global_user_stats(void); ++void init_global_table_stats(void); ++void init_global_index_stats(void); ++void init_global_client_stats(void); + void free_max_user_conn(void); ++void free_global_user_stats(void); ++void free_global_table_stats(void); ++void free_global_index_stats(void); ++void free_global_client_stats(void); + pthread_handler_t 
handle_one_connection(void *arg); + pthread_handler_t handle_bootstrap(void *arg); + void end_thread(THD *thd,bool put_in_cache); +@@ -1402,6 +1410,7 @@ + extern ulong max_connections,max_connect_errors, connect_timeout; + extern ulong slave_net_timeout, slave_trans_retries; + extern uint max_user_connections; ++extern ulonglong denied_connections; + extern ulong what_to_log,flush_time; + extern ulong query_buff_size, thread_stack; + extern ulong max_prepared_stmt_count, prepared_stmt_count; +@@ -1432,6 +1441,7 @@ + extern my_bool opt_safe_show_db, opt_local_infile; + extern my_bool opt_slave_compressed_protocol, use_temp_pool; + extern my_bool opt_readonly, lower_case_file_system; ++extern my_bool opt_userstat_running; + extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; + extern my_bool opt_secure_auth; + extern char* opt_secure_file_priv; +@@ -1479,6 +1489,14 @@ + extern struct system_variables max_system_variables; + extern struct system_status_var global_status_var; + extern struct rand_struct sql_rand; ++extern HASH global_user_stats; ++extern HASH global_client_stats; ++extern pthread_mutex_t LOCK_global_user_client_stats; ++extern HASH global_table_stats; ++extern pthread_mutex_t LOCK_global_table_stats; ++extern HASH global_index_stats; ++extern pthread_mutex_t LOCK_global_index_stats; ++extern pthread_mutex_t LOCK_stats; + + extern const char *opt_date_time_formats[]; + extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; +diff -r 23e5576aa59a sql/mysqld.cc +--- a/sql/mysqld.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/mysqld.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -415,6 +415,7 @@ + uint opt_large_page_size= 0; + my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; + char* opt_slow_logname= 0; ++my_bool opt_userstat_running= 0; + /* + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is +@@ -451,6 +452,7 @@ + ulong 
binlog_cache_use= 0, binlog_cache_disk_use= 0; + ulong max_connections, max_connect_errors; + uint max_user_connections= 0; ++ulonglong denied_connections = 0; + /* + Limit of the total number of prepared statements in the server. + Is necessary to protect the server against out-of-memory attacks. +@@ -551,6 +553,10 @@ + LOCK_crypt, LOCK_bytes_sent, LOCK_bytes_received, + LOCK_global_system_variables, + LOCK_user_conn, LOCK_slave_list, LOCK_active_mi; ++pthread_mutex_t LOCK_stats; ++pthread_mutex_t LOCK_global_user_client_stats; ++pthread_mutex_t LOCK_global_table_stats; ++pthread_mutex_t LOCK_global_index_stats; + /* + The below lock protects access to two global server variables: + max_prepared_stmt_count and prepared_stmt_count. These variables +@@ -1192,6 +1198,10 @@ + x_free(opt_secure_file_priv); + bitmap_free(&temp_pool); + free_max_user_conn(); ++ free_global_user_stats(); ++ free_global_client_stats(); ++ free_global_table_stats(); ++ free_global_index_stats(); + #ifdef HAVE_REPLICATION + end_slave_list(); + free_list(&replicate_do_db); +@@ -1306,6 +1316,10 @@ + (void) pthread_cond_destroy(&COND_thread_cache); + (void) pthread_cond_destroy(&COND_flush_thread_cache); + (void) pthread_cond_destroy(&COND_manager); ++ (void) pthread_mutex_destroy(&LOCK_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_user_client_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_table_stats); ++ (void) pthread_mutex_destroy(&LOCK_global_index_stats); + } + + #endif /*EMBEDDED_LIBRARY*/ +@@ -3153,6 +3167,10 @@ + (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST); + (void) pthread_cond_init(&COND_rpl_status, NULL); + #endif ++ (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST); ++ (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST); + sp_cache_init(); + /* Parameter for threads 
created for connections */ + (void) pthread_attr_init(&connection_attrib); +@@ -3424,6 +3442,10 @@ + sql_print_error("Out of memory"); + unireg_abort(1); + } ++ ++ init_global_table_stats(); ++ init_global_index_stats(); ++ + if (ha_init()) + { + sql_print_error("Can't init databases"); +@@ -3506,6 +3528,8 @@ + + init_max_user_conn(); + init_update_queries(); ++ init_global_user_stats(); ++ init_global_client_stats(); + DBUG_RETURN(0); + } + +@@ -4232,6 +4256,7 @@ + { + DBUG_PRINT("error",("Too many connections")); + close_connection(thd, ER_CON_COUNT_ERROR, 1); ++ statistic_increment(denied_connections, &LOCK_status); + delete thd; + DBUG_VOID_RETURN; + } +@@ -5050,6 +5075,7 @@ + OPT_PROFILING, + OPT_SLOW_LOG, + OPT_SLOW_QUERY_LOG_FILE, ++ OPT_USERSTAT_RUNNING, + OPT_USE_GLOBAL_LONG_QUERY_TIME, + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, +@@ -6498,6 +6524,10 @@ + (gptr*) &max_system_variables.net_wait_timeout, 0, GET_ULONG, + REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT), + 0, 1, 0}, ++ {"userstat_running", OPT_USERSTAT_RUNNING, ++ "Control USER_STATISTICS, CLIENT_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running", ++ (gptr*) &opt_userstat_running, (gptr*) &opt_userstat_running, ++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} + }; + +diff -r 23e5576aa59a sql/set_var.cc +--- a/sql/set_var.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/set_var.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -325,6 +325,7 @@ + sys_var_thd_ulong sys_read_buff_size("read_buffer_size", + &SV::read_buff_size); + sys_var_bool_ptr sys_readonly("read_only", &opt_readonly); ++sys_var_bool_ptr sys_userstat_running("userstat_running", &opt_userstat_running); + sys_var_thd_ulong sys_read_rnd_buff_size("read_rnd_buffer_size", + &SV::read_rnd_buff_size); + sys_var_thd_ulong sys_div_precincrement("div_precision_increment", +@@ -828,6 +829,7 @@ + &sys_trans_alloc_block_size, + &sys_trans_prealloc_size, + 
&sys_tx_isolation, ++ &sys_userstat_running, + &sys_version, + #ifdef HAVE_BERKELEY_DB + &sys_version_bdb, +@@ -1174,6 +1176,7 @@ + {sys_tx_isolation.name, (char*) &sys_tx_isolation, SHOW_SYS}, + {sys_updatable_views_with_limit.name, + (char*) &sys_updatable_views_with_limit,SHOW_SYS}, ++ {sys_userstat_running.name, (char*) &sys_userstat_running, SHOW_SYS}, + {sys_use_global_long_query_time.name, (char*) &sys_use_global_long_query_time, SHOW_SYS}, + {sys_version.name, (char*) &sys_version, SHOW_SYS}, + #ifdef HAVE_BERKELEY_DB +diff -r 23e5576aa59a sql/share/Makefile.in +--- a/sql/share/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/share/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -144,6 +144,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a sql/sql_base.cc +--- a/sql/sql_base.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_base.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -624,6 +624,12 @@ + DBUG_ENTER("close_thread_table"); + DBUG_ASSERT(table->key_read == 0); + DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); ++ ++ if(table->file) ++ { ++ table->file->update_global_table_stats(); ++ table->file->update_global_index_stats(); ++ } + + *table_ptr=table->next; + if (table->needs_reopen_or_name_lock() || +@@ -670,6 +676,9 @@ + { + DBUG_ENTER("close_temporary"); + char path[FN_REFLEN]; ++ ++ table->file->update_global_table_stats(); ++ table->file->update_global_index_stats(); + db_type table_type=table->s->db_type; + strmov(path,table->s->path); + free_io_cache(table); +diff -r 23e5576aa59a sql/sql_class.cc +--- a/sql/sql_class.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_class.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -236,6 +236,13 @@ + bzero(ha_data, sizeof(ha_data)); + mysys_var=0; + binlog_evt_union.do_union= FALSE; ++ busy_time = 0; ++ cpu_time = 0; ++ bytes_received = 0; ++ bytes_sent = 
0; ++ binlog_bytes_written = 0; ++ updated_row_count = 0; ++ sent_row_count_2 = 0; + #ifndef DBUG_OFF + dbug_sentry=THD_SENTRY_MAGIC; + #endif +@@ -369,6 +376,88 @@ + total_warn_count= 0; + update_charset(); + bzero((char *) &status_var, sizeof(status_var)); ++ reset_stats(); ++} ++ ++// Resets stats in a THD. ++void THD::reset_stats(void) { ++ current_connect_time = time(NULL); ++ last_global_update_time = current_connect_time; ++ reset_diff_stats(); ++} ++ ++// Resets the 'diff' stats, which are used to update global stats. ++void THD::reset_diff_stats(void) { ++ diff_total_busy_time = 0; ++ diff_total_cpu_time = 0; ++ diff_total_bytes_received = 0; ++ diff_total_bytes_sent = 0; ++ diff_total_binlog_bytes_written = 0; ++ diff_total_sent_rows = 0; ++ diff_total_updated_rows = 0; ++ diff_total_read_rows = 0; ++ diff_select_commands = 0; ++ diff_update_commands = 0; ++ diff_other_commands = 0; ++ diff_commit_trans = 0; ++ diff_rollback_trans = 0; ++ diff_denied_connections = 0; ++ diff_lost_connections = 0; ++ diff_access_denied_errors = 0; ++ diff_empty_queries = 0; ++} ++ ++// Updates 'diff' stats of a THD. ++void THD::update_stats(bool ran_command) { ++ if (opt_userstat_running) { ++ diff_total_busy_time += busy_time; ++ diff_total_cpu_time += cpu_time; ++ diff_total_bytes_received += bytes_received; ++ diff_total_bytes_sent += bytes_sent; ++ diff_total_binlog_bytes_written += binlog_bytes_written; ++ diff_total_sent_rows += sent_row_count_2; ++ diff_total_updated_rows += updated_row_count; ++ // diff_total_read_rows is updated in handler.cc. ++ ++ if (ran_command) { ++ // The replication thread has the COM_CONNECT command. ++ if ((old_command == COM_QUERY || command == COM_CONNECT) && ++ (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) { ++ // A SQL query. 
++ if (lex->sql_command == SQLCOM_SELECT) { ++ if (lex->orig_sql_command == SQLCOM_END) { ++ diff_select_commands++; ++ if (!sent_row_count_2) ++ diff_empty_queries++; ++ } else { ++ // 'SHOW ' commands become SQLCOM_SELECT. ++ diff_other_commands++; ++ // 'SHOW ' commands shouldn't inflate total sent row count. ++ diff_total_sent_rows -= sent_row_count_2; ++ } ++ } else if (is_update_query(lex->sql_command)) { ++ diff_update_commands++; ++ } else { ++ diff_other_commands++; ++ } ++ } ++ } ++ // diff_commit_trans is updated in handler.cc. ++ // diff_rollback_trans is updated in handler.cc. ++ // diff_denied_connections is updated in sql_parse.cc. ++ // diff_lost_connections is updated in sql_parse.cc. ++ // diff_access_denied_errors is updated in sql_parse.cc. ++ ++ /* reset counters to zero to avoid double-counting since values ++ are already store in diff_total_*. */ ++ } ++ busy_time = 0; ++ cpu_time = 0; ++ bytes_received = 0; ++ bytes_sent = 0; ++ binlog_bytes_written = 0; ++ updated_row_count = 0; ++ sent_row_count_2 = 0; + } + + +@@ -408,6 +497,9 @@ + + void THD::change_user(void) + { ++ pthread_mutex_lock(&LOCK_status); ++ add_to_status(&global_status_var, &status_var); ++ pthread_mutex_unlock(&LOCK_status); + cleanup(); + cleanup_done= 0; + init(); +@@ -892,6 +984,33 @@ + } + #endif + ++char *THD::get_client_host_port(THD *client) ++{ ++ Security_context *client_sctx= client->security_ctx; ++ char *client_host= NULL; ++ ++ if (client->peer_port && (client_sctx->host || client_sctx->ip) && ++ security_ctx->host_or_ip[0]) ++ { ++ if ((client_host= this->alloc(LIST_PROCESS_HOST_LEN+1))) ++ my_snprintf((char *) client_host, LIST_PROCESS_HOST_LEN, ++ "%s:%u", client_sctx->host_or_ip, client->peer_port); ++ } ++ else ++ client_host= this->strdup(client_sctx->host_or_ip[0] ? ++ client_sctx->host_or_ip : ++ client_sctx->host ? 
client_sctx->host : ""); ++ ++ return client_host; ++} ++ ++const char *get_client_host(THD *client) ++{ ++ return client->security_ctx->host_or_ip[0] ? ++ client->security_ctx->host_or_ip : ++ client->security_ctx->host ? client->security_ctx->host : ""; ++} ++ + + struct Item_change_record: public ilink + { +@@ -1062,6 +1181,7 @@ + } + } + thd->sent_row_count++; ++ thd->sent_row_count_2++; + if (!thd->vio_ok()) + DBUG_RETURN(0); + if (!thd->net.report_error) +@@ -1154,6 +1274,7 @@ + select_export::~select_export() + { + thd->sent_row_count=row_count; ++ thd->sent_row_count_2=row_count; + } + + +@@ -2088,6 +2209,7 @@ + if (likely(thd != 0)) + { /* current_thd==0 when close_connection() calls net_send_error() */ + thd->status_var.bytes_sent+= length; ++ thd->bytes_sent+= length; + } + } + +@@ -2095,6 +2217,7 @@ + void thd_increment_bytes_received(ulong length) + { + current_thd->status_var.bytes_received+= length; ++ current_thd->bytes_received+= length; + } + + +diff -r 23e5576aa59a sql/sql_class.h +--- a/sql/sql_class.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_class.h Tue Feb 17 22:33:23 2009 -0800 +@@ -1299,6 +1299,8 @@ + first byte of the packet in do_command() + */ + enum enum_server_command command; ++ // Used to save the command, before it is set to COM_SLEEP. ++ enum enum_server_command old_command; + uint32 server_id; + uint32 file_id; // for LOAD DATA INFILE + /* +@@ -1488,6 +1490,8 @@ + /* variables.transaction_isolation is reset to this after each commit */ + enum_tx_isolation session_tx_isolation; + enum_check_fields count_cuted_fields; ++ ha_rows updated_row_count; ++ ha_rows sent_row_count_2; /* for userstat */ + + DYNAMIC_ARRAY user_var_events; /* For user variables replication */ + MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */ +@@ -1594,6 +1598,49 @@ + */ + LOG_INFO* current_linfo; + NET* slave_net; // network connection from slave -> m. ++ ++ /* ++ Used to update global user stats. 
The global user stats are updated ++ occasionally with the 'diff' variables. After the update, the 'diff' ++ variables are reset to 0. ++ */ ++ // Time when the current thread connected to MySQL. ++ time_t current_connect_time; ++ // Last time when THD stats were updated in global_user_stats. ++ time_t last_global_update_time; ++ // Busy (non-idle) time for just one command. ++ double busy_time; ++ // Busy time not updated in global_user_stats yet. ++ double diff_total_busy_time; ++ // Cpu (non-idle) time for just one thread. ++ double cpu_time; ++ // Cpu time not updated in global_user_stats yet. ++ double diff_total_cpu_time; ++ /* bytes counting */ ++ ulonglong bytes_received; ++ ulonglong diff_total_bytes_received; ++ ulonglong bytes_sent; ++ ulonglong diff_total_bytes_sent; ++ ulonglong binlog_bytes_written; ++ ulonglong diff_total_binlog_bytes_written; ++ ++ // Number of rows not reflected in global_user_stats yet. ++ ha_rows diff_total_sent_rows, diff_total_updated_rows, diff_total_read_rows; ++ // Number of commands not reflected in global_user_stats yet. ++ ulonglong diff_select_commands, diff_update_commands, diff_other_commands; ++ // Number of transactions not reflected in global_user_stats yet. ++ ulonglong diff_commit_trans, diff_rollback_trans; ++ // Number of connection errors not reflected in global_user_stats yet. ++ ulonglong diff_denied_connections, diff_lost_connections; ++ // Number of db access denied, not reflected in global_user_stats yet. ++ ulonglong diff_access_denied_errors; ++ // Number of queries that return 0 rows ++ ulonglong diff_empty_queries; ++ ++ // Per account query delay in miliseconds. When not 0, sleep this number of ++ // milliseconds before every SQL command. ++ ulonglong query_delay_millis; ++ + /* Used by the sys_var class to store temporary values */ + union + { +@@ -1649,6 +1696,11 @@ + alloc_root. 
+ */ + void init_for_queries(); ++ void reset_stats(void); ++ void reset_diff_stats(void); ++ // ran_command is true when this is called immediately after a ++ // command has been run. ++ void update_stats(bool ran_command); + void change_user(void); + void cleanup(void); + void cleanup_after_query(); +@@ -1878,7 +1930,13 @@ + if (p_db_length) + *p_db_length= db_length; + return FALSE; ++ ++ // Returns string as 'IP:port' for the client-side of the connnection represented ++ // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of ++ // this THD and that is not reclaimed immediately, so use sparingly. May return NULL. + } ++ ++ char *get_client_host_port(THD *client); + + public: + /** +@@ -1922,6 +1980,11 @@ + MEM_ROOT main_mem_root; + }; + ++// Returns string as 'IP' for the client-side of the connection represented by ++// 'client'. Does not allocate memory. May return "". ++const char *get_client_host(THD *client); ++ ++#define LIST_PROCESS_HOST_LEN 64 + + #define tmp_disable_binlog(A) \ + {ulonglong tmp_disable_binlog__save_options= (A)->options; \ +diff -r 23e5576aa59a sql/sql_delete.cc +--- a/sql/sql_delete.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_delete.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -358,6 +358,7 @@ + send_ok(thd,deleted); + DBUG_PRINT("info",("%ld records deleted",(long) deleted)); + } ++ thd->updated_row_count += deleted; + DBUG_RETURN(error >= 0 || thd->net.report_error); + } + +@@ -869,6 +870,7 @@ + thd->row_count_func= deleted; + ::send_ok(thd, deleted); + } ++ thd->updated_row_count += deleted; + return 0; + } + +diff -r 23e5576aa59a sql/sql_insert.cc +--- a/sql/sql_insert.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_insert.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -989,6 +989,7 @@ + thd->row_count_func= info.copied + info.deleted + updated; + ::send_ok(thd, (ulong) thd->row_count_func, id, buff); + } ++ thd->updated_row_count += thd->row_count_func; + thd->abort_on_warning= 0; + DBUG_RETURN(FALSE); + +@@ 
-3036,6 +3037,7 @@ + autoinc_value_of_first_inserted_row : thd->insert_id_used ? + thd->last_insert_id : 0; + ::send_ok(thd, (ulong) thd->row_count_func, id, buff); ++ thd->updated_row_count += thd->row_count_func; + DBUG_RETURN(0); + } + +diff -r 23e5576aa59a sql/sql_lex.h +--- a/sql/sql_lex.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_lex.h Tue Feb 17 22:33:23 2009 -0800 +@@ -101,6 +101,9 @@ + When a command is added here, be sure it's also added in mysqld.cc + in "struct show_var_st status_vars[]= {" ... + */ ++ // TODO(mcallaghan): update status_vars in mysqld to export these ++ SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, ++ SQLCOM_SHOW_CLIENT_STATS, + /* This should be the last !!! */ + SQLCOM_END + }; +diff -r 23e5576aa59a sql/sql_parse.cc +--- a/sql/sql_parse.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_parse.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -78,6 +78,12 @@ + const char *table_name); + static bool check_show_create_table_access(THD *thd, TABLE_LIST *table); + ++// Increments connection count for user. 
++static int increment_connection_count(THD* thd, bool use_lock); ++ ++// Uses the THD to update the global stats by user name and client IP ++void update_global_user_stats(THD* thd, bool create_user, time_t now); ++ + const char *any_db="*any*"; // Special symbol for check_access + + const char *command_name[]={ +@@ -145,6 +151,17 @@ + #ifndef EMBEDDED_LIBRARY + static bool do_command(THD *thd); + #endif // EMBEDDED_LIBRARY ++ ++HASH global_user_stats; ++HASH global_client_stats; ++// Protects global_user_stats and global_client_stats ++extern pthread_mutex_t LOCK_global_user_client_stats; ++ ++HASH global_table_stats; ++extern pthread_mutex_t LOCK_global_table_stats; ++ ++HASH global_index_stats; ++extern pthread_mutex_t LOCK_global_index_stats; + + #ifdef __WIN__ + extern void win_install_sigabrt_handler(void); +@@ -504,6 +521,7 @@ + mysql_log.write(thd,COM_CONNECT,ER(ER_NOT_SUPPORTED_AUTH_MODE)); + DBUG_RETURN(-1); + } ++ thd->diff_access_denied_errors++; + net_printf_error(thd, ER_ACCESS_DENIED_ERROR, + thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip, +@@ -536,12 +554,190 @@ + void init_max_user_conn(void) + { + #ifndef NO_EMBEDDED_ACCESS_CHECKS +- (void) hash_init(&hash_user_connections,system_charset_info,max_connections, +- 0,0, +- (hash_get_key) get_key_conn, (hash_free_key) free_user, +- 0); +-#endif +-} ++ if (hash_init(&hash_user_connections,system_charset_info,max_connections, ++ 0,0, ++ (hash_get_key) get_key_conn, (hash_free_key) free_user, ++ 0)) { ++ sql_print_error("Initializing hash_user_connections failed."); ++ exit(1); ++ } ++#endif ++} ++ ++byte *get_key_user_stats(USER_STATS *user_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(user_stats->user); ++ return (byte*)user_stats->user; ++} ++ ++void free_user_stats(USER_STATS* user_stats) ++{ ++ my_free((char*)user_stats, MYF(0)); ++} ++ ++void init_user_stats(USER_STATS *user_stats, ++ const char *user, ++ const char *priv_user, ++ uint 
total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries) ++{ ++ DBUG_ENTER("init_user_stats"); ++ DBUG_PRINT("info", ++ ("Add user_stats entry for user %s - priv_user %s", ++ user, priv_user)); ++ strncpy(user_stats->user, user, sizeof(user_stats->user)); ++ strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user)); ++ ++ user_stats->total_connections = total_connections; ++ user_stats->concurrent_connections = concurrent_connections; ++ user_stats->connected_time = connected_time; ++ user_stats->busy_time = busy_time; ++ user_stats->cpu_time = cpu_time; ++ user_stats->bytes_received = bytes_received; ++ user_stats->bytes_sent = bytes_sent; ++ user_stats->binlog_bytes_written = binlog_bytes_written; ++ user_stats->rows_fetched = rows_fetched; ++ user_stats->rows_updated = rows_updated; ++ user_stats->rows_read = rows_read; ++ user_stats->select_commands = select_commands; ++ user_stats->update_commands = update_commands; ++ user_stats->other_commands = other_commands; ++ user_stats->commit_trans = commit_trans; ++ user_stats->rollback_trans = rollback_trans; ++ user_stats->denied_connections = denied_connections; ++ user_stats->lost_connections = lost_connections; ++ user_stats->access_denied_errors = access_denied_errors; ++ user_stats->empty_queries = empty_queries; ++ DBUG_VOID_RETURN; ++} ++ ++void add_user_stats(USER_STATS *user_stats, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, 
++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries) ++{ ++ user_stats->total_connections += total_connections; ++ user_stats->concurrent_connections += concurrent_connections; ++ user_stats->connected_time += connected_time; ++ user_stats->busy_time += busy_time; ++ user_stats->cpu_time += cpu_time; ++ user_stats->bytes_received += bytes_received; ++ user_stats->bytes_sent += bytes_sent; ++ user_stats->binlog_bytes_written += binlog_bytes_written; ++ user_stats->rows_fetched += rows_fetched; ++ user_stats->rows_updated += rows_updated; ++ user_stats->rows_read += rows_read; ++ user_stats->select_commands += select_commands; ++ user_stats->update_commands += update_commands; ++ user_stats->other_commands += other_commands; ++ user_stats->commit_trans += commit_trans; ++ user_stats->rollback_trans += rollback_trans; ++ user_stats->denied_connections += denied_connections; ++ user_stats->lost_connections += lost_connections; ++ user_stats->access_denied_errors += access_denied_errors; ++ user_stats->empty_queries += empty_queries; ++} ++ ++void init_global_user_stats(void) ++{ ++ if (hash_init(&global_user_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) { ++ sql_print_error("Initializing global_user_stats failed."); ++ exit(1); ++ } ++} ++ ++void init_global_client_stats(void) ++{ ++ if (hash_init(&global_client_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) { ++ sql_print_error("Initializing global_client_stats failed."); ++ exit(1); ++ 
} ++} ++ ++extern "C" byte *get_key_table_stats(TABLE_STATS *table_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(table_stats->table); ++ return (byte*)table_stats->table; ++} ++ ++extern "C" void free_table_stats(TABLE_STATS* table_stats) ++{ ++ my_free((char*)table_stats, MYF(0)); ++} ++ ++void init_global_table_stats(void) ++{ ++ if (hash_init(&global_table_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_table_stats, ++ (hash_free_key)free_table_stats, 0)) { ++ sql_print_error("Initializing global_table_stats failed."); ++ exit(1); ++ } ++} ++ ++extern "C" byte *get_key_index_stats(INDEX_STATS *index_stats, uint *length, ++ my_bool not_used __attribute__((unused))) ++{ ++ *length = strlen(index_stats->index); ++ return (byte*)index_stats->index; ++} ++ ++extern "C" void free_index_stats(INDEX_STATS* index_stats) ++{ ++ my_free((char*)index_stats, MYF(0)); ++} ++ ++void init_global_index_stats(void) ++{ ++ if (hash_init(&global_index_stats, system_charset_info, max_connections, ++ 0, 0, (hash_get_key)get_key_index_stats, ++ (hash_free_key)free_index_stats, 0)) { ++ sql_print_error("Initializing global_index_stats failed."); ++ exit(1); ++ } ++} ++ + + + /* +@@ -599,7 +795,10 @@ + + end: + if (error) ++ { ++ statistic_increment(denied_connections, &LOCK_status); + uc->connections--; // no need for decrease_user_connections() here ++ } + (void) pthread_mutex_unlock(&LOCK_user_conn); + DBUG_RETURN(error); + } +@@ -646,6 +845,25 @@ + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ + } + ++void free_global_user_stats(void) ++{ ++ hash_free(&global_user_stats); ++} ++ ++void free_global_table_stats(void) ++{ ++ hash_free(&global_table_stats); ++} ++ ++void free_global_index_stats(void) ++{ ++ hash_free(&global_index_stats); ++} ++ ++void free_global_client_stats(void) ++{ ++ hash_free(&global_client_stats); ++} + + + /* +@@ -698,6 +916,214 @@ + return uc_update_queries[command] != 0; + } + ++// 
'mysql_system_user' is used for when the user is not defined for a THD. ++static char mysql_system_user[] = "#mysql_system#"; ++ ++// Returns 'user' if it's not NULL. Returns 'mysql_system_user' otherwise. ++static char* get_valid_user_string(char* user) { ++ return user ? user : mysql_system_user; ++} ++ ++// Increments the global stats connection count for an entry from ++// global_client_stats or global_user_stats. Returns 0 on success ++// and 1 on error. ++static int increment_count_by_name(const char *name, const char *role_name, ++ HASH *users_or_clients, THD *thd) ++{ ++ USER_STATS* user_stats; ++ ++ if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, name, ++ strlen(name)))) ++ { ++ // First connection for this user or client ++ if (!(user_stats = ((USER_STATS*) ++ my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL))))) ++ { ++ return 1; // Out of memory ++ } ++ ++ init_user_stats(user_stats, name, role_name, ++ 0, 0, // connections ++ 0, 0, 0, // time ++ 0, 0, 0, // bytes sent, received and written ++ 0, 0, 0, // rows fetched, updated and read ++ 0, 0, 0, // select, update and other commands ++ 0, 0, // commit and rollback trans ++ thd->diff_denied_connections, ++ 0, // lost connections ++ 0, // access denied errors ++ 0); // empty queries ++ ++ if (my_hash_insert(users_or_clients, (byte*)user_stats)) ++ { ++ my_free((char*)user_stats, 0); ++ return 1; // Out of memory ++ } ++ } ++ user_stats->total_connections++; ++ return 0; ++} ++ ++// Increments the global user and client stats connection count. If 'use_lock' ++// is true, LOCK_global_user_client_stats will be locked/unlocked. Returns ++// 0 on success, 1 on error. 
++static int increment_connection_count(THD* thd, bool use_lock) ++{ ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ const char* client_string = get_client_host(thd); ++ int return_value = 0; ++ ++ if (!opt_userstat_running) ++ return return_value; ++ ++ if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats); ++ ++ if (increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd)) ++ { ++ return_value = 1; ++ goto end; ++ } ++ if (increment_count_by_name(client_string, ++ user_string, ++ &global_client_stats, thd)) ++ { ++ return_value = 1; ++ goto end; ++ } ++ ++end: ++ if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ return return_value; ++} ++ ++// Used to update the global user and client stats. ++static void update_global_user_stats_with_user(THD* thd, ++ USER_STATS* user_stats, ++ time_t now) ++{ ++ user_stats->connected_time += now - thd->last_global_update_time; ++ thd->last_global_update_time = now; ++ user_stats->busy_time += thd->diff_total_busy_time; ++ user_stats->cpu_time += thd->diff_total_cpu_time; ++ user_stats->bytes_received += thd->diff_total_bytes_received; ++ user_stats->bytes_sent += thd->diff_total_bytes_sent; ++ user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written; ++ user_stats->rows_fetched += thd->diff_total_sent_rows; ++ user_stats->rows_updated += thd->diff_total_updated_rows; ++ user_stats->rows_read += thd->diff_total_read_rows; ++ user_stats->select_commands += thd->diff_select_commands; ++ user_stats->update_commands += thd->diff_update_commands; ++ user_stats->other_commands += thd->diff_other_commands; ++ user_stats->commit_trans += thd->diff_commit_trans; ++ user_stats->rollback_trans += thd->diff_rollback_trans; ++ user_stats->denied_connections += thd->diff_denied_connections; ++ user_stats->lost_connections += thd->diff_lost_connections; ++ user_stats->access_denied_errors += thd->diff_access_denied_errors; ++ 
user_stats->empty_queries += thd->diff_empty_queries; ++} ++ ++// Updates the global stats of a user or client ++void update_global_user_stats(THD* thd, bool create_user, time_t now) ++{ ++ if (opt_userstat_running) { ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ const char* client_string = get_client_host(thd); ++ ++ USER_STATS* user_stats; ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ ++ // Update by user name ++ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats, ++ (byte*)user_string, ++ strlen(user_string)))) { ++ // Found user. ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Create the entry ++ if (create_user) { ++ increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd); ++ } ++ } ++ ++ // Update by client IP ++ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats, ++ (byte*)client_string, ++ strlen(client_string)))) { ++ // Found by client IP ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Create the entry ++ if (create_user) { ++ increment_count_by_name(client_string, ++ user_string, ++ &global_client_stats, thd); ++ } ++ } ++ thd->reset_diff_stats(); ++ ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ } else { ++ thd->reset_diff_stats(); ++ } ++} ++ ++// Determines the concurrent number of connections of current threads. ++static void set_connections_stats() ++{ ++ USER_STATS* user_stats; ++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ pthread_mutex_lock(&LOCK_thread_count); ++ ++ // Resets all concurrent connections to 0. 
++ for (int i = 0; i < global_user_stats.records; ++i) { ++ user_stats = (USER_STATS*)hash_element(&global_user_stats, i); ++ user_stats->concurrent_connections = 0; ++ } ++ for (int i = 0; i < global_client_stats.records; ++i) { ++ user_stats = (USER_STATS*)hash_element(&global_client_stats, i); ++ user_stats->concurrent_connections = 0; ++ } ++ ++ I_List_iterator<THD> it(threads); ++ THD* thd; ++ time_t now = time(NULL); ++ // Iterates through the current threads. ++ while ((thd = it++)) { ++ char* user_string = get_valid_user_string(thd->main_security_ctx.user); ++ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats, ++ (byte*)user_string, ++ strlen(user_string)))) { ++ // Found user. ++ user_stats->concurrent_connections++; ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // The user name should exist. ++ if (user_string == mysql_system_user) { ++ // Only create the user if it is the mysql_system_user ++ increment_count_by_name(user_string, user_string, ++ &global_user_stats, thd); ++ } ++ } ++ ++ const char* client_string = get_client_host(thd); ++ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats, ++ (byte*)client_string, ++ strlen(client_string)))) { ++ // Found user. 
++ user_stats->concurrent_connections++; ++ update_global_user_stats_with_user(thd, user_stats, now); ++ } else { ++ // Do nothing, unlike what is done for global_user_stats ++ } ++ thd->reset_diff_stats(); ++ } ++ pthread_mutex_unlock(&LOCK_thread_count); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++} ++ + /* + Reset per-hour user resource limits when it has been more than + an hour since they were last checked +@@ -1184,6 +1610,8 @@ + my_net_set_read_timeout(net, connect_timeout); + my_net_set_write_timeout(net, connect_timeout); + ++ bool create_user = true; ++ + if ((error=check_connection(thd))) + { // Wrong permissions + if (error > 0) +@@ -1193,8 +1621,22 @@ + my_sleep(1000); /* must wait after eof() */ + #endif + statistic_increment(aborted_connects,&LOCK_status); ++ thd->diff_denied_connections++; ++ if (error == -2) { ++ // Do not create statistics for a user who does not exist, or failed ++ // to authenticate. ++ create_user = false; ++ } + goto end_thread; + } ++ ++ thd->reset_stats(); ++ // Updates global user connection stats. 
++ if (increment_connection_count(thd, true)) { ++ net_send_error(thd, ER_OUTOFMEMORY); // Out of memory ++ goto end_thread; ++ } ++ + #ifdef __NETWARE__ + netware_reg_user(sctx->ip, sctx->user, "MySQL"); + #endif +@@ -1251,6 +1693,7 @@ + net->vio && net->error && net->report_error) + { + statistic_increment(aborted_threads, &LOCK_status); ++ thd->diff_lost_connections++; + } + + if (net->error && net->vio != 0 && net->report_error) +@@ -1270,6 +1713,8 @@ + + end_thread: + close_connection(thd, 0, 1); ++ thd->update_stats(false); ++ update_global_user_stats(thd, create_user, time(NULL)); + end_thread(thd,1); + /* + If end_thread returns, we are either running with --one-thread +@@ -1601,6 +2046,13 @@ + + thd->clear_error(); // Clear error message + ++ thd->updated_row_count=0; ++ thd->busy_time=0; ++ thd->cpu_time=0; ++ thd->bytes_received=0; ++ thd->bytes_sent=0; ++ thd->binlog_bytes_written=0; ++ + net_new_transaction(net); + + packet_length= my_net_read(net); +@@ -1759,6 +2211,9 @@ + } + + thd->command=command; ++ // To increment the corrent command counter for user stats, 'command' must ++ // be saved because it is set to COM_SLEEP at the end of this function. ++ thd->old_command = command; + /* + Commands which always take a long time are logged into + the slow log only if opt_log_slow_admin_statements is set. +@@ -4428,6 +4883,15 @@ + if (check_global_access(thd,RELOAD_ACL)) + goto error; + ++ if(lex->type & REFRESH_SLOW_QUERY_LOG) { ++ /* We are only flushing slow query log */ ++ mysql_slow_log.new_file(1); ++ ++ send_ok(thd); ++ break; ++ } ++ ++ + /* + reload_acl_and_cache() will tell us if we are allowed to write to the + binlog or not. +@@ -4735,6 +5199,7 @@ + { + if (check_global_access(thd, SUPER_ACL)) + { ++ thd->diff_access_denied_errors++; + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER"); + goto create_sp_error; + } +@@ -5571,6 +6036,7 @@ + if (!no_errors) + { + const char *db_name= db ? 
db : thd->db; ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, db_name); + } +@@ -5606,6 +6072,7 @@ + { // We can never grant this + DBUG_PRINT("error",("No possible access")); + if (!no_errors) ++ thd->diff_access_denied_errors++; + my_error(ER_ACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, + sctx->priv_host, +@@ -5638,11 +6105,15 @@ + + DBUG_PRINT("error",("Access denied")); + if (!no_errors) ++ { ++ // increment needs !no_errors condition, otherwise double counting. ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, + (db ? db : (thd->db ? + thd->db : + "unknown"))); /* purecov: tested */ ++ } + DBUG_RETURN(TRUE); /* purecov: tested */ + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ + } +@@ -5676,6 +6147,7 @@ + if ((thd->security_ctx->master_access & want_access)) + return 0; + get_privilege_desc(command, sizeof(command), want_access); ++ thd->diff_access_denied_errors++; + my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command); + return 1; + #endif /* NO_EMBEDDED_ACCESS_CHECKS */ +@@ -5708,6 +6180,7 @@ + + if (!thd->col_access && check_grant_db(thd, dst_db_name)) + { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + thd->security_ctx->priv_user, + thd->security_ctx->priv_host, +@@ -5739,6 +6212,12 @@ + check_grant(thd, SELECT_ACL, dst_table, 2, UINT_MAX, FALSE); + } + ++ ++ case SCH_USER_STATS: ++ case SCH_CLIENT_STATS: ++ return check_global_access(thd, SUPER_ACL | PROCESS_ACL); ++ case SCH_TABLE_STATS: ++ case SCH_INDEX_STATS: + case SCH_OPEN_TABLES: + case SCH_VARIABLES: + case SCH_STATUS: +@@ -5792,8 +6271,8 @@ + #ifndef NO_EMBEDDED_ACCESS_CHECKS + TABLE_LIST *org_tables= tables; + #endif ++ Security_context *sctx= thd->security_ctx, *backup_ctx= thd->security_ctx; + TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); +- Security_context *sctx= thd->security_ctx, *backup_ctx= 
thd->security_ctx; + /* + The check that first_not_own_table is not reached is for the case when + the given table list refers to the list for prelocking (contains tables +@@ -5810,9 +6289,12 @@ + (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL))) + { + if (!no_errors) ++ { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->priv_host, + INFORMATION_SCHEMA_NAME.str); ++ } + return TRUE; + } + /* +@@ -6321,6 +6803,30 @@ + */ + lex_start(thd); + mysql_reset_thd_for_next_command(thd); ++ ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } + + if (query_cache_send_result_to_client(thd, (char*) inBuf, length) <= 0) + { +@@ -6400,6 +6906,43 @@ + *found_semicolon= NULL; + } + ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. 
++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) { ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->cpu_time > 2629743) { ++ thd->cpu_time = 0; ++ } ++ } else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -7411,8 +7954,35 @@ + pthread_mutex_unlock(&LOCK_active_mi); + } + #endif +- if (options & REFRESH_USER_RESOURCES) +- reset_mqh((LEX_USER *) NULL); ++ if (options & REFRESH_TABLE_STATS) ++ { ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ free_global_table_stats(); ++ init_global_table_stats(); ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++ } ++ if (options & REFRESH_INDEX_STATS) ++ { ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ free_global_index_stats(); ++ init_global_index_stats(); ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ } ++ if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS)) ++ { ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ if (options & REFRESH_USER_STATS) ++ { ++ free_global_user_stats(); ++ init_global_user_stats(); ++ } ++ if (options & REFRESH_CLIENT_STATS) ++ { ++ free_global_client_stats(); ++ init_global_client_stats(); ++ } ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ } + *write_to_binlog= tmp_write_to_binlog; + return result; + } +diff -r 23e5576aa59a sql/sql_prepare.cc +--- a/sql/sql_prepare.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_prepare.cc Tue Feb 17 22:33:23 2009 -0800 
+@@ -80,6 +80,9 @@ + #else + #include <mysql_com.h> + #endif ++ ++// Uses the THD to update the global stats by user name and client IP ++void update_global_user_stats(THD* thd, bool create_user, time_t now); + + /* A result class used to send cursor rows using the binary protocol. */ + +@@ -1910,8 +1913,32 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + if (! (stmt= new Prepared_statement(thd, &thd->protocol_prep))) +- DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */ ++ goto end; /* out of memory: error is set in Sql_alloc */ + + if (thd->stmt_map.insert(thd, stmt)) + { +@@ -1919,7 +1946,7 @@ + The error is set in the insert. The statement itself + will be also deleted there (this is how the hash works). + */ +- DBUG_VOID_RETURN; ++ goto end; + } + + /* Reset warnings from previous command */ +@@ -1941,6 +1968,44 @@ + thd->stmt_map.erase(stmt); + } + /* check_prepared_statemnt sends the metadata packet in case of success */ ++end: ++ if (opt_userstat_running) { ++ // Gets the end time. 
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) { ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->cpu_time > 2629743) { ++ thd->cpu_time = 0; ++ } ++ } else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2281,8 +2346,32 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. 
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_execute"))) +- DBUG_VOID_RETURN; ++ goto end; + + #ifdef ENABLED_PROFILING + thd->profiling.set_query_source(stmt->query, stmt->query_length); +@@ -2325,11 +2414,50 @@ + test(flags & (ulong) CURSOR_TYPE_READ_ONLY)); + if (!(specialflag & SPECIAL_NO_PRIOR)) + my_pthread_setprio(pthread_self(), WAIT_PRIOR); +- DBUG_VOID_RETURN; ++ goto end; + + set_params_data_err: + my_error(ER_WRONG_ARGUMENTS, MYF(0), "mysql_stmt_execute"); + reset_stmt_params(stmt); ++ ++end: ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) { ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->cpu_time > 2629743) { ++ thd->cpu_time = 0; ++ } ++ } else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. 
++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2423,6 +2551,31 @@ + + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); ++ ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + statistic_increment(thd->status_var.com_stmt_fetch, &LOCK_status); + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_fetch"))) + DBUG_VOID_RETURN; +@@ -2455,6 +2608,43 @@ + thd->restore_backup_statement(stmt, &stmt_backup); + thd->stmt_arena= thd; + ++ if (opt_userstat_running) { ++ // Gets the end time. ++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. 
++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) { ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->cpu_time > 2629743) { ++ thd->cpu_time = 0; ++ } ++ } else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); ++ + DBUG_VOID_RETURN; + } + +@@ -2487,6 +2677,30 @@ + /* First of all clear possible warnings from the previous command */ + mysql_reset_thd_for_next_command(thd); + ++ int start_time_error = 0; ++ int end_time_error = 0; ++ struct timeval start_time, end_time; ++ double start_usecs = 0; ++ double end_usecs = 0; ++ /* cpu time */ ++ int cputime_error = 0; ++ struct timespec tp; ++ double start_cpu_nsecs = 0; ++ double end_cpu_nsecs = 0; ++ ++ if (opt_userstat_running) { ++#ifdef HAVE_CLOCK_GETTIME ++ /* get start cputime */ ++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ ++ // Gets the start time, in order to measure how long this command takes. ++ if (!(start_time_error = gettimeofday(&start_time, NULL))) { ++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec; ++ } ++ } ++ + statistic_increment(thd->status_var.com_stmt_reset, &LOCK_status); + if (!(stmt= find_prepared_statement(thd, stmt_id, "mysql_stmt_reset"))) + DBUG_VOID_RETURN; +@@ -2502,6 +2716,43 @@ + stmt->state= Query_arena::PREPARED; + + send_ok(thd); ++ ++ if (opt_userstat_running) { ++ // Gets the end time. 
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) { ++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec; ++ } ++ ++ // Calculates the difference between the end and start times. ++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) { ++ thd->busy_time = (end_usecs - start_usecs) / 1000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->busy_time > 2629743) { ++ thd->busy_time = 0; ++ } ++ } else { ++ // end time went back in time, or gettimeofday() failed. ++ thd->busy_time = 0; ++ } ++ ++#ifdef HAVE_CLOCK_GETTIME ++ /* get end cputime */ ++ if (!cputime_error && ++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp))) ++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec; ++#endif ++ if (start_cpu_nsecs && !cputime_error) { ++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000; ++ // In case there are bad values, 2629743 is the #seconds in a month. ++ if (thd->cpu_time > 2629743) { ++ thd->cpu_time = 0; ++ } ++ } else ++ thd->cpu_time = 0; ++ } ++ // Updates THD stats and the global user stats. ++ thd->update_stats(true); ++ update_global_user_stats(thd, true, time(NULL)); + + DBUG_VOID_RETURN; + } +diff -r 23e5576aa59a sql/sql_show.cc +--- a/sql/sql_show.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_show.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -530,6 +530,7 @@ + sctx->master_access); + if (!(db_access & DB_ACLS) && (!grant_option || check_grant_db(thd,dbname))) + { ++ thd->diff_access_denied_errors++; + my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), + sctx->priv_user, sctx->host_or_ip, dbname); + mysql_log.write(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR), +@@ -1872,6 +1873,300 @@ + DBUG_RETURN(FALSE); + } + ++/* ++ Aggregate values for mapped_user entries by their role. 
++ ++ SYNOPSIS ++ aggregate_user_stats ++ all_user_stats - input to aggregate ++ agg_user_stats - returns aggregated values ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++static int ++aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats) ++{ ++ DBUG_ENTER("aggregate_user_stats"); ++ if (hash_init(agg_user_stats, system_charset_info, ++ max(all_user_stats->records, 1), ++ 0, 0, (hash_get_key)get_key_user_stats, ++ (hash_free_key)free_user_stats, 0)) ++ { ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ ++ for (int i = 0; i < all_user_stats->records; ++i) { ++ USER_STATS *user = (USER_STATS*)hash_element(all_user_stats, i); ++ USER_STATS *agg_user; ++ if (!(agg_user = (USER_STATS*)hash_search(agg_user_stats, ++ (byte*)user->priv_user, ++ strlen(user->priv_user)))) ++ { ++ // First entry for this role. ++ if (!(agg_user = ++ (USER_STATS*) my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))) ++ { ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ ++ init_user_stats(agg_user, user->priv_user, user->priv_user, ++ user->total_connections, user->concurrent_connections, ++ user->connected_time, user->busy_time, user->cpu_time, ++ user->bytes_received, user->bytes_sent, ++ user->binlog_bytes_written, ++ user->rows_fetched, user->rows_updated, user->rows_read, ++ user->select_commands, user->update_commands, ++ user->other_commands, ++ user->commit_trans, user->rollback_trans, ++ user->denied_connections, user->lost_connections, ++ user->access_denied_errors, user->empty_queries); ++ ++ if (my_hash_insert(agg_user_stats, (byte*)agg_user)) ++ { ++ // Out of memory. ++ my_free((char*)agg_user, 0); ++ sql_print_error("Malloc in aggregate_user_stats failed"); ++ DBUG_RETURN(1); ++ } ++ } ++ else ++ { ++ // Aggregate with existing values for this role. 
++ add_user_stats(agg_user, ++ user->total_connections, user->concurrent_connections, ++ user->connected_time, user->busy_time, user->cpu_time, ++ user->bytes_received, user->bytes_sent, ++ user->binlog_bytes_written, ++ user->rows_fetched, user->rows_updated, user->rows_read, ++ user->select_commands, user->update_commands, ++ user->other_commands, ++ user->commit_trans, user->rollback_trans, ++ user->denied_connections, user->lost_connections, ++ user->access_denied_errors, user->empty_queries); ++ } ++ } ++ DBUG_PRINT("exit", ("aggregated %d input into %d output entries", ++ all_user_stats->records, agg_user_stats->records)); ++ DBUG_RETURN(0); ++} ++ ++/* ++ Write result to network for SHOW USER_STATISTICS ++ ++ SYNOPSIS ++ send_user_stats ++ all_user_stats - values to return ++ table - I_S table ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table) ++{ ++ DBUG_ENTER("send_user_stats"); ++ for (int i = 0; i < all_user_stats->records; ++i) { ++ restore_record(table, s->default_values); ++ USER_STATS *user_stats = (USER_STATS*)hash_element(all_user_stats, i); ++ table->field[0]->store(user_stats->user, strlen(user_stats->user), system_charset_info); ++ table->field[1]->store((longlong)user_stats->total_connections); ++ table->field[2]->store((longlong)user_stats->concurrent_connections); ++ table->field[3]->store((longlong)user_stats->connected_time); ++ table->field[4]->store((longlong)user_stats->busy_time); ++ table->field[5]->store((longlong)user_stats->cpu_time); ++ table->field[6]->store((longlong)user_stats->bytes_received); ++ table->field[7]->store((longlong)user_stats->bytes_sent); ++ table->field[8]->store((longlong)user_stats->binlog_bytes_written); ++ table->field[9]->store((longlong)user_stats->rows_fetched); ++ table->field[10]->store((longlong)user_stats->rows_updated); ++ table->field[11]->store((longlong)user_stats->rows_read); ++ 
table->field[12]->store((longlong)user_stats->select_commands); ++ table->field[13]->store((longlong)user_stats->update_commands); ++ table->field[14]->store((longlong)user_stats->other_commands); ++ table->field[15]->store((longlong)user_stats->commit_trans); ++ table->field[16]->store((longlong)user_stats->rollback_trans); ++ table->field[17]->store((longlong)user_stats->denied_connections); ++ table->field[18]->store((longlong)user_stats->lost_connections); ++ table->field[19]->store((longlong)user_stats->access_denied_errors); ++ table->field[20]->store((longlong)user_stats->empty_queries); ++ if (schema_table_store_record(thd, table)) ++ { ++ DBUG_PRINT("error", ("store record error")); ++ DBUG_RETURN(1); ++ } ++ } ++ DBUG_RETURN(0); ++} ++ ++/* ++ Process SHOW USER_STATISTICS ++ ++ SYNOPSIS ++ mysqld_show_user_stats ++ thd - current thread ++ wild - limit results to the entry for this user ++ with_roles - when true, display role for mapped users ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++ ++ ++int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_user_stats"); ++ ++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL)) ++ DBUG_RETURN(1); ++ ++ // Iterates through all the global stats and sends them to the client. ++ // Pattern matching on the client IP is supported. 
++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ int result= send_user_stats(thd, &global_user_stats, table); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ if (result) ++ goto err; ++ ++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 0")); ++ DBUG_RETURN(0); ++ ++ err: ++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 1")); ++ DBUG_RETURN(1); ++} ++ ++/* ++ Process SHOW CLIENT_STATISTICS ++ ++ SYNOPSIS ++ mysqld_show_client_stats ++ thd - current thread ++ wild - limit results to the entry for this client ++ ++ RETURN ++ 0 - OK ++ 1 - error ++ */ ++ ++ ++int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_client_stats"); ++ ++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL)) ++ DBUG_RETURN(1); ++ ++ // Iterates through all the global stats and sends them to the client. ++ // Pattern matching on the client IP is supported. ++ ++ pthread_mutex_lock(&LOCK_global_user_client_stats); ++ int result= send_user_stats(thd, &global_client_stats, table); ++ pthread_mutex_unlock(&LOCK_global_user_client_stats); ++ if (result) ++ goto err; ++ ++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 0")); ++ DBUG_RETURN(0); ++ ++ err: ++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 1")); ++ DBUG_RETURN(1); ++} ++ ++ ++// Sends the global table stats back to the client. 
++int fill_schema_table_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_table_stats"); ++ char *table_full_name, *table_schema; ++ ++ pthread_mutex_lock(&LOCK_global_table_stats); ++ for (int i = 0; i < global_table_stats.records; ++i) { ++ restore_record(table, s->default_values); ++ TABLE_STATS *table_stats = ++ (TABLE_STATS*)hash_element(&global_table_stats, i); ++ ++ table_full_name= thd->strdup(table_stats->table); ++ table_schema= strsep(&table_full_name, "."); ++ ++ TABLE_LIST tmp_table; ++ bzero((char*) &tmp_table,sizeof(tmp_table)); ++ tmp_table.table_name= table_full_name; ++ tmp_table.db= table_schema; ++ tmp_table.grant.privilege= 0; ++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db, ++ &tmp_table.grant.privilege, 0, 0, ++ is_schema_db(table_schema)) || ++ grant_option && check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1)) ++ continue; ++ ++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info); ++ table->field[1]->store(table_full_name, strlen(table_full_name), system_charset_info); ++ table->field[2]->store((longlong)table_stats->rows_read, TRUE); ++ table->field[3]->store((longlong)table_stats->rows_changed, TRUE); ++ table->field[4]->store((longlong)table_stats->rows_changed_x_indexes, TRUE); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_global_table_stats)); ++ DBUG_RETURN(1); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_table_stats); ++ DBUG_RETURN(0); ++} ++ ++// Sends the global index stats back to the client. 
++int fill_schema_index_stats(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ DBUG_ENTER("fill_schema_index_stats"); ++ char *index_full_name, *table_schema, *table_name; ++ ++ pthread_mutex_lock(&LOCK_global_index_stats); ++ for (int i = 0; i < global_index_stats.records; ++i) { ++ restore_record(table, s->default_values); ++ INDEX_STATS *index_stats = ++ (INDEX_STATS*)hash_element(&global_index_stats, i); ++ ++ index_full_name= thd->strdup(index_stats->index); ++ table_schema= strsep(&index_full_name, "."); ++ table_name= strsep(&index_full_name, "."); ++ ++ TABLE_LIST tmp_table; ++ bzero((char*) &tmp_table,sizeof(tmp_table)); ++ tmp_table.table_name= table_name; ++ tmp_table.db= table_schema; ++ tmp_table.grant.privilege= 0; ++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db, ++ &tmp_table.grant.privilege, 0, 0, ++ is_schema_db(table_schema)) || ++ grant_option && check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1)) ++ continue; ++ ++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info); ++ table->field[1]->store(table_name, strlen(table_name), system_charset_info); ++ table->field[2]->store(index_full_name, strlen(index_full_name), system_charset_info); ++ table->field[3]->store((longlong)index_stats->rows_read, TRUE); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_global_index_stats)); ++ DBUG_RETURN(1); ++ } ++ } ++ pthread_mutex_unlock(&LOCK_global_index_stats); ++ DBUG_RETURN(0); ++} + + /* collect status for all running threads */ + +@@ -4482,6 +4777,77 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++ST_FIELD_INFO user_stats_fields_info[]= ++{ ++ {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, ++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections"}, ++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Concurrent_connections"}, ++ {"CONNECTED_TIME", 
MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time"}, ++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Busy_time"}, ++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Cpu_time"}, ++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_received"}, ++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_sent"}, ++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Binlog_bytes_written"}, ++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_fetched"}, ++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_updated"}, ++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Table_rows_read"}, ++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Select_commands"}, ++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Update_commands"}, ++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Other_commands"}, ++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Commit_transactions"}, ++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rollback_transactions"}, ++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Denied_connections"}, ++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Lost_connections"}, ++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Access_denied"}, ++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Empty_queries"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ST_FIELD_INFO client_stats_fields_info[]= ++{ ++ {"CLIENT", 16, MYSQL_TYPE_STRING, 0, 0, "Client"}, ++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections"}, ++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, 
"Concurrent_connections"}, ++ {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time"}, ++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Busy_time"}, ++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Cpu_time"}, ++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_received"}, ++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Bytes_sent"}, ++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Binlog_bytes_written"}, ++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_fetched"}, ++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_updated"}, ++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Table_rows_read"}, ++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Select_commands"}, ++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Update_commands"}, ++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Other_commands"}, ++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Commit_transactions"}, ++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rollback_transactions"}, ++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Denied_connections"}, ++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Lost_connections"}, ++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Access_denied"}, ++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Empty_queries"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ ++ST_FIELD_INFO table_stats_fields_info[]= ++{ ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, 
MYSQL_TYPE_LONG, 0, 0, "Rows_read"}, ++ {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_changed"}, ++ {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_changed_x_#indexes"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ST_FIELD_INFO index_stats_fields_info[]= ++{ ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name"}, ++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Rows_read"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; + + /* + Description of ST_FIELD_INFO in table.h +@@ -4491,6 +4857,8 @@ + { + {"CHARACTER_SETS", charsets_fields_info, create_schema_table, + fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0}, ++ {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, ++ fill_schema_client_stats, make_old_format, 0, -1, -1, 0}, + {"COLLATIONS", collation_fields_info, create_schema_table, + fill_schema_collation, make_old_format, 0, -1, -1, 0}, + {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info, +@@ -4499,6 +4867,8 @@ + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + fill_schema_column_privileges, 0, 0, -1, -1, 0}, ++ {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, ++ fill_schema_index_stats, make_old_format, 0, -1, -1, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, + {"OPEN_TABLES", open_tables_fields_info, create_schema_table, +@@ -4524,10 +4894,14 @@ + get_all_tables, make_table_names_old_format, 0, 1, 2, 1}, + {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table, + fill_schema_table_privileges, 0, 0, -1, -1, 0}, ++ {"TABLE_STATISTICS", 
table_stats_fields_info, create_schema_table, ++ fill_schema_table_stats, make_old_format, 0, -1, -1, 0}, + {"TRIGGERS", triggers_fields_info, create_schema_table, + get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0}, + {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, + fill_schema_user_privileges, 0, 0, -1, -1, 0}, ++ {"USER_STATISTICS", user_stats_fields_info, create_schema_table, ++ fill_schema_user_stats, make_old_format, 0, -1, -1, 0}, + {"VARIABLES", variables_fields_info, create_schema_table, fill_variables, + make_old_format, 0, -1, -1, 1}, + {"VIEWS", view_fields_info, create_schema_table, +diff -r 23e5576aa59a sql/sql_update.cc +--- a/sql/sql_update.cc Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_update.cc Tue Feb 17 22:33:23 2009 -0800 +@@ -601,7 +601,8 @@ + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + send_ok(thd, (ulong) thd->row_count_func, + thd->insert_id_used ? thd->last_insert_id : 0L,buff); +- DBUG_PRINT("info",("%ld records updated", (long) updated)); ++ thd->updated_row_count += thd->row_count_func; ++ DBUG_PRINT("info",("%d records updated",updated)); + } + thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */ + thd->abort_on_warning= 0; +@@ -1832,5 +1833,6 @@ + (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated; + ::send_ok(thd, (ulong) thd->row_count_func, + thd->insert_id_used ? 
thd->last_insert_id : 0L,buff); ++ thd->updated_row_count += thd->row_count_func; + return FALSE; + } +diff -r 23e5576aa59a sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/sql_yacc.yy Tue Feb 17 22:33:23 2009 -0800 +@@ -523,6 +523,7 @@ + %token CHECK_SYM + %token CIPHER_SYM + %token CLIENT_SYM ++%token CLIENT_STATS_SYM + %token CLOSE_SYM + %token COALESCE + %token CODE_SYM +@@ -680,6 +681,7 @@ + %token IMPORT + %token INDEXES + %token INDEX_SYM ++%token INDEX_STATS_SYM + %token INFILE + %token INNER_SYM + %token INNOBASE_SYM +@@ -909,6 +911,7 @@ + %token SIGNED_SYM + %token SIMPLE_SYM + %token SLAVE ++%token SLOW_SYM + %token SMALLINT + %token SNAPSHOT_SYM + %token SOUNDS_SYM +@@ -949,6 +952,7 @@ + %token TABLES + %token TABLESPACE + %token TABLE_SYM ++%token TABLE_STATS_SYM + %token TEMPORARY + %token TEMPTABLE_SYM + %token TERMINATED +@@ -991,6 +995,7 @@ + %token UPGRADE_SYM + %token USAGE + %token USER ++%token USER_STATS_SYM + %token USE_FRM + %token USE_SYM + %token USING +@@ -8244,6 +8249,38 @@ + { + Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; + } ++ | CLIENT_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ Lex->sql_command = SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_CLIENT_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS)) ++ MYSQL_YYABORT; ++ } ++ | USER_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command = SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_USER_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS)) ++ MYSQL_YYABORT; ++ } ++ | TABLE_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command= SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_TABLE_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS)) ++ MYSQL_YYABORT; ++ } ++ | INDEX_STATS_SYM wild_and_where ++ { ++ LEX *lex= Lex; ++ lex->sql_command= SQLCOM_SELECT; ++ lex->orig_sql_command= SQLCOM_SHOW_INDEX_STATS; ++ if (prepare_schema_table(YYTHD, lex, 0, 
SCH_INDEX_STATS)) ++ MYSQL_YYABORT; ++ } + | CREATE PROCEDURE sp_name + { + LEX *lex= Lex; +@@ -8448,9 +8485,14 @@ + | LOGS_SYM { Lex->type|= REFRESH_LOG; } + | STATUS_SYM { Lex->type|= REFRESH_STATUS; } + | SLAVE { Lex->type|= REFRESH_SLAVE; } ++ | SLOW_SYM QUERY_SYM LOGS_SYM { Lex->type |= REFRESH_SLOW_QUERY_LOG; } + | MASTER_SYM { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } +- | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; }; ++ | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } ++ | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; } ++ | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; } ++ | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; } ++ | INDEX_STATS_SYM { Lex->type|= REFRESH_INDEX_STATS; }; + + opt_table_list: + /* empty */ {;} +@@ -9439,6 +9481,7 @@ + | CHAIN_SYM {} + | CHANGED {} + | CIPHER_SYM {} ++ | CLIENT_STATS_SYM {} + | CLIENT_SYM {} + | CODE_SYM {} + | COLLATION_SYM {} +@@ -9491,6 +9534,7 @@ + | HOSTS_SYM {} + | HOUR_SYM {} + | IDENTIFIED_SYM {} ++ | INDEX_STATS_SYM {} + | INVOKER_SYM {} + | IMPORT {} + | INDEXES {} +@@ -9600,6 +9644,7 @@ + | SIMPLE_SYM {} + | SHARE_SYM {} + | SHUTDOWN {} ++ | SLOW_SYM {} + | SNAPSHOT_SYM {} + | SOUNDS_SYM {} + | SOURCE_SYM {} +@@ -9616,6 +9661,7 @@ + | SUSPEND_SYM {} + | SWAPS_SYM {} + | SWITCHES_SYM {} ++ | TABLE_STATS_SYM {} + | TABLES {} + | TABLESPACE {} + | TEMPORARY {} +@@ -9636,6 +9682,7 @@ + | UNKNOWN_SYM {} + | UNTIL_SYM {} + | USER {} ++ | USER_STATS_SYM {} + | USE_FRM {} + | VARIABLES {} + | VIEW_SYM {} +diff -r 23e5576aa59a sql/structs.h +--- a/sql/structs.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/structs.h Tue Feb 17 22:33:23 2009 -0800 +@@ -273,6 +273,98 @@ + time_t intime; + } USER_CONN; + ++typedef struct st_user_stats { ++ char user[USERNAME_LENGTH + 1]; ++ // Account name the user is mapped to when this is a user from mapped_user. ++ // Otherwise, the same value as user. 
++ char priv_user[USERNAME_LENGTH + 1]; ++ uint total_connections; ++ uint concurrent_connections; ++ time_t connected_time; // in seconds ++ double busy_time; // in seconds ++ double cpu_time; // in seconds ++ ulonglong bytes_received; ++ ulonglong bytes_sent; ++ ulonglong binlog_bytes_written; ++ ha_rows rows_fetched, rows_updated, rows_read; ++ ulonglong select_commands, update_commands, other_commands; ++ ulonglong commit_trans, rollback_trans; ++ ulonglong denied_connections, lost_connections; ++ ulonglong access_denied_errors; ++ ulonglong empty_queries; ++} USER_STATS; ++ ++/* Lookup function for hash tables with USER_STATS entries */ ++extern byte *get_key_user_stats(USER_STATS *user_stats, uint *length, ++ my_bool not_used __attribute__((unused))); ++ ++/* Free all memory for a hash table with USER_STATS entries */ ++extern void free_user_stats(USER_STATS* user_stats); ++ ++/* Intialize an instance of USER_STATS */ ++extern void ++init_user_stats(USER_STATS *user_stats, ++ const char *user, ++ const char *priv_user, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, ++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries); ++ ++/* Increment values of an instance of USER_STATS */ ++extern void ++add_user_stats(USER_STATS *user_stats, ++ uint total_connections, ++ uint concurrent_connections, ++ time_t connected_time, ++ double busy_time, ++ double cpu_time, ++ ulonglong bytes_received, ++ ulonglong bytes_sent, ++ ulonglong binlog_bytes_written, ++ ha_rows rows_fetched, ++ ha_rows rows_updated, ++ ha_rows rows_read, 
++ ulonglong select_commands, ++ ulonglong update_commands, ++ ulonglong other_commands, ++ ulonglong commit_trans, ++ ulonglong rollback_trans, ++ ulonglong denied_connections, ++ ulonglong lost_connections, ++ ulonglong access_denied_errors, ++ ulonglong empty_queries); ++ ++typedef struct st_table_stats { ++ char table[NAME_LEN * 2 + 2]; // [db] + '.' + [table] + '\0' ++ ulonglong rows_read, rows_changed; ++ ulonglong rows_changed_x_indexes; ++ /* Stores enum db_type, but forward declarations cannot be done */ ++ int engine_type; ++} TABLE_STATS; ++ ++typedef struct st_index_stats { ++ char index[NAME_LEN * 3 + 3]; // [db] + '.' + [table] + '.' + [index] + '\0' ++ ulonglong rows_read; ++} INDEX_STATS; ++ ++ + /* Bits in form->update */ + #define REG_MAKE_DUPP 1 /* Make a copy of record when read */ + #define REG_NEW_RECORD 2 /* Write a new record if not found */ +diff -r 23e5576aa59a sql/table.h +--- a/sql/table.h Tue Feb 17 22:33:00 2009 -0800 ++++ b/sql/table.h Tue Feb 17 22:33:23 2009 -0800 +@@ -371,10 +371,12 @@ + enum enum_schema_tables + { + SCH_CHARSETS= 0, ++ SCH_CLIENT_STATS, + SCH_COLLATIONS, + SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, ++ SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, + SCH_PROFILES, +@@ -387,8 +389,10 @@ + SCH_TABLE_CONSTRAINTS, + SCH_TABLE_NAMES, + SCH_TABLE_PRIVILEGES, ++ SCH_TABLE_STATS, + SCH_TRIGGERS, + SCH_USER_PRIVILEGES, ++ SCH_USER_STATS, + SCH_VARIABLES, + SCH_VIEWS + }; +diff -r 23e5576aa59a strings/Makefile.in +--- a/strings/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/strings/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -342,6 +342,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a support-files/MacOSX/Makefile.in +--- a/support-files/MacOSX/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ 
b/support-files/MacOSX/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -148,6 +148,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a support-files/Makefile.in +--- a/support-files/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/support-files/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -171,6 +171,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a support-files/RHEL4-SElinux/Makefile.in +--- a/support-files/RHEL4-SElinux/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/support-files/RHEL4-SElinux/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -146,6 +146,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a tests/Makefile.in +--- a/tests/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/tests/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -193,6 +193,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @CLIENT_LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a tools/Makefile.in +--- a/tools/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/tools/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -167,6 +167,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a vio/Makefile.in +--- a/vio/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/vio/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -176,6 +176,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + 
LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a win/Makefile.in +--- a/win/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/win/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -144,6 +144,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = @LIBS@ + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ +diff -r 23e5576aa59a zlib/Makefile.in +--- a/zlib/Makefile.in Tue Feb 17 22:33:00 2009 -0800 ++++ b/zlib/Makefile.in Tue Feb 17 22:33:23 2009 -0800 +@@ -187,6 +187,7 @@ + LIBDL = @LIBDL@ + LIBEDIT_LOBJECTS = @LIBEDIT_LOBJECTS@ + LIBOBJS = @LIBOBJS@ ++LIBRT = @LIBRT@ + LIBS = $(NON_THREADED_LIBS) + LIBTOOL = @LIBTOOL@ + LIB_EXTRA_CCFLAGS = @LIB_EXTRA_CCFLAGS@ |