Drizzled Public API Documentation

btr0cur.cc
/*****************************************************************************

Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
@file btr/btr0cur.cc
The index tree cursor

All changes that row operations make to a B-tree or the records
there must go through this module! Undo log records are written here
of every modify or insert of a clustered index record.

Created 10/16/1994 Heikki Tuuri
*******************************************************/
#include "btr0cur.h"

#ifdef UNIV_NONINL
#include "btr0cur.ic"
#endif

#include "row0upd.h"
#ifndef UNIV_HOTBACKUP
#include "mtr0log.h"
#include "page0page.h"
#include "page0zip.h"
#include "rem0rec.h"
#include "rem0cmp.h"
#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
#include "trx0roll.h" /* trx_is_recv() */
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "zlib.h"

/** Buffered B-tree operation types, introduced as part of delete buffering. */
typedef enum btr_op_enum {
	BTR_NO_OP = 0,
	BTR_INSERT_OP,
	BTR_INSERT_IGNORE_UNIQUE_OP,
	BTR_DELETE_OP,
	BTR_DELMARK_OP
} btr_op_t;
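/* Added note (not in the original source): callers request one of these
buffered operations through the latch_mode flags of
btr_cur_search_to_nth_level() below; for example

	BTR_MODIFY_LEAF | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE

decodes to BTR_INSERT_IGNORE_UNIQUE_OP, and plain BTR_INSERT to
BTR_INSERT_OP.  Only one of BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK
may be set at a time (see the switch statement in that function). */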

#ifdef UNIV_DEBUG
/* If the following is set to TRUE, this module prints a lot of
trace information of individual record operations */
UNIV_INTERN ibool	btr_cur_print_record_ops = FALSE;
#endif /* UNIV_DEBUG */

/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
UNIV_INTERN ulint	btr_cur_n_non_sea	= 0;
/** Number of successful adaptive hash index lookups in
btr_cur_search_to_nth_level(). */
UNIV_INTERN ulint	btr_cur_n_sea		= 0;
/** Old value of btr_cur_n_non_sea.  Copied by
srv_refresh_innodb_monitor_stats().  Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint	btr_cur_n_non_sea_old	= 0;
/** Old value of btr_cur_n_sea.  Copied by
srv_refresh_innodb_monitor_stats().  Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;

/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)

/** The structure of a BLOB part header */
/* @{ */
/*--------------------------------------*/
#define BTR_BLOB_HDR_PART_LEN		0	/*!< BLOB part len on this
						page */
#define BTR_BLOB_HDR_NEXT_PAGE_NO	4	/*!< next BLOB part page no,
						FIL_NULL if none */
/*--------------------------------------*/
#define BTR_BLOB_HDR_SIZE		8	/*!< Size of a BLOB
						part header, in bytes */

/** Estimated table level stats from sampled value. */
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
	(((value) * (ib_int64_t) index->stat_n_leaf_pages		\
	  + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))

/* @} */
#endif /* !UNIV_HOTBACKUP */
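/* Worked example (added, not in the original source; the numbers are made
up): suppose a sample of 8 leaf pages yielded value = 40 distinct keys,
index->stat_n_leaf_pages = 1000, ext_size = 0 and not_empty = 1.  Then
BTR_TABLE_STATS_FROM_SAMPLE evaluates to

	(40 * 1000 + 8 - 1 + 0 + 1) / (8 + 0) = 40008 / 8 = 5001

i.e. roughly value * stat_n_leaf_pages / sample, with the correction terms
rounding the division up so that a non-empty table never estimates to 0. */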

/** A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */
UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];

#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
marked record always owns all its extern fields. */
static
void
btr_cur_unmark_extern_fields(
/*=========================*/
	page_zip_des_t*	page_zip,
	rec_t*		rec,
	dict_index_t*	index,
	const ulint*	offsets,
	mtr_t*		mtr);
/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
static
void
btr_cur_add_path_info(
/*==================*/
	btr_cur_t*	cursor,
	ulint		height,
	ulint		root_height);
/***********************************************************//**
Frees the externally stored fields for a record, if the field is mentioned
in the update vector. */
static
void
btr_rec_free_updated_extern_fields(
/*===============================*/
	dict_index_t*	index,
	rec_t*		rec,
	page_zip_des_t*	page_zip,
	const ulint*	offsets,
	const upd_t*	update,
	enum trx_rb_ctx	rb_ctx,
	mtr_t*		mtr);
/***********************************************************//**
Frees the externally stored fields for a record. */
static
void
btr_rec_free_externally_stored_fields(
/*==================================*/
	dict_index_t*	index,
	rec_t*		rec,
	const ulint*	offsets,
	page_zip_des_t*	page_zip,
	enum trx_rb_ctx	rb_ctx,
	mtr_t*		mtr);
/***********************************************************//**
Gets the externally stored size of a record, in units of a database page.
@return externally stored part, in units of a database page */
static
ulint
btr_rec_get_externally_stored_len(
/*==============================*/
	const rec_t*	rec,
	const ulint*	offsets);
#endif /* !UNIV_HOTBACKUP */

/******************************************************//**
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
void
btr_rec_set_deleted_flag(
/*=====================*/
	rec_t*		rec,
	page_zip_des_t*	page_zip,
	ulint		flag)
{
	if (page_rec_is_comp(rec)) {
		rec_set_deleted_flag_new(rec, page_zip, flag);
	} else {
		ut_ad(!page_zip);
		rec_set_deleted_flag_old(rec, flag);
	}
}

#ifndef UNIV_HOTBACKUP
/*==================== B-TREE SEARCH =========================*/

/********************************************************************//**
Latches the leaf page or pages requested. */
static
void
btr_cur_latch_leaves(
/*=================*/
	page_t*		page,
	ulint		space,
	ulint		zip_size,
	ulint		page_no,
	ulint		latch_mode,
	btr_cur_t*	cursor,
	mtr_t*		mtr)
{
	ulint		mode;
	ulint		left_page_no;
	ulint		right_page_no;
	buf_block_t*	get_block;

	ut_ad(page && mtr);

	switch (latch_mode) {
	case BTR_SEARCH_LEAF:
	case BTR_MODIFY_LEAF:
		mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
		get_block->check_index_page_at_flush = TRUE;
		return;
	case BTR_MODIFY_TREE:
		/* x-latch also brothers from left to right */
		left_page_no = btr_page_get_prev(page, mtr);

		if (left_page_no != FIL_NULL) {
			get_block = btr_block_get(space, zip_size,
						  left_page_no,
						  RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
			ut_a(page_is_comp(get_block->frame)
			     == page_is_comp(page));
			ut_a(btr_page_get_next(get_block->frame, mtr)
			     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
			get_block->check_index_page_at_flush = TRUE;
		}

		get_block = btr_block_get(space, zip_size, page_no,
					  RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
		get_block->check_index_page_at_flush = TRUE;

		right_page_no = btr_page_get_next(page, mtr);

		if (right_page_no != FIL_NULL) {
			get_block = btr_block_get(space, zip_size,
						  right_page_no,
						  RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
			ut_a(page_is_comp(get_block->frame)
			     == page_is_comp(page));
			ut_a(btr_page_get_prev(get_block->frame, mtr)
			     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
			get_block->check_index_page_at_flush = TRUE;
		}

		return;

	case BTR_SEARCH_PREV:
	case BTR_MODIFY_PREV:
		mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
		/* latch also left brother */
		left_page_no = btr_page_get_prev(page, mtr);

		if (left_page_no != FIL_NULL) {
			get_block = btr_block_get(space, zip_size,
						  left_page_no, mode, mtr);
			cursor->left_block = get_block;
#ifdef UNIV_BTR_DEBUG
			ut_a(page_is_comp(get_block->frame)
			     == page_is_comp(page));
			ut_a(btr_page_get_next(get_block->frame, mtr)
			     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
			get_block->check_index_page_at_flush = TRUE;
		}

		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
		get_block->check_index_page_at_flush = TRUE;
		return;
	}

	ut_error;
}
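/* Added commentary (not in the original source): in the BTR_MODIFY_TREE
case above, the siblings are always x-latched in a fixed left-to-right
order: left brother, the page itself, then the right brother.  Taking
sibling latches in one global order is what prevents two threads that
both need neighbouring leaves from deadlocking against each other. */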
328 
329 /********************************************************************/
341 UNIV_INTERN
342 void
344 /*========================*/
345  dict_index_t* index,
346  ulint level,
347  const dtuple_t* tuple,
350  ulint mode,
353  ulint latch_mode,
364  btr_cur_t* cursor,
366  ulint has_search_latch,
369  const char* file,
370  ulint line,
371  mtr_t* mtr)
372 {
373  page_t* page;
374  buf_block_t* block;
375  ulint space;
376  buf_block_t* guess;
377  ulint height;
378  ulint page_no;
379  ulint up_match;
380  ulint up_bytes;
381  ulint low_match;
382  ulint low_bytes;
383  ulint savepoint;
384  ulint rw_latch;
385  ulint page_mode;
386  ulint buf_mode;
387  ulint estimate;
388  ulint zip_size;
389  page_cur_t* page_cursor;
390  btr_op_t btr_op;
391  ulint root_height = 0; /* remove warning */
392 
393 #ifdef BTR_CUR_ADAPT
394  btr_search_t* info;
395 #endif
396  mem_heap_t* heap = NULL;
397  ulint offsets_[REC_OFFS_NORMAL_SIZE];
398  ulint* offsets = offsets_;
399  rec_offs_init(offsets_);
400  /* Currently, PAGE_CUR_LE is the only search mode used for searches
401  ending to upper levels */
402 
403  ut_ad(level == 0 || mode == PAGE_CUR_LE);
404  ut_ad(dict_index_check_search_tuple(index, tuple));
405  ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
406  ut_ad(dtuple_check_typed(tuple));
407 
408 #ifdef UNIV_DEBUG
409  cursor->up_match = ULINT_UNDEFINED;
410  cursor->low_match = ULINT_UNDEFINED;
411 #endif
412 
413  /* These flags are mutually exclusive, they are lumped together
414  with the latch mode for historical reasons. It's possible for
415  none of the flags to be set. */
416  switch (UNIV_EXPECT(latch_mode
418  0)) {
419  case 0:
420  btr_op = BTR_NO_OP;
421  break;
422  case BTR_INSERT:
423  btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
424  ? BTR_INSERT_IGNORE_UNIQUE_OP
425  : BTR_INSERT_OP;
426  break;
427  case BTR_DELETE:
428  btr_op = BTR_DELETE_OP;
429  ut_a(cursor->purge_node);
430  break;
431  case BTR_DELETE_MARK:
432  btr_op = BTR_DELMARK_OP;
433  break;
434  default:
435  /* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
436  should be specified at a time */
437  ut_error;
438  }
439 
440  /* Operations on the insert buffer tree cannot be buffered. */
441  ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
442  /* Operations on the clustered index cannot be buffered. */
443  ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
444 
445  estimate = latch_mode & BTR_ESTIMATE;
446 
447  /* Turn the flags unrelated to the latch mode off. */
448  latch_mode &= ~(BTR_INSERT
450  | BTR_DELETE
451  | BTR_ESTIMATE
453 
454  cursor->flag = BTR_CUR_BINARY;
455  cursor->index = index;
456 
457  cursor->ibuf_cnt = ULINT_UNDEFINED;
458 
459 #ifndef BTR_CUR_ADAPT
460  guess = NULL;
461 #else
462  info = btr_search_get_info(index);
463 
464  guess = info->root_guess;
465 
466 #ifdef BTR_CUR_HASH_ADAPT
467 
468 #ifdef UNIV_SEARCH_PERF_STAT
469  info->n_searches++;
470 #endif
471  if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
472  && latch_mode <= BTR_MODIFY_LEAF
473  && info->last_hash_succ
474  && !estimate
475 #ifdef PAGE_CUR_LE_OR_EXTENDS
476  && mode != PAGE_CUR_LE_OR_EXTENDS
477 #endif /* PAGE_CUR_LE_OR_EXTENDS */
478  /* If !has_search_latch, we do a dirty read of
479  btr_search_enabled below, and btr_search_guess_on_hash()
480  will have to check it again. */
481  && UNIV_LIKELY(btr_search_enabled)
482  && btr_search_guess_on_hash(index, info, tuple, mode,
483  latch_mode, cursor,
484  has_search_latch, mtr)) {
485 
486  /* Search using the hash index succeeded */
487 
488  ut_ad(cursor->up_match != ULINT_UNDEFINED
489  || mode != PAGE_CUR_GE);
490  ut_ad(cursor->up_match != ULINT_UNDEFINED
491  || mode != PAGE_CUR_LE);
492  ut_ad(cursor->low_match != ULINT_UNDEFINED
493  || mode != PAGE_CUR_LE);
494  btr_cur_n_sea++;
495 
496  return;
497  }
498 #endif /* BTR_CUR_HASH_ADAPT */
499 #endif /* BTR_CUR_ADAPT */
501 
502  /* If the hash search did not succeed, do binary search down the
503  tree */
504 
505  if (has_search_latch) {
506  /* Release possible search latch to obey latching order */
507  rw_lock_s_unlock(&btr_search_latch);
508  }
509 
510  /* Store the position of the tree latch we push to mtr so that we
511  know how to release it when we have latched leaf node(s) */
512 
513  savepoint = mtr_set_savepoint(mtr);
514 
515  if (latch_mode == BTR_MODIFY_TREE) {
516  mtr_x_lock(dict_index_get_lock(index), mtr);
517 
518  } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
519  /* Do nothing */
520  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
521  MTR_MEMO_X_LOCK));
522  } else {
523  mtr_s_lock(dict_index_get_lock(index), mtr);
524  }
525 
526  page_cursor = btr_cur_get_page_cur(cursor);
527 
528  space = dict_index_get_space(index);
529  page_no = dict_index_get_page(index);
530 
531  up_match = 0;
532  up_bytes = 0;
533  low_match = 0;
534  low_bytes = 0;
535 
536  height = ULINT_UNDEFINED;
537 
538  /* We use these modified search modes on non-leaf levels of the
539  B-tree. These let us end up in the right B-tree leaf. In that leaf
540  we use the original search mode. */
541 
542  switch (mode) {
543  case PAGE_CUR_GE:
544  page_mode = PAGE_CUR_L;
545  break;
546  case PAGE_CUR_G:
547  page_mode = PAGE_CUR_LE;
548  break;
549  default:
550 #ifdef PAGE_CUR_LE_OR_EXTENDS
551  ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
552  || mode == PAGE_CUR_LE_OR_EXTENDS);
553 #else /* PAGE_CUR_LE_OR_EXTENDS */
554  ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
555 #endif /* PAGE_CUR_LE_OR_EXTENDS */
556  page_mode = mode;
557  break;
558  }
559 
560  /* Loop and search until we arrive at the desired level */
561 
562 search_loop:
563  buf_mode = BUF_GET;
564  rw_latch = RW_NO_LATCH;
565 
566  if (height != 0) {
567  /* We are about to fetch the root or a non-leaf page. */
568  } else if (latch_mode <= BTR_MODIFY_LEAF) {
569  rw_latch = latch_mode;
570 
571  if (btr_op != BTR_NO_OP
572  && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
573 
574  /* Try to buffer the operation if the leaf
575  page is not in the buffer pool. */
576 
577  buf_mode = btr_op == BTR_DELETE_OP
580  }
581  }
582 
583  zip_size = dict_table_zip_size(index->table);
584 
585 retry_page_get:
586  block = buf_page_get_gen(
587  space, zip_size, page_no, rw_latch, guess, buf_mode,
588  file, line, mtr);
589 
590  if (block == NULL) {
591  /* This must be a search to perform an insert/delete
592  mark/ delete; try using the insert/delete buffer */
593 
594  ut_ad(height == 0);
595  ut_ad(cursor->thr);
596 
597  switch (btr_op) {
598  case BTR_INSERT_OP:
599  case BTR_INSERT_IGNORE_UNIQUE_OP:
600  ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
601 
602  if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
603  space, zip_size, page_no,
604  cursor->thr)) {
605 
606  cursor->flag = BTR_CUR_INSERT_TO_IBUF;
607 
608  goto func_exit;
609  }
610  break;
611 
612  case BTR_DELMARK_OP:
613  ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
614 
615  if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
616  index, space, zip_size,
617  page_no, cursor->thr)) {
618 
619  cursor->flag = BTR_CUR_DEL_MARK_IBUF;
620 
621  goto func_exit;
622  }
623 
624  break;
625 
626  case BTR_DELETE_OP:
627  ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
628 
629  if (!row_purge_poss_sec(cursor->purge_node,
630  index, tuple)) {
631 
632  /* The record cannot be purged yet. */
633  cursor->flag = BTR_CUR_DELETE_REF;
634  } else if (ibuf_insert(IBUF_OP_DELETE, tuple,
635  index, space, zip_size,
636  page_no,
637  cursor->thr)) {
638 
639  /* The purge was buffered. */
640  cursor->flag = BTR_CUR_DELETE_IBUF;
641  } else {
642  /* The purge could not be buffered. */
643  buf_pool_watch_unset(space, page_no);
644  break;
645  }
646 
647  buf_pool_watch_unset(space, page_no);
648  goto func_exit;
649 
650  default:
651  ut_error;
652  }
653 
654  /* Insert to the insert/delete buffer did not succeed, we
655  must read the page from disk. */
656 
657  buf_mode = BUF_GET;
658 
659  goto retry_page_get;
660  }
661 
662  block->check_index_page_at_flush = TRUE;
663  page = buf_block_get_frame(block);
664 
665  if (rw_latch != RW_NO_LATCH) {
666 #ifdef UNIV_ZIP_DEBUG
667  const page_zip_des_t* page_zip
668  = buf_block_get_page_zip(block);
669  ut_a(!page_zip || page_zip_validate(page_zip, page));
670 #endif /* UNIV_ZIP_DEBUG */
671 
672  buf_block_dbg_add_level(block, SYNC_TREE_NODE);
673  }
674 
675  ut_ad(index->id == btr_page_get_index_id(page));
676 
677  if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
678  /* We are in the root node */
679 
680  height = btr_page_get_level(page, mtr);
681  root_height = height;
682  cursor->tree_height = root_height + 1;
683 
684 #ifdef BTR_CUR_ADAPT
685  if (block != guess) {
686  info->root_guess = block;
687  }
688 #endif
689  }
690 
691  if (height == 0) {
692  if (rw_latch == RW_NO_LATCH) {
693 
694  btr_cur_latch_leaves(
695  page, space, zip_size, page_no, latch_mode,
696  cursor, mtr);
697  }
698 
699  if (latch_mode != BTR_MODIFY_TREE
700  && latch_mode != BTR_CONT_MODIFY_TREE) {
701 
702  /* Release the tree s-latch */
703 
705  mtr, savepoint, dict_index_get_lock(index));
706  }
707 
708  page_mode = mode;
709  }
710 
712  block, index, tuple, page_mode, &up_match, &up_bytes,
713  &low_match, &low_bytes, page_cursor);
714 
715  if (estimate) {
716  btr_cur_add_path_info(cursor, height, root_height);
717  }
718 
719  /* If this is the desired level, leave the loop */
720 
721  ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
722  mtr));
723 
724  if (level != height) {
725 
726  const rec_t* node_ptr;
727  ut_ad(height > 0);
728 
729  height--;
730  guess = NULL;
731 
732  node_ptr = page_cur_get_rec(page_cursor);
733 
734  offsets = rec_get_offsets(
735  node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
736 
737  /* Go to the child node */
738  page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
739 
740  if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
741  /* We're doing a search on an ibuf tree and we're one
742  level above the leaf page. */
743 
744  ulint is_min_rec;
745 
746  ut_ad(level == 0);
747 
748  is_min_rec = rec_get_info_bits(node_ptr, 0)
749  & REC_INFO_MIN_REC_FLAG;
750 
751  if (!is_min_rec) {
752  cursor->ibuf_cnt
753  = ibuf_rec_get_counter(node_ptr);
754 
755  ut_a(cursor->ibuf_cnt <= 0xFFFF
756  || cursor->ibuf_cnt == ULINT_UNDEFINED);
757  }
758 
759  buf_mode = BUF_GET;
760  rw_latch = RW_NO_LATCH;
761  goto retry_page_get;
762  }
763 
764  goto search_loop;
765  }
766 
767  if (level != 0) {
768  /* x-latch the page */
769  page = btr_page_get(
770  space, zip_size, page_no, RW_X_LATCH, mtr);
771 
772  ut_a((ibool)!!page_is_comp(page)
773  == dict_table_is_comp(index->table));
774  } else {
775  cursor->low_match = low_match;
776  cursor->low_bytes = low_bytes;
777  cursor->up_match = up_match;
778  cursor->up_bytes = up_bytes;
779 
780 #ifdef BTR_CUR_ADAPT
781  /* We do a dirty read of btr_search_enabled here. We
782  will properly check btr_search_enabled again in
783  btr_search_build_page_hash_index() before building a
784  page hash index, while holding btr_search_latch. */
785  if (UNIV_LIKELY(btr_search_enabled)) {
786 
787  btr_search_info_update(index, cursor);
788  }
789 #endif
790  ut_ad(cursor->up_match != ULINT_UNDEFINED
791  || mode != PAGE_CUR_GE);
792  ut_ad(cursor->up_match != ULINT_UNDEFINED
793  || mode != PAGE_CUR_LE);
794  ut_ad(cursor->low_match != ULINT_UNDEFINED
795  || mode != PAGE_CUR_LE);
796  }
797 
798 func_exit:
799 
800  if (UNIV_LIKELY_NULL(heap)) {
801  mem_heap_free(heap);
802  }
803 
804  if (has_search_latch) {
805 
807  }
808 }
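/* Illustrative usage sketch (added, not part of the original source):
positioning a cursor with btr_cur_search_to_nth_level().  The mtr/cursor
handling mirrors real call sites such as row_ins_index_entry_low();
"index" and "entry" stand for a valid dict_index_t* and search tuple. */
#if 0
	mtr_t		mtr;
	btr_cur_t	cursor;

	mtr_start(&mtr);
	/* Position the cursor on the leaf level (level 0) on the last
	record <= entry, latching only that leaf for modification. */
	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
				    BTR_MODIFY_LEAF, &cursor, 0,
				    __FILE__, __LINE__, &mtr);
	/* ... operate on btr_cur_get_rec(&cursor) ... */
	mtr_commit(&mtr);
#endif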

/*****************************************************************//**
Opens a cursor at either end of an index. */
UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
	ibool		from_left,
	dict_index_t*	index,
	ulint		latch_mode,
	btr_cur_t*	cursor,
	const char*	file,
	ulint		line,
	mtr_t*		mtr)
{
	page_cur_t*	page_cursor;
	ulint		page_no;
	ulint		space;
	ulint		zip_size;
	ulint		height;
	ulint		root_height = 0; /* remove warning */
	rec_t*		node_ptr;
	ulint		estimate;
	ulint		savepoint;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	estimate = latch_mode & BTR_ESTIMATE;
	latch_mode = latch_mode & ~BTR_ESTIMATE;

	/* Store the position of the tree latch we push to mtr so that we
	know how to release it when we have latched the leaf node */

	savepoint = mtr_set_savepoint(mtr);

	if (latch_mode == BTR_MODIFY_TREE) {
		mtr_x_lock(dict_index_get_lock(index), mtr);
	} else {
		mtr_s_lock(dict_index_get_lock(index), mtr);
	}

	page_cursor = btr_cur_get_page_cur(cursor);
	cursor->index = index;

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);
	page_no = dict_index_get_page(index);

	height = ULINT_UNDEFINED;

	for (;;) {
		buf_block_t*	block;
		page_t*		page;
		block = buf_page_get_gen(space, zip_size, page_no,
					 RW_NO_LATCH, NULL, BUF_GET,
					 file, line, mtr);
		page = buf_block_get_frame(block);
		ut_ad(index->id == btr_page_get_index_id(page));

		block->check_index_page_at_flush = TRUE;

		if (height == ULINT_UNDEFINED) {
			/* We are in the root node */

			height = btr_page_get_level(page, mtr);
			root_height = height;
		}

		if (height == 0) {
			btr_cur_latch_leaves(page, space, zip_size, page_no,
					     latch_mode, cursor, mtr);

			/* In versions <= 3.23.52 we had forgotten to
			release the tree latch here. If in an index scan
			we had to scan far to find a record visible to the
			current transaction, that could starve others
			waiting for the tree latch. */

			if ((latch_mode != BTR_MODIFY_TREE)
			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {

				/* Release the tree s-latch */

				mtr_release_s_latch_at_savepoint(
					mtr, savepoint,
					dict_index_get_lock(index));
			}
		}

		if (from_left) {
			page_cur_set_before_first(block, page_cursor);
		} else {
			page_cur_set_after_last(block, page_cursor);
		}

		if (height == 0) {
			if (estimate) {
				btr_cur_add_path_info(cursor, height,
						      root_height);
			}

			break;
		}

		ut_ad(height > 0);

		if (from_left) {
			page_cur_move_to_next(page_cursor);
		} else {
			page_cur_move_to_prev(page_cursor);
		}

		if (estimate) {
			btr_cur_add_path_info(cursor, height, root_height);
		}

		height--;

		node_ptr = page_cur_get_rec(page_cursor);
		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
					  ULINT_UNDEFINED, &heap);
		/* Go to the child node */
		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}

/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
btr_cur_open_at_rnd_pos_func(
/*=========================*/
	dict_index_t*	index,
	ulint		latch_mode,
	btr_cur_t*	cursor,
	const char*	file,
	ulint		line,
	mtr_t*		mtr)
{
	page_cur_t*	page_cursor;
	ulint		page_no;
	ulint		space;
	ulint		zip_size;
	ulint		height;
	rec_t*		node_ptr;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	if (latch_mode == BTR_MODIFY_TREE) {
		mtr_x_lock(dict_index_get_lock(index), mtr);
	} else {
		mtr_s_lock(dict_index_get_lock(index), mtr);
	}

	page_cursor = btr_cur_get_page_cur(cursor);
	cursor->index = index;

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);
	page_no = dict_index_get_page(index);

	height = ULINT_UNDEFINED;

	for (;;) {
		buf_block_t*	block;
		page_t*		page;

		block = buf_page_get_gen(space, zip_size, page_no,
					 RW_NO_LATCH, NULL, BUF_GET,
					 file, line, mtr);
		page = buf_block_get_frame(block);
		ut_ad(index->id == btr_page_get_index_id(page));

		if (height == ULINT_UNDEFINED) {
			/* We are in the root node */

			height = btr_page_get_level(page, mtr);
		}

		if (height == 0) {
			btr_cur_latch_leaves(page, space, zip_size, page_no,
					     latch_mode, cursor, mtr);
		}

		page_cur_open_on_rnd_user_rec(block, page_cursor);

		if (height == 0) {

			break;
		}

		ut_ad(height > 0);

		height--;

		node_ptr = page_cur_get_rec(page_cursor);
		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
					  ULINT_UNDEFINED, &heap);
		/* Go to the child node */
		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}

/*==================== B-TREE INSERT =========================*/

/*************************************************************//**
Inserts a record if there is enough space, or if enough space can
be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
@return	pointer to inserted record if succeed, else NULL */
static
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
	btr_cur_t*	cursor,
	const dtuple_t*	tuple,
	ulint		n_ext,
	mtr_t*		mtr)
{
	page_cur_t*	page_cursor;
	buf_block_t*	block;
	rec_t*		rec;

	ut_ad(dtuple_check_typed(tuple));

	block = btr_cur_get_block(cursor);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	page_cursor = btr_cur_get_page_cur(cursor);

	/* Now, try the insert */
	rec = page_cur_tuple_insert(page_cursor, tuple,
				    cursor->index, n_ext, mtr);

	if (UNIV_UNLIKELY(!rec)) {
		/* If record did not fit, reorganize */

		if (btr_page_reorganize(block, cursor->index, mtr)) {

			page_cur_search(block, cursor->index, tuple,
					PAGE_CUR_LE, page_cursor);

			rec = page_cur_tuple_insert(page_cursor, tuple,
						    cursor->index, n_ext, mtr);
		}
	}

	return(rec);
}

/*************************************************************//**
For an insert, checks the locks and does the undo logging if desired.
@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INLINE
ulint
btr_cur_ins_lock_and_undo(
/*======================*/
	ulint		flags,
	btr_cur_t*	cursor,
	dtuple_t*	entry,
	que_thr_t*	thr,
	mtr_t*		mtr,
	ibool*		inherit)
{
	dict_index_t*	index;
	ulint		err;
	rec_t*		rec;
	roll_ptr_t	roll_ptr;

	/* Check if we have to wait for a lock: enqueue an explicit lock
	request if yes */

	rec = btr_cur_get_rec(cursor);
	index = cursor->index;

	err = lock_rec_insert_check_and_lock(flags, rec,
					     btr_cur_get_block(cursor),
					     index, thr, mtr, inherit);

	if (err != DB_SUCCESS) {

		return(err);
	}

	if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {

		err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
						    thr, index, entry,
						    NULL, 0, NULL,
						    &roll_ptr);
		if (err != DB_SUCCESS) {

			return(err);
		}

		/* Now we can fill in the roll ptr field in entry */

		if (!(flags & BTR_KEEP_SYS_FLAG)) {

			row_upd_index_entry_sys_field(entry, index,
						      DATA_ROLL_PTR, roll_ptr);
		}
	}

	return(DB_SUCCESS);
}

#ifdef UNIV_DEBUG
/*************************************************************//**
Report information about a transaction. */
static
void
btr_cur_trx_report(
/*===============*/
	trx_t*			trx,
	const dict_index_t*	index,
	const char*		op)
{
	fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
		(ullint) trx->id);
	fputs(op, stderr);
	dict_index_name_print(stderr, trx, index);
	putc('\n', stderr);
}
#endif /* UNIV_DEBUG */

/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
ulint
btr_cur_optimistic_insert(
/*======================*/
	ulint		flags,
	btr_cur_t*	cursor,
	dtuple_t*	entry,
	rec_t**		rec,
	big_rec_t**	big_rec,
	ulint		n_ext,
	que_thr_t*	thr,
	mtr_t*		mtr)
{
	big_rec_t*	big_rec_vec	= NULL;
	dict_index_t*	index;
	page_cur_t*	page_cursor;
	buf_block_t*	block;
	page_t*		page;
	ulint		max_size;
	rec_t*		dummy_rec;
	ibool		leaf;
	ibool		reorg;
	ibool		inherit;
	ulint		zip_size;
	ulint		rec_size;
	ulint		err;

	*big_rec = NULL;

	block = btr_cur_get_block(cursor);
	page = buf_block_get_frame(block);
	index = cursor->index;
	zip_size = buf_block_get_zip_size(block);
#ifdef UNIV_DEBUG_VALGRIND
	if (zip_size) {
		UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
		UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	if (!dtuple_check_typed_no_assert(entry)) {
		fputs("InnoDB: Error in a tuple to insert into ", stderr);
		dict_index_name_print(stderr, thr_get_trx(thr), index);
	}
#ifdef UNIV_DEBUG
	if (btr_cur_print_record_ops && thr) {
		btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
		dtuple_print(stderr, entry);
	}
#endif /* UNIV_DEBUG */

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	max_size = page_get_max_insert_size_after_reorganize(page, 1);
	leaf = page_is_leaf(page);

	/* Calculate the record size when entry is converted to a record */
	rec_size = rec_get_converted_size(index, entry, n_ext);

	if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
				   dtuple_get_n_fields(entry), zip_size)) {

		/* The record is so big that we have to store some fields
		externally on separate database pages */
		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);

		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {

			return(DB_TOO_BIG_RECORD);
		}

		rec_size = rec_get_converted_size(index, entry, n_ext);
	}

	if (UNIV_UNLIKELY(zip_size)) {
		/* Estimate the free space of an empty compressed page.
		Subtract one byte for the encoded heap_no in the
		modification log. */
		ulint	free_space_zip = page_zip_empty_size(
			cursor->index->n_fields, zip_size) - 1;
		ulint	n_uniq = dict_index_get_n_unique_in_tree(index);

		ut_ad(dict_table_is_comp(index->table));

		/* There should be enough room for two node pointer
		records on an empty non-leaf page. This prevents
		infinite page splits. */

		if (UNIV_LIKELY(entry->n_fields >= n_uniq)
		    && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
				     + rec_get_converted_size_comp_prefix(
					     index, entry->fields, n_uniq,
					     NULL)
				     /* On a compressed page, there is
				     a two-byte entry in the dense
				     page directory for every record.
				     But there is no record header. */
				     - (REC_N_NEW_EXTRA_BYTES - 2)
				     > free_space_zip / 2)) {

			if (big_rec_vec) {
				dtuple_convert_back_big_rec(
					index, entry, big_rec_vec);
			}

			return(DB_TOO_BIG_RECORD);
		}
	}

	/* If there have been many consecutive inserts, and we are on the leaf
	level, check if we have to split the page to reserve enough free space
	for future updates of records. */

	if (dict_index_is_clust(index)
	    && (page_get_n_recs(page) >= 2)
	    && UNIV_LIKELY(leaf)
	    && (dict_index_get_space_reserve() + rec_size > max_size)
	    && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
		|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
fail:
		err = DB_FAIL;
fail_err:

		if (big_rec_vec) {
			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
		}

		return(err);
	}

	if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
			  || max_size < rec_size)
	    && UNIV_LIKELY(page_get_n_recs(page) > 1)
	    && page_get_max_insert_size(page, 1) < rec_size) {

		goto fail;
	}

	/* Check locks and write to the undo log, if specified */
	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
					thr, mtr, &inherit);

	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {

		goto fail_err;
	}

	page_cursor = btr_cur_get_page_cur(cursor);

	/* Now, try the insert */

	{
		const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
		*rec = page_cur_tuple_insert(page_cursor, entry, index,
					     n_ext, mtr);
		reorg = page_cursor_rec != page_cur_get_rec(page_cursor);

		if (UNIV_UNLIKELY(reorg)) {
			ut_a(zip_size);
			ut_a(*rec);
		}
	}

	if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
		/* If the record did not fit, reorganize */
		if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
			ut_a(zip_size);

			goto fail;
		}

		ut_ad(zip_size
		      || page_get_max_insert_size(page, 1) == max_size);

		reorg = TRUE;

		page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);

		*rec = page_cur_tuple_insert(page_cursor, entry, index,
					     n_ext, mtr);

		if (UNIV_UNLIKELY(!*rec)) {
			if (UNIV_LIKELY(zip_size != 0)) {

				goto fail;
			}

			fputs("InnoDB: Error: cannot insert tuple ", stderr);
			dtuple_print(stderr, entry);
			fputs(" into ", stderr);
			dict_index_name_print(stderr, thr_get_trx(thr), index);
			fprintf(stderr, "\nInnoDB: max insert size %lu\n",
				(ulong) max_size);
			ut_error;
		}
	}

#ifdef BTR_CUR_HASH_ADAPT
	if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
		btr_search_update_hash_node_on_insert(cursor);
	} else {
		btr_search_update_hash_on_insert(cursor);
	}
#endif

	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {

		lock_update_insert(block, *rec);
	}

#if 0
	fprintf(stderr, "Insert into page %lu, max ins size %lu,"
		" rec %lu ind type %lu\n",
		buf_block_get_page_no(block), max_size,
		rec_size + PAGE_DIR_SLOT_SIZE, index->type);
#endif
	if (leaf && !dict_index_is_clust(index)) {
		/* Update the free bits of the B-tree page in the
		insert buffer bitmap. */

		/* The free bits in the insert buffer bitmap must
		never exceed the free space on a page. It is safe to
		decrement or reset the bits in the bitmap in a
		mini-transaction that is committed before the
		mini-transaction that affects the free space. */

		/* It is unsafe to increment the bits in a separately
		committed mini-transaction, because in crash recovery,
		the free bits could momentarily be set too high. */

		if (zip_size) {
			/* Update the bits in the same mini-transaction. */
			ibuf_update_free_bits_zip(block, mtr);
		} else {
			/* Decrement the bits in a separate
			mini-transaction. */
			ibuf_update_free_bits_if_full(
				block, max_size,
				rec_size + PAGE_DIR_SLOT_SIZE);
		}
	}

	*big_rec = big_rec_vec;

	return(DB_SUCCESS);
}

/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return	DB_SUCCESS or error number */
UNIV_INTERN
ulint
btr_cur_pessimistic_insert(
/*=======================*/
	ulint		flags,
	btr_cur_t*	cursor,
	dtuple_t*	entry,
	rec_t**		rec,
	big_rec_t**	big_rec,
	ulint		n_ext,
	que_thr_t*	thr,
	mtr_t*		mtr)
{
	dict_index_t*	index		= cursor->index;
	ulint		zip_size	= dict_table_zip_size(index->table);
	big_rec_t*	big_rec_vec	= NULL;
	mem_heap_t*	heap		= NULL;
	ulint		err;
	ibool		dummy_inh;
	ibool		success;
	ulint		n_extents	= 0;
	ulint		n_reserved;

	ut_ad(dtuple_check_typed(entry));

	*big_rec = NULL;

	ut_ad(mtr_memo_contains(mtr,
				dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
				MTR_MEMO_PAGE_X_FIX));

	/* Try first an optimistic insert; reset the cursor flag: we do not
	assume anything of how it was positioned */

	cursor->flag = BTR_CUR_BINARY;

	err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
					big_rec, n_ext, thr, mtr);
	if (err != DB_FAIL) {

		return(err);
	}

	/* Retry with a pessimistic insert. Check locks and write to undo log,
	if specified */

	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
					thr, mtr, &dummy_inh);

	if (err != DB_SUCCESS) {

		return(err);
	}

	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
		/* First reserve enough free space for the file segments
		of the index tree, so that the insert will not fail because
		of lack of space */

		n_extents = cursor->tree_height / 16 + 3;

		success = fsp_reserve_free_extents(&n_reserved, index->space,
						   n_extents, FSP_NORMAL, mtr);
		if (!success) {
			return(DB_OUT_OF_FILE_SPACE);
		}
	}

	if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
				   dict_table_is_comp(index->table),
				   dict_index_get_n_fields(index),
				   zip_size)) {
		/* The record is so big that we have to store some fields
		externally on separate database pages */

		if (UNIV_LIKELY_NULL(big_rec_vec)) {
			/* This should never happen, but we handle
			the situation in a robust manner. */
			ut_ad(0);
			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
		}

		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);

		if (big_rec_vec == NULL) {

			if (n_extents > 0) {
				fil_space_release_free_extents(index->space,
							       n_reserved);
			}
			return(DB_TOO_BIG_RECORD);
		}
	}

	if (dict_index_get_page(index)
	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {

		/* The page is the root page */
		*rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
	} else {
		*rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);

#ifdef BTR_CUR_ADAPT
	btr_search_update_hash_on_insert(cursor);
#endif
	if (!(flags & BTR_NO_LOCKING_FLAG)) {

		lock_update_insert(btr_cur_get_block(cursor), *rec);
	}

	if (n_extents > 0) {
		fil_space_release_free_extents(index->space, n_reserved);
	}

	*big_rec = big_rec_vec;

	return(DB_SUCCESS);
}
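/* Illustrative usage sketch (added, not part of the original source):
the optimistic-then-pessimistic pattern that callers such as
row_ins_index_entry_low() follow; the variables here are assumptions. */
#if 0
	err = btr_cur_optimistic_insert(0, &cursor, entry, &rec,
					&big_rec, 0, thr, &mtr);
	if (err == DB_FAIL) {
		/* The leaf page was full.  The caller re-positions the
		cursor under BTR_MODIFY_TREE and retries pessimistically,
		which may split pages (note above that the pessimistic
		insert itself first retries the optimistic path). */
		err = btr_cur_pessimistic_insert(0, &cursor, entry, &rec,
						 &big_rec, 0, thr, &mtr);
	}
#endif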

/*==================== B-TREE UPDATE =========================*/

/*************************************************************//**
For an update, checks the locks and does the undo logging.
@return	DB_SUCCESS, DB_WAIT_LOCK, or error number */
UNIV_INLINE
ulint
btr_cur_upd_lock_and_undo(
/*======================*/
	ulint		flags,
	btr_cur_t*	cursor,
	const upd_t*	update,
	ulint		cmpl_info,
	que_thr_t*	thr,
	mtr_t*		mtr,
	roll_ptr_t*	roll_ptr)
{
	dict_index_t*	index;
	rec_t*		rec;
	ulint		err;

	ut_ad(cursor && update && thr && roll_ptr);

	rec = btr_cur_get_rec(cursor);
	index = cursor->index;

	if (!dict_index_is_clust(index)) {
		/* We do undo logging only when we update a clustered index
		record */
		return(lock_sec_rec_modify_check_and_lock(
			       flags, btr_cur_get_block(cursor), rec,
			       index, thr, mtr));
	}

	/* Check if we have to wait for a lock: enqueue an explicit lock
	request if yes */

	err = DB_SUCCESS;

	if (!(flags & BTR_NO_LOCKING_FLAG)) {
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		rec_offs_init(offsets_);

		err = lock_clust_rec_modify_check_and_lock(
			flags, btr_cur_get_block(cursor), rec, index,
			rec_get_offsets(rec, index, offsets_,
					ULINT_UNDEFINED, &heap), thr);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
		if (err != DB_SUCCESS) {

			return(err);
		}
	}

	/* Append the info about the update in the undo log */

	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
					    index, NULL, update,
					    cmpl_info, rec, roll_ptr);
	return(err);
}

/***********************************************************//**
Writes a redo log record of updating a record in-place. */
UNIV_INLINE
void
btr_cur_update_in_place_log(
/*========================*/
	ulint		flags,
	rec_t*		rec,
	dict_index_t*	index,
	const upd_t*	update,
	trx_t*		trx,
	roll_ptr_t	roll_ptr,
	mtr_t*		mtr)
{
	byte*	log_ptr;
	page_t*	page	= page_align(rec);
	ut_ad(flags < 256);
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));

	log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
					    ? MLOG_COMP_REC_UPDATE_IN_PLACE
					    : MLOG_REC_UPDATE_IN_PLACE,
					    1 + DATA_ROLL_PTR_LEN + 14 + 2
					    + MLOG_BUF_MARGIN);

	if (!log_ptr) {
		/* Logging in mtr is switched off during crash recovery */
		return;
	}

	/* The code below assumes index is a clustered index: change index to
	the clustered index if we are updating a secondary index record (or we
	could as well skip writing the sys col values to the log in this case
	because they are not needed for a secondary index record update) */

	index = dict_table_get_first_index(index->table);

	mach_write_to_1(log_ptr, flags);
	log_ptr++;

	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
						mtr);
	mach_write_to_2(log_ptr, page_offset(rec));
	log_ptr += 2;

	row_upd_index_write_log(update, log_ptr, mtr);
}
#endif /* UNIV_HOTBACKUP */

/***********************************************************//**
Parses a redo log record of updating a record in-place.
@return	end of log record or NULL */
UNIV_INTERN
byte*
btr_cur_parse_update_in_place(
/*==========================*/
	byte*		ptr,
	byte*		end_ptr,
	page_t*		page,
	page_zip_des_t*	page_zip,
	dict_index_t*	index)
{
	ulint		flags;
	rec_t*		rec;
	upd_t*		update;
	ulint		pos;
	trx_id_t	trx_id;
	roll_ptr_t	roll_ptr;
	ulint		rec_offset;
	mem_heap_t*	heap;
	ulint*		offsets;

	if (end_ptr < ptr + 1) {

		return(NULL);
	}

	flags = mach_read_from_1(ptr);
	ptr++;

	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);

	if (ptr == NULL) {

		return(NULL);
	}

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	rec_offset = mach_read_from_2(ptr);
	ptr += 2;

	ut_a(rec_offset <= UNIV_PAGE_SIZE);

	heap = mem_heap_create(256);

	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);

	if (!ptr || !page) {

		goto func_exit;
	}

	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
	rec = page + rec_offset;

	/* We do not need to reserve btr_search_latch, as the page is only
	being recovered, and there cannot be a hash index to it. */

	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);

	if (!(flags & BTR_KEEP_SYS_FLAG)) {
		row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
						   pos, trx_id, roll_ptr);
	}

	row_upd_rec_in_place(rec, index, offsets, update, page_zip);

func_exit:
	mem_heap_free(heap);

	return(ptr);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
@return	TRUE if enough place */
UNIV_INTERN
ibool
btr_cur_update_alloc_zip(
/*=====================*/
	page_zip_des_t*	page_zip,
	buf_block_t*	block,
	dict_index_t*	index,
	ulint		length,
	ibool		create,
	mtr_t*		mtr)
{
	ut_a(page_zip == buf_block_get_page_zip(block));
	ut_ad(page_zip);
	ut_ad(!dict_index_is_ibuf(index));

	if (page_zip_available(page_zip, dict_index_is_clust(index),
			       length, create)) {
		return(TRUE);
	}

	if (!page_zip->m_nonempty) {
		/* The page has been freshly compressed, so
		recompressing it will not help. */
		return(FALSE);
	}

	if (!page_zip_compress(page_zip, buf_block_get_frame(block),
			       index, mtr)) {
		/* Unable to compress the page */
		return(FALSE);
	}

	/* After recompressing a page, we must make sure that the free
	bits in the insert buffer bitmap will not exceed the free
	space on the page. Because this function will not attempt
	recompression unless page_zip_available() fails above, it is
	safe to reset the free bits if page_zip_available() fails
	again, below. The free bits can safely be reset in a separate
	mini-transaction. If page_zip_available() succeeds below, we
	can be sure that the page_zip_compress() above did not reduce
	the free space available on the page. */

	if (!page_zip_available(page_zip, dict_index_is_clust(index),
				length, create)) {
		/* Out of space: reset the free bits. */
		if (!dict_index_is_clust(index)
		    && page_is_leaf(buf_block_get_frame(block))) {
			ibuf_reset_free_bits(block);
		}
		return(FALSE);
	}

	return(TRUE);
}

/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change.
@return	DB_SUCCESS or error number */
UNIV_INTERN
ulint
btr_cur_update_in_place(
/*====================*/
	ulint		flags,
	btr_cur_t*	cursor,
	const upd_t*	update,
	ulint		cmpl_info,
	que_thr_t*	thr,
	mtr_t*		mtr)
{
	dict_index_t*	index;
	buf_block_t*	block;
	page_zip_des_t*	page_zip;
	ulint		err;
	rec_t*		rec;
	roll_ptr_t	roll_ptr	= 0;
	trx_t*		trx;
	ulint		was_delete_marked;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	rec = btr_cur_get_rec(cursor);
	index = cursor->index;
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
	/* The insert buffer tree should never be updated in place. */
	ut_ad(!dict_index_is_ibuf(index));

	trx = thr_get_trx(thr);
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
#ifdef UNIV_DEBUG
	if (btr_cur_print_record_ops && thr) {
		btr_cur_trx_report(trx, index, "update ");
		rec_print_new(stderr, rec, offsets);
	}
#endif /* UNIV_DEBUG */

	block = btr_cur_get_block(cursor);
	page_zip = buf_block_get_page_zip(block);

	/* Check that enough space is available on the compressed page. */
	if (UNIV_LIKELY_NULL(page_zip)
	    && !btr_cur_update_alloc_zip(page_zip, block, index,
					 rec_offs_size(offsets), FALSE, mtr)) {
		return(DB_ZIP_OVERFLOW);
	}

	/* Do lock checking and undo logging */
	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
					thr, mtr, &roll_ptr);
	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {

		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
		return(err);
	}

	if (block->is_hashed) {
		/* The function row_upd_changes_ord_field_binary works only
		if the update vector was built for a clustered index, we must
		NOT call it if index is secondary */

		if (!dict_index_is_clust(index)
		    || row_upd_changes_ord_field_binary(index, update, thr,
							NULL, NULL)) {

			/* Remove possible hash index pointer to this record */
			btr_search_update_hash_on_delete(cursor);
		}

		rw_lock_x_lock(&btr_search_latch);
	}

	if (!(flags & BTR_KEEP_SYS_FLAG)) {
		row_upd_rec_sys_fields(rec, NULL,
				       index, offsets, trx, roll_ptr);
	}

	was_delete_marked = rec_get_deleted_flag(
		rec, page_is_comp(buf_block_get_frame(block)));

	row_upd_rec_in_place(rec, index, offsets, update, page_zip);

	if (block->is_hashed) {
		rw_lock_x_unlock(&btr_search_latch);
	}

	if (page_zip && !dict_index_is_clust(index)
	    && page_is_leaf(buf_block_get_frame(block))) {
		/* Update the free bits in the insert buffer. */
		ibuf_update_free_bits_zip(block, mtr);
	}

	btr_cur_update_in_place_log(flags, rec, index, update,
				    trx, roll_ptr, mtr);

	if (was_delete_marked
	    && !rec_get_deleted_flag(rec, page_is_comp(
					     buf_block_get_frame(block)))) {
		/* The new updated record owns its possible externally
		stored fields */

		btr_cur_unmark_extern_fields(page_zip,
					     rec, index, offsets, mtr);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(DB_SUCCESS);
}

/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. We assume here that the ordering
fields of the record do not change.
@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
ulint
btr_cur_optimistic_update(
/*======================*/
	ulint		flags,
	btr_cur_t*	cursor,
	const upd_t*	update,
	ulint		cmpl_info,
	que_thr_t*	thr,
	mtr_t*		mtr)
{
	dict_index_t*	index;
	page_cur_t*	page_cursor;
	ulint		err;
	buf_block_t*	block;
	page_t*		page;
	page_zip_des_t*	page_zip;
	rec_t*		rec;
	ulint		max_size;
	ulint		new_rec_size;
	ulint		old_rec_size;
	dtuple_t*	new_entry;
	roll_ptr_t	roll_ptr;
	trx_t*		trx;
	mem_heap_t*	heap;
	ulint		i;
	ulint		n_ext;
	ulint*		offsets;

	block = btr_cur_get_block(cursor);
	page = buf_block_get_frame(block);
	rec = btr_cur_get_rec(cursor);
	index = cursor->index;
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	/* The insert buffer tree should never be updated in place. */
	ut_ad(!dict_index_is_ibuf(index));

	heap = mem_heap_create(1024);
	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);

#ifdef UNIV_DEBUG
	if (btr_cur_print_record_ops && thr) {
		btr_cur_trx_report(thr_get_trx(thr), index, "update ");
		rec_print_new(stderr, rec, offsets);
	}
#endif /* UNIV_DEBUG */

	if (!row_upd_changes_field_size_or_external(index, offsets, update)) {

		/* The simplest and the most common case: the update does not
		change the size of any field and none of the updated fields is
		externally stored in rec or update, and there is enough space
		on the compressed page to log the update. */

		mem_heap_free(heap);
		return(btr_cur_update_in_place(flags, cursor, update,
					       cmpl_info, thr, mtr));
	}

	if (rec_offs_any_extern(offsets)) {
any_extern:
		/* Externally stored fields are treated in pessimistic
		update */

		mem_heap_free(heap);
		return(DB_OVERFLOW);
	}

	for (i = 0; i < upd_get_n_fields(update); i++) {
		if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {

			goto any_extern;
		}
	}

	page_cursor = btr_cur_get_page_cur(cursor);

	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
					   &n_ext, heap);
	/* We checked above that there are no externally stored fields. */
	ut_a(!n_ext);

	/* The page containing the clustered index record
	corresponding to new_entry is latched in mtr.
	Thus the following call is safe. */
	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
						     FALSE, heap);
	old_rec_size = rec_offs_size(offsets);
	new_rec_size = rec_get_converted_size(index, new_entry, 0);

	page_zip = buf_block_get_page_zip(block);
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */

	if (UNIV_LIKELY_NULL(page_zip)
	    && !btr_cur_update_alloc_zip(page_zip, block, index,
					 new_rec_size, TRUE, mtr)) {
		err = DB_ZIP_OVERFLOW;
		goto err_exit;
	}

	if (UNIV_UNLIKELY(new_rec_size
			  >= (page_get_free_space_of_empty(page_is_comp(page))
			      / 2))) {

		err = DB_OVERFLOW;
		goto err_exit;
	}

	if (UNIV_UNLIKELY(page_get_data_size(page)
			  - old_rec_size + new_rec_size
			  < BTR_CUR_PAGE_COMPRESS_LIMIT)) {

		/* The page would become too empty */

		err = DB_UNDERFLOW;
		goto err_exit;
	}

	max_size = old_rec_size
		+ page_get_max_insert_size_after_reorganize(page, 1);

	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
	       && (max_size >= new_rec_size))
	      || (page_get_n_recs(page) <= 1))) {

		/* There was not enough space, or it did not pay to
		reorganize: for simplicity, we decide what to do assuming a
		reorganization is needed, though it might not be necessary */

		err = DB_OVERFLOW;
		goto err_exit;
	}

	/* Do lock checking and undo logging */
	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
					thr, mtr, &roll_ptr);
	if (err != DB_SUCCESS) {

		goto err_exit;
	}

	/* Ok, we may do the replacement. Store on the page infimum the
	explicit locks on rec, before deleting rec (see the comment in
	btr_cur_pessimistic_update). */

	lock_rec_store_on_page_infimum(block, rec);

	btr_search_update_hash_on_delete(cursor);

	/* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
	invokes rec_offs_make_valid() to point to the copied record that
	the fields of new_entry point to. We have to undo it here. */
	ut_ad(rec_offs_validate(NULL, index, offsets));
	rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);

	page_cur_delete_rec(page_cursor, index, offsets, mtr);

	page_cur_move_to_prev(page_cursor);

	trx = thr_get_trx(thr);

	if (!(flags & BTR_KEEP_SYS_FLAG)) {
		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
					      roll_ptr);
		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
					      trx->id);
	}

	/* There are no externally stored columns in new_entry */
	rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
	ut_a(rec); /* <- We calculated above the insert would fit */

	if (page_zip && !dict_index_is_clust(index)
	    && page_is_leaf(page)) {
		/* Update the free bits in the insert buffer. */
		ibuf_update_free_bits_zip(block, mtr);
	}

	/* Restore the old explicit lock state on the record */

	lock_rec_restore_from_page_infimum(block, rec, block);

	page_cur_move_to_next(page_cursor);

	err = DB_SUCCESS;
err_exit:
	mem_heap_free(heap);
	return(err);
}
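/* Illustrative usage sketch (added, not part of the original source):
how the update entry points escalate.  btr_cur_optimistic_update() itself
delegates to btr_cur_update_in_place() when no field changes size (see
above); callers such as row_upd_clust_rec() escalate further on failure.
Variable names are assumptions. */
#if 0
	err = btr_cur_optimistic_update(0, &cursor, update,
					0, thr, &mtr);
	switch (err) {
	case DB_OVERFLOW:
	case DB_UNDERFLOW:
	case DB_ZIP_OVERFLOW:
		/* The record moved or grew, or the page would become too
		empty: re-position the cursor under BTR_MODIFY_TREE and
		call btr_cur_pessimistic_update(), which may split or
		compress pages and may return a big_rec vector for
		external storage. */
		break;
	default:
		break;
	}
#endif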

/*************************************************************//**
If, in a split, a new supremum record was created as the predecessor of the
updated record, the supremum record must inherit exclusive locks from the
updated record. In the split it may have inherited locks from the successor
of the updated record, which is not correct. This function restores the
right locks for the new supremum. */
static
void
btr_cur_pess_upd_restore_supremum(
/*==============================*/
	buf_block_t*	block,
	const rec_t*	rec,
	mtr_t*		mtr)
{
	page_t*		page;
	buf_block_t*	prev_block;
	ulint		space;
	ulint		zip_size;
	ulint		prev_page_no;

	page = buf_block_get_frame(block);

	if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
		/* Updated record is not the first user record on its page */

		return;
	}

	space = buf_block_get_space(block);
	zip_size = buf_block_get_zip_size(block);
	prev_page_no = btr_page_get_prev(page, mtr);

	ut_ad(prev_page_no != FIL_NULL);
	prev_block = buf_page_get_with_no_latch(space, zip_size,
						prev_page_no, mtr);
#ifdef UNIV_BTR_DEBUG
	ut_a(btr_page_get_next(prev_block->frame, mtr)
	     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

	/* We must already have an x-latch on prev_block! */
	ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));

	lock_rec_reset_and_inherit_gap_locks(prev_block, block,
					     PAGE_HEAP_NO_SUPREMUM,
					     page_rec_get_heap_no(rec));
}

2193 /*************************************************************/
2200 UNIV_INTERN
2201 ulint
2203 /*=======================*/
2204  ulint flags,
2206  btr_cur_t* cursor,
2207  mem_heap_t** heap,
2208  big_rec_t** big_rec,
2210  const upd_t* update,
2213  ulint cmpl_info,
2215  que_thr_t* thr,
2216  mtr_t* mtr)
2218 {
2219  big_rec_t* big_rec_vec = NULL;
2220  big_rec_t* dummy_big_rec;
2221  dict_index_t* index;
2222  buf_block_t* block;
2223  page_t* page;
2224  page_zip_des_t* page_zip;
2225  rec_t* rec;
2226  page_cur_t* page_cursor;
2227  dtuple_t* new_entry;
2228  ulint err;
2229  ulint optim_err;
2230  roll_ptr_t roll_ptr;
2231  trx_t* trx;
2232  ibool was_first;
2233  ulint n_extents = 0;
2234  ulint n_reserved;
2235  ulint n_ext;
2236  ulint* offsets = NULL;
2237 
2238  *big_rec = NULL;
2239 
2240  block = btr_cur_get_block(cursor);
2241  page = buf_block_get_frame(block);
2242  page_zip = buf_block_get_page_zip(block);
2243  rec = btr_cur_get_rec(cursor);
2244  index = cursor->index;
2245 
2246  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2247  MTR_MEMO_X_LOCK));
2248  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2249 #ifdef UNIV_ZIP_DEBUG
2250  ut_a(!page_zip || page_zip_validate(page_zip, page));
2251 #endif /* UNIV_ZIP_DEBUG */
2252  /* The insert buffer tree should never be updated in place. */
2253  ut_ad(!dict_index_is_ibuf(index));
2254 
2255  optim_err = btr_cur_optimistic_update(flags, cursor, update,
2256  cmpl_info, thr, mtr);
2257 
2258  switch (optim_err) {
2259  case DB_UNDERFLOW:
2260  case DB_OVERFLOW:
2261  case DB_ZIP_OVERFLOW:
2262  break;
2263  default:
2264  return(optim_err);
2265  }
2266 
2267  /* Do lock checking and undo logging */
2268  err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
2269  thr, mtr, &roll_ptr);
2270  if (err != DB_SUCCESS) {
2271 
2272  return(err);
2273  }
2274 
2275  if (optim_err == DB_OVERFLOW) {
2276  ulint reserve_flag;
2277 
2278  /* First reserve enough free space for the file segments
2279  of the index tree, so that the update will not fail because
2280  of lack of space */
2281 
2282  n_extents = cursor->tree_height / 16 + 3;
2283 
2284  if (flags & BTR_NO_UNDO_LOG_FLAG) {
2285  reserve_flag = FSP_CLEANING;
2286  } else {
2287  reserve_flag = FSP_NORMAL;
2288  }
2289 
2290  if (!fsp_reserve_free_extents(&n_reserved, index->space,
2291  n_extents, reserve_flag, mtr)) {
2292  return(DB_OUT_OF_FILE_SPACE);
2293  }
2294  }
2295 
2296  if (!*heap) {
2297  *heap = mem_heap_create(1024);
2298  }
2299  offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
2300 
2301  trx = thr_get_trx(thr);
2302 
2303  new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
2304  &n_ext, *heap);
2305  /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
2306  invokes rec_offs_make_valid() to point to the copied record that
2307  the fields of new_entry point to. We have to undo it here. */
2308  ut_ad(rec_offs_validate(NULL, index, offsets));
2309  rec_offs_make_valid(rec, index, offsets);
2310 
2311  /* The page containing the clustered index record
2312  corresponding to new_entry is latched in mtr. If the
2313  clustered index record is delete-marked, then its externally
2314  stored fields cannot have been purged yet, because then the
2315  purge would also have removed the clustered index record
2316  itself. Thus the following call is safe. */
2317  row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
2318  FALSE, *heap);
2319  if (!(flags & BTR_KEEP_SYS_FLAG)) {
2320  row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
2321  roll_ptr);
2322  row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
2323  trx->id);
2324  }
2325 
2326  if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
2327  /* We are in a transaction rollback undoing a row
2328  update: we must free possible externally stored fields
2329  which got new values in the update, if they are not
2330  inherited values. They can be inherited if we have
2331  updated the primary key to another value, and then
2332  update it back again. */
2333 
2334  ut_ad(big_rec_vec == NULL);
2335 
2336  btr_rec_free_updated_extern_fields(
2337  index, rec, page_zip, offsets, update,
2338  trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
2339  }
2340 
2341  /* We have to set appropriate extern storage bits in the new
2342  record to be inserted: we have to remember which fields were such */
2343 
2344  ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
2345  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
2346  n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
2347 
2348  if (UNIV_LIKELY_NULL(page_zip)) {
2349  ut_ad(page_is_comp(page));
2350  if (page_zip_rec_needs_ext(
2351  rec_get_converted_size(index, new_entry, n_ext),
2352  TRUE,
2353  dict_index_get_n_fields(index),
2354  page_zip_get_size(page_zip))) {
2355 
2356  goto make_external;
2357  }
2358  } else if (page_zip_rec_needs_ext(
2359  rec_get_converted_size(index, new_entry, n_ext),
2360  page_is_comp(page), 0, 0)) {
2361 make_external:
2362  big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
2363  if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
2364 
2365  err = DB_TOO_BIG_RECORD;
2366  goto return_after_reservations;
2367  }
2368  }
2369 
2370  /* Store the state of explicit locks on rec on the page infimum
2371  record, before deleting rec. The page infimum acts as a dummy carrier
2372  of the locks, taking care also of lock releases, before we can move
2373  the locks back on the actual record. There is a special case: the
2374  insert below may land on the root page and cause a call of
2375  btr_root_raise_and_insert. Therefore we cannot delete, in the lock
2376  system, the lock structs set on the root page even if the root
2377  page carries just node pointers. */
2378 
2379  lock_rec_store_on_page_infimum(block, rec);
2380 
2381  btr_search_update_hash_on_delete(cursor);
2382 
2383 #ifdef UNIV_ZIP_DEBUG
2384  ut_a(!page_zip || page_zip_validate(page_zip, page));
2385 #endif /* UNIV_ZIP_DEBUG */
2386  page_cursor = btr_cur_get_page_cur(cursor);
2387 
2388  page_cur_delete_rec(page_cursor, index, offsets, mtr);
2389 
2390  page_cur_move_to_prev(page_cursor);
2391 
2392  rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
2393 
2394  if (rec) {
2395  lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2396  rec, block);
2397 
2398  offsets = rec_get_offsets(rec, index, offsets,
2399  ULINT_UNDEFINED, heap);
2400 
2401  if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2402  /* The new inserted record owns its possible externally
2403  stored fields */
2404  btr_cur_unmark_extern_fields(page_zip,
2405  rec, index, offsets, mtr);
2406  }
2407 
2408  btr_cur_compress_if_useful(cursor, mtr);
2409 
2410  if (page_zip && !dict_index_is_clust(index)
2411  && page_is_leaf(page)) {
2412  /* Update the free bits in the insert buffer. */
2413  ibuf_update_free_bits_zip(block, mtr);
2414  }
2415 
2416  err = DB_SUCCESS;
2417  goto return_after_reservations;
2418  } else {
2419  ut_a(optim_err != DB_UNDERFLOW);
2420 
2421  /* Out of space: reset the free bits. */
2422  if (!dict_index_is_clust(index)
2423  && page_is_leaf(page)) {
2424  ibuf_reset_free_bits(block);
2425  }
2426  }
2427 
2428  /* Was the record to be updated positioned as the first user
2429  record on its page? */
2430  was_first = page_cur_is_before_first(page_cursor);
2431 
2432  /* The first parameter means that no lock checking and undo logging
2433  is made in the insert */
2434 
2435  err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
2436  | BTR_NO_LOCKING_FLAG
2437  | BTR_KEEP_SYS_FLAG,
2438  cursor, new_entry, &rec,
2439  &dummy_big_rec, n_ext, NULL, mtr);
2440  ut_a(rec);
2441  ut_a(err == DB_SUCCESS);
2442  ut_a(dummy_big_rec == NULL);
2443 
2444  if (dict_index_is_sec_or_ibuf(index)) {
2445  /* Update PAGE_MAX_TRX_ID in the index page header.
2446  It was not updated by btr_cur_pessimistic_insert()
2447  because of BTR_NO_LOCKING_FLAG. */
2448  buf_block_t* rec_block;
2449 
2450  rec_block = btr_cur_get_block(cursor);
2451 
2452  page_update_max_trx_id(rec_block,
2453  buf_block_get_page_zip(rec_block),
2454  trx->id, mtr);
2455  }
2456 
2457  if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2458  /* The new inserted record owns its possible externally
2459  stored fields */
2460  buf_block_t* rec_block = btr_cur_get_block(cursor);
2461 
2462 #ifdef UNIV_ZIP_DEBUG
2463  ut_a(!page_zip || page_zip_validate(page_zip, page));
2464  page = buf_block_get_frame(rec_block);
2465 #endif /* UNIV_ZIP_DEBUG */
2466  page_zip = buf_block_get_page_zip(rec_block);
2467 
2468  offsets = rec_get_offsets(rec, index, offsets,
2469  ULINT_UNDEFINED, heap);
2470  btr_cur_unmark_extern_fields(page_zip,
2471  rec, index, offsets, mtr);
2472  }
2473 
2474  lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2475  rec, block);
2476 
2477  /* If necessary, restore also the correct lock state for a new,
2478  preceding supremum record created in a page split. While the old
2479  record was nonexistent, the supremum might have inherited its locks
2480  from a wrong record. */
2481 
2482  if (!was_first) {
2483  btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
2484  rec, mtr);
2485  }
2486 
2487 return_after_reservations:
2488 #ifdef UNIV_ZIP_DEBUG
2489  ut_a(!page_zip || page_zip_validate(page_zip, page));
2490 #endif /* UNIV_ZIP_DEBUG */
2491 
2492  if (n_extents > 0) {
2493  fil_space_release_free_extents(index->space, n_reserved);
2494  }
2495 
2496  *big_rec = big_rec_vec;
2497 
2498  return(err);
2499 }
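btr_cur_pessimistic_update() hands any oversized columns back through
*big_rec instead of storing them itself; the caller must write them with
btr_store_big_rec_extern_fields() (defined later in this file) before the
mini-transaction holding the record latch is committed. A minimal caller
sketch, modeled loosely on the row update code and not part of btr0cur.cc;
cursor, index, update, thr and mtr are assumed to be in scope, and the
flag values and wrapper-macro argument order are assumptions:

	big_rec_t*	big_rec	= NULL;
	mem_heap_t*	heap	= NULL;
	ulint		err;

	err = btr_cur_pessimistic_update(0, cursor, &heap, &big_rec,
					 update, 0, thr, &mtr);

	if (err == DB_SUCCESS && big_rec) {
		/* Write the externally stored columns while the
		clustered index record is still latched in mtr. */
		rec_t*	rec	= btr_cur_get_rec(cursor);
		ulint*	offsets	= rec_get_offsets(rec, index, NULL,
						  ULINT_UNDEFINED, &heap);

		err = btr_store_big_rec_extern_fields(
			index, btr_cur_get_block(cursor),
			rec, offsets, &mtr, TRUE, big_rec);
	}

	if (big_rec) {
		dtuple_big_rec_free(big_rec);
	}
	if (heap) {
		mem_heap_free(heap);
	}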
2500 
2501 /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
2502 
2503 /****************************************************************/
2506 UNIV_INLINE
2507 void
2508 btr_cur_del_mark_set_clust_rec_log(
2509 /*===============================*/
2510  ulint flags,
2511  rec_t* rec,
2512  dict_index_t* index,
2513  ibool val,
2514  trx_t* trx,
2515  roll_ptr_t roll_ptr,
2516  mtr_t* mtr)
2517 {
2518  byte* log_ptr;
2519  ut_ad(flags < 256);
2520  ut_ad(val <= 1);
2521 
2522  ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2523 
2524  log_ptr = mlog_open_and_write_index(mtr, rec, index,
2525  page_rec_is_comp(rec)
2526  ? MLOG_COMP_REC_CLUST_DELETE_MARK
2527  : MLOG_REC_CLUST_DELETE_MARK,
2528  1 + 1 + DATA_ROLL_PTR_LEN
2529  + 14 + 2);
2529  + 14 + 2);
2530 
2531  if (!log_ptr) {
2532  /* Logging in mtr is switched off during crash recovery */
2533  return;
2534  }
2535 
2536  mach_write_to_1(log_ptr, flags);
2537  log_ptr++;
2538  mach_write_to_1(log_ptr, val);
2539  log_ptr++;
2540 
2541  log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
2542  mtr);
2543  mach_write_to_2(log_ptr, page_offset(rec));
2544  log_ptr += 2;
2545 
2546  mlog_close(mtr, log_ptr);
2547 }
2548 #endif /* !UNIV_HOTBACKUP */
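The body of the clustered-index delete-mark redo record written above and
consumed by the parser that follows has a simple layout, inferred here from
the writer/parser pair in this file (the compressed fields vary in size):

	flags     1 byte      update flags (hence ut_ad(flags < 256))
	val       1 byte      delete-mark value, 0 or 1
	sys vals  variable    position of the DATA_TRX_ID field and the
	                      trx id, both in compressed format, followed
	                      by the DATA_ROLL_PTR_LEN-byte roll pointer
	                      (row_upd_write_sys_vals_to_log(); the
	                      "+ 14" above reserves space for this)
	offset    2 bytes     page offset of the record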
2549 
2550 /****************************************************************/
2554 UNIV_INTERN
2555 byte*
2556 btr_cur_parse_del_mark_set_clust_rec(
2557 /*=================================*/
2558  byte* ptr,
2559  byte* end_ptr,
2560  page_t* page,
2561  page_zip_des_t* page_zip,
2562  dict_index_t* index)
2563 {
2564  ulint flags;
2565  ulint val;
2566  ulint pos;
2567  trx_id_t trx_id;
2568  roll_ptr_t roll_ptr;
2569  ulint offset;
2570  rec_t* rec;
2571 
2572  ut_ad(!page
2573  || !!page_is_comp(page) == dict_table_is_comp(index->table));
2574 
2575  if (end_ptr < ptr + 2) {
2576 
2577  return(NULL);
2578  }
2579 
2580  flags = mach_read_from_1(ptr);
2581  ptr++;
2582  val = mach_read_from_1(ptr);
2583  ptr++;
2584 
2585  ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
2586 
2587  if (ptr == NULL) {
2588 
2589  return(NULL);
2590  }
2591 
2592  if (end_ptr < ptr + 2) {
2593 
2594  return(NULL);
2595  }
2596 
2597  offset = mach_read_from_2(ptr);
2598  ptr += 2;
2599 
2600  ut_a(offset <= UNIV_PAGE_SIZE);
2601 
2602  if (page) {
2603  rec = page + offset;
2604 
2605  /* We do not need to reserve btr_search_latch, as the page
2606  is only being recovered, and there cannot be a hash index to
2607  it. */
2608 
2609  btr_rec_set_deleted_flag(rec, page_zip, val);
2610 
2611  if (!(flags & BTR_KEEP_SYS_FLAG)) {
2612  mem_heap_t* heap = NULL;
2613  ulint offsets_[REC_OFFS_NORMAL_SIZE];
2614  rec_offs_init(offsets_);
2615 
2616  row_upd_rec_sys_fields_in_recovery(
2617  rec, page_zip,
2618  rec_get_offsets(rec, index, offsets_,
2619  ULINT_UNDEFINED, &heap),
2620  pos, trx_id, roll_ptr);
2621  if (UNIV_LIKELY_NULL(heap)) {
2622  mem_heap_free(heap);
2623  }
2624  }
2625  }
2626 
2627  return(ptr);
2628 }
2629 
2630 #ifndef UNIV_HOTBACKUP
2631 /***********************************************************/
2637 UNIV_INTERN
2638 ulint
2639 btr_cur_del_mark_set_clust_rec(
2640 /*===========================*/
2641  ulint flags,
2642  buf_block_t* block,
2643  rec_t* rec,
2644  dict_index_t* index,
2645  const ulint* offsets,
2646  ibool val,
2647  que_thr_t* thr,
2648  mtr_t* mtr)
2649 {
2650  roll_ptr_t roll_ptr;
2651  ulint err;
2652  page_zip_des_t* page_zip;
2653  trx_t* trx;
2654 
2655  ut_ad(dict_index_is_clust(index));
2656  ut_ad(rec_offs_validate(rec, index, offsets));
2657  ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2658  ut_ad(buf_block_get_frame(block) == page_align(rec));
2659  ut_ad(page_is_leaf(page_align(rec)));
2660 
2661 #ifdef UNIV_DEBUG
2662  if (btr_cur_print_record_ops && thr) {
2663  btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
2664  rec_print_new(stderr, rec, offsets);
2665  }
2666 #endif /* UNIV_DEBUG */
2667 
2668  ut_ad(dict_index_is_clust(index));
2669  ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2670 
2671  err = lock_clust_rec_modify_check_and_lock(flags, block,
2672  rec, index, offsets, thr);
2673 
2674  if (err != DB_SUCCESS) {
2675 
2676  return(err);
2677  }
2678 
2679  err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2680  index, NULL, NULL, 0, rec,
2681  &roll_ptr);
2682  if (err != DB_SUCCESS) {
2683 
2684  return(err);
2685  }
2686 
2687  if (block->is_hashed) {
2688  rw_lock_x_lock(&btr_search_latch);
2689  }
2690 
2691  page_zip = buf_block_get_page_zip(block);
2692 
2693  btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
2694  btr_rec_set_deleted_flag(rec, page_zip, val);
2695 
2696  trx = thr_get_trx(thr);
2697 
2698  if (!(flags & BTR_KEEP_SYS_FLAG)) {
2699  row_upd_rec_sys_fields(rec, page_zip,
2700  index, offsets, trx, roll_ptr);
2701  }
2702 
2703  if (block->is_hashed) {
2704  rw_lock_x_unlock(&btr_search_latch);
2705  }
2706 
2707  btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2708  roll_ptr, mtr);
2709 
2710  return(err);
2711 }
2712 
2713 /****************************************************************/
2716 UNIV_INLINE
2717 void
2718 btr_cur_del_mark_set_sec_rec_log(
2719 /*=============================*/
2720  rec_t* rec,
2721  ibool val,
2722  mtr_t* mtr)
2723 {
2724  byte* log_ptr;
2725  ut_ad(val <= 1);
2726 
2727  log_ptr = mlog_open(mtr, 11 + 1 + 2);
2728 
2729  if (!log_ptr) {
2730  /* Logging in mtr is switched off during crash recovery:
2731  in that case mlog_open returns NULL */
2732  return;
2733  }
2734 
2735  log_ptr = mlog_write_initial_log_record_fast(
2736  rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
2737  mach_write_to_1(log_ptr, val);
2738  log_ptr++;
2739 
2740  mach_write_to_2(log_ptr, page_offset(rec));
2741  log_ptr += 2;
2742 
2743  mlog_close(mtr, log_ptr);
2744 }
2745 #endif /* !UNIV_HOTBACKUP */
2746 
2747 /****************************************************************/
2751 UNIV_INTERN
2752 byte*
2753 btr_cur_parse_del_mark_set_sec_rec(
2754 /*===============================*/
2755  byte* ptr,
2756  byte* end_ptr,
2757  page_t* page,
2758  page_zip_des_t* page_zip)
2759 {
2760  ulint val;
2761  ulint offset;
2762  rec_t* rec;
2763 
2764  if (end_ptr < ptr + 3) {
2765 
2766  return(NULL);
2767  }
2768 
2769  val = mach_read_from_1(ptr);
2770  ptr++;
2771 
2772  offset = mach_read_from_2(ptr);
2773  ptr += 2;
2774 
2775  ut_a(offset <= UNIV_PAGE_SIZE);
2776 
2777  if (page) {
2778  rec = page + offset;
2779 
2780  /* We do not need to reserve btr_search_latch, as the page
2781  is only being recovered, and there cannot be a hash index to
2782  it. */
2783 
2784  btr_rec_set_deleted_flag(rec, page_zip, val);
2785  }
2786 
2787  return(ptr);
2788 }
2789 
2790 #ifndef UNIV_HOTBACKUP
2791 /***********************************************************/
2794 UNIV_INTERN
2795 ulint
2796 btr_cur_del_mark_set_sec_rec(
2797 /*=========================*/
2798  ulint flags,
2799  btr_cur_t* cursor,
2800  ibool val,
2801  que_thr_t* thr,
2802  mtr_t* mtr)
2803 {
2804  buf_block_t* block;
2805  rec_t* rec;
2806  ulint err;
2807 
2808  block = btr_cur_get_block(cursor);
2809  rec = btr_cur_get_rec(cursor);
2810 
2811 #ifdef UNIV_DEBUG
2812  if (btr_cur_print_record_ops && thr) {
2813  btr_cur_trx_report(thr_get_trx(thr), cursor->index,
2814  "del mark ");
2815  rec_print(stderr, rec, cursor->index);
2816  }
2817 #endif /* UNIV_DEBUG */
2818 
2819  err = lock_sec_rec_modify_check_and_lock(flags,
2820  btr_cur_get_block(cursor),
2821  rec, cursor->index, thr, mtr);
2822  if (err != DB_SUCCESS) {
2823 
2824  return(err);
2825  }
2826 
2827  ut_ad(!!page_rec_is_comp(rec)
2828  == dict_table_is_comp(cursor->index->table));
2829 
2830  if (block->is_hashed) {
2831  rw_lock_x_lock(&btr_search_latch);
2832  }
2833 
2834  btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
2835 
2836  if (block->is_hashed) {
2837  rw_lock_x_unlock(&btr_search_latch);
2838  }
2839 
2840  btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2841 
2842  return(DB_SUCCESS);
2843 }
2844 
2845 /***********************************************************/
2848 UNIV_INTERN
2849 void
2850 btr_cur_set_deleted_flag_for_ibuf(
2851 /*==============================*/
2852  rec_t* rec,
2853  page_zip_des_t* page_zip,
2857  ibool val,
2858  mtr_t* mtr)
2859 {
2860  /* We do not need to reserve btr_search_latch, as the page has just
2861  been read to the buffer pool and there cannot be a hash index to it. */
2862 
2863  btr_rec_set_deleted_flag(rec, page_zip, val);
2864 
2865  btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2866 }
2867 
2868 /*==================== B-TREE RECORD REMOVE =========================*/
2869 
2870 /*************************************************************/
2877 UNIV_INTERN
2878 ibool
2879 btr_cur_compress_if_useful(
2880 /*=======================*/
2881  btr_cur_t* cursor,
2884  mtr_t* mtr)
2885 {
2886  ut_ad(mtr_memo_contains(mtr,
2887  dict_index_get_lock(btr_cur_get_index(cursor)),
2888  MTR_MEMO_X_LOCK));
2889  ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2890  MTR_MEMO_PAGE_X_FIX));
2891 
2892  return(btr_cur_compress_recommendation(cursor, mtr)
2893  && btr_compress(cursor, mtr));
2894 }
2895 
2896 /*******************************************************/
2901 UNIV_INTERN
2902 ibool
2903 btr_cur_optimistic_delete(
2904 /*======================*/
2905  btr_cur_t* cursor,
2909  mtr_t* mtr)
2913 {
2914  buf_block_t* block;
2915  rec_t* rec;
2916  mem_heap_t* heap = NULL;
2917  ulint offsets_[REC_OFFS_NORMAL_SIZE];
2918  ulint* offsets = offsets_;
2919  ibool no_compress_needed;
2920  rec_offs_init(offsets_);
2921 
2922  ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2923  MTR_MEMO_PAGE_X_FIX));
2924  /* This is intended only for leaf page deletions */
2925 
2926  block = btr_cur_get_block(cursor);
2927 
2928  ut_ad(page_is_leaf(buf_block_get_frame(block)));
2929 
2930  rec = btr_cur_get_rec(cursor);
2931  offsets = rec_get_offsets(rec, cursor->index, offsets,
2932  ULINT_UNDEFINED, &heap);
2933 
2934  no_compress_needed = !rec_offs_any_extern(offsets)
2935  && btr_cur_can_delete_without_compress(
2936  cursor, rec_offs_size(offsets), mtr);
2937 
2938  if (no_compress_needed) {
2939 
2940  page_t* page = buf_block_get_frame(block);
2941  page_zip_des_t* page_zip= buf_block_get_page_zip(block);
2942  ulint max_ins = 0;
2943 
2944  lock_update_delete(block, rec);
2945 
2946  btr_search_update_hash_on_delete(cursor);
2947 
2948  if (!page_zip) {
2949  max_ins = page_get_max_insert_size_after_reorganize(
2950  page, 1);
2951  }
2952 #ifdef UNIV_ZIP_DEBUG
2953  ut_a(!page_zip || page_zip_validate(page_zip, page));
2954 #endif /* UNIV_ZIP_DEBUG */
2955  page_cur_delete_rec(btr_cur_get_page_cur(cursor),
2956  cursor->index, offsets, mtr);
2957 #ifdef UNIV_ZIP_DEBUG
2958  ut_a(!page_zip || page_zip_validate(page_zip, page));
2959 #endif /* UNIV_ZIP_DEBUG */
2960 
2961  if (dict_index_is_clust(cursor->index)
2962  || dict_index_is_ibuf(cursor->index)
2963  || !page_is_leaf(page)) {
2964  /* The insert buffer does not handle
2965  inserts to clustered indexes, to
2966  non-leaf pages of secondary index B-trees,
2967  or to the insert buffer. */
2968  } else if (page_zip) {
2969  ibuf_update_free_bits_zip(block, mtr);
2970  } else {
2971  ibuf_update_free_bits_low(block, max_ins, mtr);
2972  }
2973  }
2974 
2975  if (UNIV_LIKELY_NULL(heap)) {
2976  mem_heap_free(heap);
2977  }
2978 
2979  return(no_compress_needed);
2980 }
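Callers normally try this optimistic path first and escalate only on
failure. A hedged sketch of the two-phase delete, modeled loosely on the
purge code and not part of btr0cur.cc (mini-transaction restart and cursor
repositioning are elided):

	ulint	err;

	if (!btr_cur_optimistic_delete(cursor, mtr)) {
		/* The leaf page would underflow, or the record has
		externally stored fields: commit the mini-transaction,
		reposition the cursor under BTR_MODIFY_TREE latching
		(elided here), and let the pessimistic variant below
		reorganize the tree. */
		btr_cur_pessimistic_delete(&err, FALSE, cursor,
					   RB_NONE, mtr);
		ut_a(err == DB_SUCCESS);
	}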
2981 
2982 /*************************************************************/
2990 UNIV_INTERN
2991 ibool
2992 btr_cur_pessimistic_delete(
2993 /*=======================*/
2994  ulint* err,
2999  ibool has_reserved_extents,
3003  btr_cur_t* cursor,
3007  enum trx_rb_ctx rb_ctx,
3008  mtr_t* mtr)
3009 {
3010  buf_block_t* block;
3011  page_t* page;
3012  page_zip_des_t* page_zip;
3013  dict_index_t* index;
3014  rec_t* rec;
3015  dtuple_t* node_ptr;
3016  ulint n_extents = 0;
3017  ulint n_reserved;
3018  ibool success;
3019  ibool ret = FALSE;
3020  ulint level;
3021  mem_heap_t* heap;
3022  ulint* offsets;
3023 
3024  block = btr_cur_get_block(cursor);
3025  page = buf_block_get_frame(block);
3026  index = btr_cur_get_index(cursor);
3027 
3028  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3029  MTR_MEMO_X_LOCK));
3030  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3031  if (!has_reserved_extents) {
3032  /* First reserve enough free space for the file segments
3033  of the index tree, so that the node pointer updates will
3034  not fail because of lack of space */
3035 
3036  n_extents = cursor->tree_height / 32 + 1;
3037 
3038  success = fsp_reserve_free_extents(&n_reserved,
3039  index->space,
3040  n_extents,
3041  FSP_CLEANING, mtr);
3042  if (!success) {
3043  *err = DB_OUT_OF_FILE_SPACE;
3044 
3045  return(FALSE);
3046  }
3047  }
3048 
3049  heap = mem_heap_create(1024);
3050  rec = btr_cur_get_rec(cursor);
3051  page_zip = buf_block_get_page_zip(block);
3052 #ifdef UNIV_ZIP_DEBUG
3053  ut_a(!page_zip || page_zip_validate(page_zip, page));
3054 #endif /* UNIV_ZIP_DEBUG */
3055 
3056  offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
3057 
3058  if (rec_offs_any_extern(offsets)) {
3059  btr_rec_free_externally_stored_fields(index,
3060  rec, offsets, page_zip,
3061  rb_ctx, mtr);
3062 #ifdef UNIV_ZIP_DEBUG
3063  ut_a(!page_zip || page_zip_validate(page_zip, page));
3064 #endif /* UNIV_ZIP_DEBUG */
3065  }
3066 
3067  if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
3068  && UNIV_UNLIKELY(dict_index_get_page(index)
3069  != buf_block_get_page_no(block))) {
3070 
3071  /* If there is only one record, drop the whole page in
3072  btr_discard_page, if this is not the root page */
3073 
3074  btr_discard_page(cursor, mtr);
3075 
3076  *err = DB_SUCCESS;
3077  ret = TRUE;
3078 
3079  goto return_after_reservations;
3080  }
3081 
3082  lock_update_delete(block, rec);
3083  level = btr_page_get_level(page, mtr);
3084 
3085  if (level > 0
3086  && UNIV_UNLIKELY(rec == page_rec_get_next(
3087  page_get_infimum_rec(page)))) {
3088 
3089  rec_t* next_rec = page_rec_get_next(rec);
3090 
3091  if (btr_page_get_prev(page, mtr) == FIL_NULL) {
3092 
3093  /* If we delete the leftmost node pointer on a
3094  non-leaf level, we must mark the new leftmost node
3095  pointer as the predefined minimum record */
3096 
3097  /* This will make page_zip_validate() fail until
3098  page_cur_delete_rec() completes. This is harmless,
3099  because everything will take place within a single
3100  mini-transaction and because writing to the redo log
3101  is an atomic operation (performed by mtr_commit()). */
3102  btr_set_min_rec_mark(next_rec, mtr);
3103  } else {
3104  /* Otherwise, if we delete the leftmost node pointer
3105  on a page, we have to change the father node pointer
3106  so that it is equal to the new leftmost node pointer
3107  on the page */
3108 
3109  btr_node_ptr_delete(index, block, mtr);
3110 
3111  node_ptr = dict_index_build_node_ptr(
3112  index, next_rec, buf_block_get_page_no(block),
3113  heap, level);
3114 
3115  btr_insert_on_non_leaf_level(index,
3116  level + 1, node_ptr, mtr);
3117  }
3118  }
3119 
3120  btr_search_update_hash_on_delete(cursor);
3121 
3122  page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
3123 #ifdef UNIV_ZIP_DEBUG
3124  ut_a(!page_zip || page_zip_validate(page_zip, page));
3125 #endif /* UNIV_ZIP_DEBUG */
3126 
3127  ut_ad(btr_check_node_ptr(index, block, mtr));
3128 
3129  *err = DB_SUCCESS;
3130 
3131 return_after_reservations:
3132  mem_heap_free(heap);
3133 
3134  if (ret == FALSE) {
3135  ret = btr_cur_compress_if_useful(cursor, mtr);
3136  }
3137 
3138  if (n_extents > 0) {
3139  fil_space_release_free_extents(index->space, n_reserved);
3140  }
3141 
3142  return(ret);
3143 }
3144 
3145 /*******************************************************************/
3148 static
3149 void
3150 btr_cur_add_path_info(
3151 /*==================*/
3152  btr_cur_t* cursor,
3153  ulint height,
3155  ulint root_height)
3156 {
3157  btr_path_t* slot;
3158  rec_t* rec;
3159  page_t* page;
3160 
3161  ut_a(cursor->path_arr);
3162 
3163  if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
3164  /* Do nothing; return empty path */
3165 
3166  slot = cursor->path_arr;
3167  slot->nth_rec = ULINT_UNDEFINED;
3168 
3169  return;
3170  }
3171 
3172  if (height == 0) {
3173  /* Mark end of slots for path */
3174  slot = cursor->path_arr + root_height + 1;
3175  slot->nth_rec = ULINT_UNDEFINED;
3176  }
3177 
3178  rec = btr_cur_get_rec(cursor);
3179 
3180  slot = cursor->path_arr + (root_height - height);
3181 
3182  page = page_align(rec);
3183 
3184  slot->nth_rec = page_rec_get_n_recs_before(rec);
3185  slot->n_recs = page_get_n_recs(page);
3186  slot->page_no = page_get_page_no(page);
3187  slot->page_level = btr_page_get_level_low(page);
3188 }
3189 
3190 /*******************************************************************/
3202 static
3203 ib_int64_t
3204 btr_estimate_n_rows_in_range_on_level(
3205 /*==================================*/
3206  dict_index_t* index,
3207  btr_path_t* slot1,
3208  btr_path_t* slot2,
3209  ib_int64_t n_rows_on_prev_level,
3214  ibool* is_n_rows_exact)
3217 {
3218  ulint space;
3219  ib_int64_t n_rows;
3220  ulint n_pages_read;
3221  ulint page_no;
3222  ulint zip_size;
3223  ulint level;
3224 
3225  space = dict_index_get_space(index);
3226 
3227  n_rows = 0;
3228  n_pages_read = 0;
3229 
3230  /* Assume by default that we will scan all pages between
3231  slot1->page_no and slot2->page_no */
3232  *is_n_rows_exact = TRUE;
3233 
3234  /* add records from slot1->page_no which are to the right of
3235  the record which serves as a left border of the range, if any */
3236  if (slot1->nth_rec < slot1->n_recs) {
3237  n_rows += slot1->n_recs - slot1->nth_rec;
3238  }
3239 
3240  /* add records from slot2->page_no which are to the left of
3241  the record which serves as a right border of the range, if any */
3242  if (slot2->nth_rec > 1) {
3243  n_rows += slot2->nth_rec - 1;
3244  }
3245 
3246  /* count the records in the pages between slot1->page_no and
3247  slot2->page_no (non inclusive), if any */
3248 
3249  zip_size = fil_space_get_zip_size(space);
3250 
3251  /* Do not read more than this number of pages, so that this code,
3252  which only produces an estimate, does not hurt performance. If we
3253  read this many pages before reaching slot2->page_no, then we estimate
3254  the average from the pages scanned so far */
3255 # define N_PAGES_READ_LIMIT 10
3256 
3257  page_no = slot1->page_no;
3258  level = slot1->page_level;
3259 
3260  do {
3261  mtr_t mtr;
3262  page_t* page;
3263  buf_block_t* block;
3264 
3265  mtr_start(&mtr);
3266 
3267  /* fetch the page */
3268  block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
3269  &mtr);
3270 
3271  page = buf_block_get_frame(block);
3272 
3273  /* It is possible that the tree has been reorganized in the
3274  meantime and this is a different page. If this happens the
3275  calculated estimate will be bogus, which is not fatal as
3276  this is only an estimate. We are sure that a page with
3277  page_no exists because InnoDB never frees pages, only
3278  reuses them. */
3279  if (fil_page_get_type(page) != FIL_PAGE_INDEX
3280  || btr_page_get_index_id(page) != index->id
3281  || btr_page_get_level_low(page) != level) {
3282 
3283  /* The page got reused for something else */
3284  mtr_commit(&mtr);
3285  goto inexact;
3286  }
3287 
3288  n_pages_read++;
3289 
3290  if (page_no != slot1->page_no) {
3291  /* Do not count the records on slot1->page_no,
3292  we already counted them before this loop. */
3293  n_rows += page_get_n_recs(page);
3294  }
3295 
3296  page_no = btr_page_get_next(page, &mtr);
3297 
3298  mtr_commit(&mtr);
3299 
3300  if (n_pages_read == N_PAGES_READ_LIMIT
3301  || page_no == FIL_NULL) {
3302  /* Either we read too many pages or
3303  we reached the end of the level without passing
3304  through slot2->page_no, the tree must have changed
3305  in the meantime */
3306  goto inexact;
3307  }
3308 
3309  } while (page_no != slot2->page_no);
3310 
3311  return(n_rows);
3312 
3313 inexact:
3314 
3315  *is_n_rows_exact = FALSE;
3316 
3317  /* We stopped reading before reaching slot2->page_no */
3318 
3319  if (n_pages_read > 0) {
3320  /* The number of pages on this level is
3321  n_rows_on_prev_level, multiply it by the
3322  average number of recs per page so far */
3323  n_rows = n_rows_on_prev_level
3324  * n_rows / n_pages_read;
3325  } else {
3326  /* The tree changed before we could even
3327  start with slot1->page_no */
3328  n_rows = 10;
3329  }
3330 
3331  return(n_rows);
3332 }
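A worked example of the extrapolation above: if the level above suggested
n_rows_on_prev_level = 40 (i.e. about 40 pages on this level), and the scan
was cut off after N_PAGES_READ_LIMIT = 10 pages holding n_rows = 500
records in total, the estimate becomes 40 * 500 / 10 = 2000 rows for the
whole level.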
3333 
3334 /*******************************************************************/
3337 UNIV_INTERN
3338 ib_int64_t
3339 btr_estimate_n_rows_in_range(
3340 /*=========================*/
3341  dict_index_t* index,
3342  const dtuple_t* tuple1,
3343  ulint mode1,
3344  const dtuple_t* tuple2,
3345  ulint mode2)
3346 {
3347  btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
3348  btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
3349  btr_cur_t cursor;
3350  btr_path_t* slot1;
3351  btr_path_t* slot2;
3352  ibool diverged;
3353  ibool diverged_lot;
3354  ulint divergence_level;
3355  ib_int64_t n_rows;
3356  ibool is_n_rows_exact;
3357  ulint i;
3358  mtr_t mtr;
3359 
3360  mtr_start(&mtr);
3361 
3362  cursor.path_arr = path1;
3363 
3364  if (dtuple_get_n_fields(tuple1) > 0) {
3365 
3366  btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
3367  BTR_SEARCH_LEAF | BTR_ESTIMATE,
3368  &cursor, 0,
3369  __FILE__, __LINE__, &mtr);
3370  } else {
3371  btr_cur_open_at_index_side(TRUE, index,
3372  BTR_SEARCH_LEAF | BTR_ESTIMATE,
3373  &cursor, &mtr);
3374  }
3375 
3376  mtr_commit(&mtr);
3377 
3378  mtr_start(&mtr);
3379 
3380  cursor.path_arr = path2;
3381 
3382  if (dtuple_get_n_fields(tuple2) > 0) {
3383 
3384  btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
3385  BTR_SEARCH_LEAF | BTR_ESTIMATE,
3386  &cursor, 0,
3387  __FILE__, __LINE__, &mtr);
3388  } else {
3389  btr_cur_open_at_index_side(FALSE, index,
3390  BTR_SEARCH_LEAF | BTR_ESTIMATE,
3391  &cursor, &mtr);
3392  }
3393 
3394  mtr_commit(&mtr);
3395 
3396  /* We have the path information for the range in path1 and path2 */
3397 
3398  n_rows = 1;
3399  is_n_rows_exact = TRUE;
3400  diverged = FALSE; /* This becomes true when the path is not
3401  the same any more */
3402  diverged_lot = FALSE; /* This becomes true when the paths are
3403  not the same or adjacent any more */
3404  divergence_level = 1000000; /* This is the level where paths diverged
3405  a lot */
3406  for (i = 0; ; i++) {
3407  ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
3408 
3409  slot1 = path1 + i;
3410  slot2 = path2 + i;
3411 
3412  if (slot1->nth_rec == ULINT_UNDEFINED
3413  || slot2->nth_rec == ULINT_UNDEFINED) {
3414 
3415  if (i > divergence_level + 1 && !is_n_rows_exact) {
3416  /* In trees whose height is > 1 our algorithm
3417  tends to underestimate: multiply the estimate
3418  by 2: */
3419 
3420  n_rows = n_rows * 2;
3421  }
3422 
3423  /* Do not estimate the number of rows in the range
3424  to over 1 / 2 of the estimated rows in the whole
3425  table */
3426 
3427  if (n_rows > index->table->stat_n_rows / 2
3428  && !is_n_rows_exact) {
3429 
3430  n_rows = index->table->stat_n_rows / 2;
3431 
3432  /* If there are just 0 or 1 rows in the table,
3433  then we estimate all rows are in the range */
3434 
3435  if (n_rows == 0) {
3436  n_rows = index->table->stat_n_rows;
3437  }
3438  }
3439 
3440  return(n_rows);
3441  }
3442 
3443  if (!diverged && slot1->nth_rec != slot2->nth_rec) {
3444 
3445  diverged = TRUE;
3446 
3447  if (slot1->nth_rec < slot2->nth_rec) {
3448  n_rows = slot2->nth_rec - slot1->nth_rec;
3449 
3450  if (n_rows > 1) {
3451  diverged_lot = TRUE;
3452  divergence_level = i;
3453  }
3454  } else {
3455  /* It is possible that
3456  slot1->nth_rec >= slot2->nth_rec
3457  if, for example, we have a single page
3458  tree which contains (inf, 5, 6, supr)
3459  and we select where x > 20 and x < 30;
3460  in this case slot1->nth_rec will point
3461  to the supr record and slot2->nth_rec
3462  will point to 6 */
3463  n_rows = 0;
3464  }
3465 
3466  } else if (diverged && !diverged_lot) {
3467 
3468  if (slot1->nth_rec < slot1->n_recs
3469  || slot2->nth_rec > 1) {
3470 
3471  diverged_lot = TRUE;
3472  divergence_level = i;
3473 
3474  n_rows = 0;
3475 
3476  if (slot1->nth_rec < slot1->n_recs) {
3477  n_rows += slot1->n_recs
3478  - slot1->nth_rec;
3479  }
3480 
3481  if (slot2->nth_rec > 1) {
3482  n_rows += slot2->nth_rec - 1;
3483  }
3484  }
3485  } else if (diverged_lot) {
3486 
3487  n_rows = btr_estimate_n_rows_in_range_on_level(
3488  index, slot1, slot2, n_rows,
3489  &is_n_rows_exact);
3490  }
3491  }
3492 }
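A worked example of the divergence logic: if both search paths run through
the same pages at every level and end on one leaf page with
slot1->nth_rec = 3 and slot2->nth_rec = 7, the paths diverge only at the
leaf and the estimate is 7 - 3 = 4 rows. If they instead land on adjacent
leaf pages with slot1->n_recs = 90, slot1->nth_rec = 80 and
slot2->nth_rec = 15, the paths have "diverged a lot" and the estimate is
(90 - 80) + (15 - 1) = 24 rows. Had the divergence happened higher in the
tree, btr_estimate_n_rows_in_range_on_level() would instead extrapolate
from the leaf pages between the two borders.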
3493 
3494 /*******************************************************************/
3499 static
3500 void
3501 btr_record_not_null_field_in_rec(
3502 /*=============================*/
3503  rec_t* rec,
3504  ulint n_unique,
3507  const ulint* offsets,
3510  ib_int64_t* n_not_null)
3512 {
3513  ulint i;
3514 
3515  ut_ad(rec_offs_n_fields(offsets) >= n_unique);
3516 
3517  if (n_not_null == NULL) {
3518  return;
3519  }
3520 
3521  for (i = 0; i < n_unique; i++) {
3522  ulint rec_len;
3523 
3524  rec_get_nth_field(rec, offsets, i, &rec_len);
3525 
3526  if (rec_len != UNIV_SQL_NULL) {
3527  n_not_null[i]++;
3528  } else {
3529  /* Break if we hit the first NULL value */
3530  break;
3531  }
3532  }
3533 }
3534 
3535 /*******************************************************************/
3542 UNIV_INTERN
3543 void
3544 btr_estimate_number_of_different_key_vals(
3545 /*======================================*/
3546  dict_index_t* index)
3547 {
3548  btr_cur_t cursor;
3549  page_t* page;
3550  rec_t* rec;
3551  ulint n_cols;
3552  ulint matched_fields;
3553  ulint matched_bytes;
3554  ib_int64_t* n_diff;
3555  ib_int64_t* n_not_null;
3556  ibool stats_null_not_equal;
3557  ullint n_sample_pages; /* number of pages to sample */
3558  ulint not_empty_flag = 0;
3559  ulint total_external_size = 0;
3560  ulint i;
3561  ulint j;
3562  ullint add_on;
3563  mtr_t mtr;
3564  mem_heap_t* heap = NULL;
3565  ulint* offsets_rec = NULL;
3566  ulint* offsets_next_rec = NULL;
3567 
3568  n_cols = dict_index_get_n_unique(index);
3569 
3570  heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
3571  * (n_cols + 1)
3572  + dict_index_get_n_fields(index)
3573  * (sizeof *offsets_rec
3574  + sizeof *offsets_next_rec));
3575 
3576  n_diff = static_cast<ib_int64_t *>(mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)));
3577 
3578  n_not_null = NULL;
3579 
3580  /* Check srv_innodb_stats_method setting, and decide whether we
3581  need to record non-null value and also decide if NULL is
3582  considered equal (by setting stats_null_not_equal value) */
3583  switch (srv_innodb_stats_method) {
3584  case SRV_STATS_NULLS_IGNORED:
3585  n_not_null = static_cast<ib_int64_t *>(mem_heap_zalloc(heap, (n_cols + 1)
3586  * sizeof *n_not_null));
3587  /* fall through */
3588 
3589  case SRV_STATS_NULLS_UNEQUAL:
3590  /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
3591  case, we will treat NULLs as unequal value */
3592  stats_null_not_equal = TRUE;
3593  break;
3594 
3595  case SRV_STATS_NULLS_EQUAL:
3596  stats_null_not_equal = FALSE;
3597  break;
3598 
3599  default:
3600  ut_error;
3601  }
3602 
3603  /* It makes no sense to test more pages than are contained
3604  in the index, thus we lower the number if it is too high */
3605  if (srv_stats_sample_pages > index->stat_index_size) {
3606  if (index->stat_index_size > 0) {
3607  n_sample_pages = index->stat_index_size;
3608  } else {
3609  n_sample_pages = 1;
3610  }
3611  } else {
3612  n_sample_pages = srv_stats_sample_pages;
3613  }
3614 
3615  /* We sample some pages in the index to get an estimate */
3616 
3617  for (i = 0; i < n_sample_pages; i++) {
3618  mtr_start(&mtr);
3619 
3620  btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
3621 
3622  /* Count the number of different key values for each prefix of
3623  the key on this index page. If the prefix does not determine
3624  the index record uniquely in the B-tree, then we subtract one
3625  because otherwise our algorithm would give a wrong estimate
3626  for an index where there is just one key value. */
3627 
3628  page = btr_cur_get_page(&cursor);
3629 
3630  rec = page_rec_get_next(page_get_infimum_rec(page));
3631 
3632  if (!page_rec_is_supremum(rec)) {
3633  not_empty_flag = 1;
3634  offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3635  ULINT_UNDEFINED, &heap);
3636 
3637  if (n_not_null) {
3638  btr_record_not_null_field_in_rec(
3639  rec, n_cols, offsets_rec, n_not_null);
3640  }
3641  }
3642 
3643  while (!page_rec_is_supremum(rec)) {
3644  rec_t* next_rec = page_rec_get_next(rec);
3645  if (page_rec_is_supremum(next_rec)) {
3646  total_external_size +=
3647  btr_rec_get_externally_stored_len(
3648  rec, offsets_rec);
3649  break;
3650  }
3651 
3652  matched_fields = 0;
3653  matched_bytes = 0;
3654  offsets_next_rec = rec_get_offsets(next_rec, index,
3655  offsets_next_rec,
3656  ULINT_UNDEFINED,
3657  &heap);
3658 
3659  cmp_rec_rec_with_match(rec, next_rec,
3660  offsets_rec, offsets_next_rec,
3661  index, stats_null_not_equal,
3662  &matched_fields,
3663  &matched_bytes);
3664 
3665  for (j = matched_fields + 1; j <= n_cols; j++) {
3666  /* We add one if this index record has
3667  a different prefix from the previous */
3668 
3669  n_diff[j]++;
3670  }
3671 
3672  if (n_not_null) {
3673  btr_record_not_null_field_in_rec(
3674  next_rec, n_cols, offsets_next_rec,
3675  n_not_null);
3676  }
3677 
3678  total_external_size
3679  += btr_rec_get_externally_stored_len(
3680  rec, offsets_rec);
3681 
3682  rec = next_rec;
3683  /* Initialize offsets_rec for the next round
3684  and assign the old offsets_rec buffer to
3685  offsets_next_rec. */
3686  {
3687  ulint* offsets_tmp = offsets_rec;
3688  offsets_rec = offsets_next_rec;
3689  offsets_next_rec = offsets_tmp;
3690  }
3691  }
3692 
3693 
3694  if (n_cols == dict_index_get_n_unique_in_tree(index)) {
3695 
3696  /* If there is more than one leaf page in the tree,
3697  we add one because we know that the first record
3698  on the page certainly had a different prefix than the
3699  last record on the previous index page in the
3700  alphabetical order. Before this fix, if there was
3701  just one big record on each clustered index page, the
3702  algorithm grossly underestimated the number of rows
3703  in the table. */
3704 
3705  if (btr_page_get_prev(page, &mtr) != FIL_NULL
3706  || btr_page_get_next(page, &mtr) != FIL_NULL) {
3707 
3708  n_diff[n_cols]++;
3709  }
3710  }
3711 
3712  mtr_commit(&mtr);
3713  }
3714 
3715  /* If we saw k borders between different key values on
3716  n_sample_pages leaf pages, we can estimate how many
3717  there will be in index->stat_n_leaf_pages */
3718 
3719  /* We must take into account that our sample actually represents
3720  also the pages used for external storage of fields (those pages are
3721  included in index->stat_n_leaf_pages) */
3722 
3723  for (j = 0; j <= n_cols; j++) {
3724  index->stat_n_diff_key_vals[j]
3725  = BTR_TABLE_STATS_FROM_SAMPLE(
3726  n_diff[j], index, n_sample_pages,
3727  total_external_size, not_empty_flag);
3728 
3729  /* If the tree is small, smaller than
3730  10 * n_sample_pages + total_external_size, then
3731  the above estimate is ok. For bigger trees it is common that we
3732  do not see any borders between key values in the few pages
3733  we pick. But still there may be n_sample_pages
3734  different key values, or even more. Let us try to approximate
3735  that: */
3736 
3737  add_on = index->stat_n_leaf_pages
3738  / (10 * (n_sample_pages
3739  + total_external_size));
3740 
3741  if (add_on > n_sample_pages) {
3742  add_on = n_sample_pages;
3743  }
3744 
3745  index->stat_n_diff_key_vals[j] += add_on;
3746 
3747  /* Update the stat_n_non_null_key_vals[] with our
3748  sampled result. stat_n_non_null_key_vals[] is created
3749  and initialized to zero in dict_index_add_to_cache(),
3750  along with stat_n_diff_key_vals[] array */
3751  if (n_not_null != NULL && (j < n_cols)) {
3752  index->stat_n_non_null_key_vals[j] =
3753  BTR_TABLE_STATS_FROM_SAMPLE(
3754  n_not_null[j], index, n_sample_pages,
3755  total_external_size, not_empty_flag);
3756  }
3757  }
3758 
3759  mem_heap_free(heap);
3760 }
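A worked example of the sampling arithmetic above: if n_sample_pages = 8
sampled leaf pages showed n_diff[n_cols] = 200 borders between different
key values, there were no externally stored columns, and
index->stat_n_leaf_pages = 1000, the estimate scales to roughly
200 * 1000 / 8 = 25000 distinct values. The add_on term, here
1000 / (10 * 8) = 12 and capped at n_sample_pages, then compensates for
borders that such a small sample cannot see.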
3761 
3762 /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3763 
3764 /***********************************************************/
3767 static
3768 ulint
3769 btr_rec_get_field_ref_offs(
3770 /*=======================*/
3771  const ulint* offsets,
3772  ulint n)
3773 {
3774  ulint field_ref_offs;
3775  ulint local_len;
3776 
3777  ut_a(rec_offs_nth_extern(offsets, n));
3778  field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len);
3779  ut_a(local_len != UNIV_SQL_NULL);
3780  ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3781 
3782  return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE);
3783 }
3784 
3790 #define btr_rec_get_field_ref(rec, offsets, n) \
3791  ((rec) + btr_rec_get_field_ref_offs(offsets, n))
3792 
3793 /***********************************************************/
3796 static
3797 ulint
3798 btr_rec_get_externally_stored_len(
3799 /*==============================*/
3800  const rec_t* rec,
3801  const ulint* offsets)
3802 {
3803  ulint n_fields;
3804  ulint total_extern_len = 0;
3805  ulint i;
3806 
3807  ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3808 
3809  if (!rec_offs_any_extern(offsets)) {
3810  return(0);
3811  }
3812 
3813  n_fields = rec_offs_n_fields(offsets);
3814 
3815  for (i = 0; i < n_fields; i++) {
3816  if (rec_offs_nth_extern(offsets, i)) {
3817 
3818  ulint extern_len = mach_read_from_4(
3819  btr_rec_get_field_ref(rec, offsets, i)
3820  + BTR_EXTERN_LEN + 4);
3821 
3822  total_extern_len += ut_calc_align(extern_len,
3823  UNIV_PAGE_SIZE);
3824  }
3825  }
3826 
3827  return(total_extern_len / UNIV_PAGE_SIZE);
3828 }
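For example, with 16 KiB pages a record pointing to two externally stored
columns of 70000 and 3000 bytes accumulates ut_calc_align(70000, 16384) +
ut_calc_align(3000, 16384) = 81920 + 16384 bytes, so the function returns
98304 / 16384 = 6 pages.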
3829 
3830 /*******************************************************************/
3832 static
3833 void
3834 btr_cur_set_ownership_of_extern_field(
3835 /*==================================*/
3836  page_zip_des_t* page_zip,
3838  rec_t* rec,
3839  dict_index_t* index,
3840  const ulint* offsets,
3841  ulint i,
3842  ibool val,
3843  mtr_t* mtr)
3844 {
3845  byte* data;
3846  ulint local_len;
3847  ulint byte_val;
3848 
3849  data = rec_get_nth_field(rec, offsets, i, &local_len);
3850  ut_ad(rec_offs_nth_extern(offsets, i));
3851  ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3852 
3853  local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3854 
3855  byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
3856 
3857  if (val) {
3858  byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3859  } else {
3860 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
3861  ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG));
3862 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3863  byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3864  }
3865 
3866  if (UNIV_LIKELY_NULL(page_zip)) {
3867  mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3868  page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
3869  } else if (UNIV_LIKELY(mtr != NULL)) {
3870 
3871  mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
3872  MLOG_1BYTE, mtr);
3873  } else {
3874  mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3875  }
3876 
3877  btr_blob_dbg_owner(rec, index, offsets, i, val);
3878 }
3879 
3880 /*******************************************************************/
3885 UNIV_INTERN
3886 void
3887 btr_cur_disown_inherited_fields(
3888 /*============================*/
3889  page_zip_des_t* page_zip,
3891  rec_t* rec,
3892  dict_index_t* index,
3893  const ulint* offsets,
3894  const upd_t* update,
3895  mtr_t* mtr)
3896 {
3897  ulint i;
3898 
3899  ut_ad(rec_offs_validate(rec, index, offsets));
3900  ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3901  ut_ad(rec_offs_any_extern(offsets));
3902  ut_ad(mtr);
3903 
3904  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3905  if (rec_offs_nth_extern(offsets, i)
3906  && !upd_get_field_by_field_no(update, i)) {
3907  btr_cur_set_ownership_of_extern_field(
3908  page_zip, rec, index, offsets, i, FALSE, mtr);
3909  }
3910  }
3911 }
3912 
3913 /*******************************************************************/
3917 static
3918 void
3919 btr_cur_unmark_extern_fields(
3920 /*=========================*/
3921  page_zip_des_t* page_zip,
3923  rec_t* rec,
3924  dict_index_t* index,
3925  const ulint* offsets,
3926  mtr_t* mtr)
3927 {
3928  ulint n;
3929  ulint i;
3930 
3931  ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3932  n = rec_offs_n_fields(offsets);
3933 
3934  if (!rec_offs_any_extern(offsets)) {
3935 
3936  return;
3937  }
3938 
3939  for (i = 0; i < n; i++) {
3940  if (rec_offs_nth_extern(offsets, i)) {
3941 
3942  btr_cur_set_ownership_of_extern_field(
3943  page_zip, rec, index, offsets, i, TRUE, mtr);
3944  }
3945  }
3946 }
3947 
3948 /*******************************************************************/
3953 UNIV_INTERN
3954 ulint
3955 btr_push_update_extern_fields(
3956 /*==========================*/
3957  dtuple_t* tuple,
3958  const upd_t* update,
3959  mem_heap_t* heap)
3960 {
3961  ulint n_pushed = 0;
3962  ulint n;
3963  const upd_field_t* uf;
3964 
3965  ut_ad(tuple);
3966  ut_ad(update);
3967 
3968  uf = update->fields;
3969  n = upd_get_n_fields(update);
3970 
3971  for (; n--; uf++) {
3972  if (dfield_is_ext(&uf->new_val)) {
3973  dfield_t* field
3974  = dtuple_get_nth_field(tuple, uf->field_no);
3975 
3976  if (!dfield_is_ext(field)) {
3977  dfield_set_ext(field);
3978  n_pushed++;
3979  }
3980 
3981  switch (uf->orig_len) {
3982  byte* data;
3983  ulint len;
3984  byte* buf;
3985  case 0:
3986  break;
3987  case BTR_EXTERN_FIELD_REF_SIZE:
3988  /* Restore the original locally stored
3989  part of the column. In the undo log,
3990  InnoDB writes a longer prefix of externally
3991  stored columns, so that column prefixes
3992  in secondary indexes can be reconstructed. */
3993  dfield_set_data(field, (byte*) dfield_get_data(field)
3994  + dfield_get_len(field)
3995  - BTR_EXTERN_FIELD_REF_SIZE,
3996  BTR_EXTERN_FIELD_REF_SIZE);
3997  dfield_set_ext(field);
3998  break;
3999  default:
4000  /* Reconstruct the original locally
4001  stored part of the column. The data
4002  will have to be copied. */
4003  ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
4004 
4005  data = (unsigned char *)dfield_get_data(field);
4006  len = dfield_get_len(field);
4007 
4008  buf = (unsigned char *)mem_heap_alloc(heap, uf->orig_len);
4009  /* Copy the locally stored prefix. */
4010  memcpy(buf, data,
4011  uf->orig_len
4012  - BTR_EXTERN_FIELD_REF_SIZE);
4013  /* Copy the BLOB pointer. */
4014  memcpy(buf + uf->orig_len
4015  - BTR_EXTERN_FIELD_REF_SIZE,
4016  data + len - BTR_EXTERN_FIELD_REF_SIZE,
4017  BTR_EXTERN_FIELD_REF_SIZE);
4018 
4019  dfield_set_data(field, buf, uf->orig_len);
4020  dfield_set_ext(field);
4021  }
4022  }
4023  }
4024 
4025  return(n_pushed);
4026 }
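In the default case above the reconstructed column value is laid out as
the original local prefix followed by the BLOB pointer (assuming
BTR_EXTERN_FIELD_REF_SIZE is 20 bytes):

	buf[0 .. orig_len - 21]            local prefix, copied from the
	                                   start of the undo log value
	buf[orig_len - 20 .. orig_len - 1] 20-byte BLOB pointer, copied
	                                   from the tail of the value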
4027 
4028 /*******************************************************************/
4031 static
4032 ulint
4033 btr_blob_get_part_len(
4034 /*==================*/
4035  const byte* blob_header)
4036 {
4037  return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
4038 }
4039 
4040 /*******************************************************************/
4043 static
4044 ulint
4045 btr_blob_get_next_page_no(
4046 /*======================*/
4047  const byte* blob_header)
4048 {
4049  return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
4050 }
4051 
4052 /*******************************************************************/
4054 static
4055 void
4056 btr_blob_free(
4057 /*==========*/
4058  buf_block_t* block,
4059  ibool all,
4061  mtr_t* mtr)
4062 {
4063  buf_pool_t* buf_pool = buf_pool_from_block(block);
4064  ulint space = buf_block_get_space(block);
4065  ulint page_no = buf_block_get_page_no(block);
4066 
4067  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4068 
4069  mtr_commit(mtr);
4070 
4071  buf_pool_mutex_enter(buf_pool);
4072  mutex_enter(&block->mutex);
4073 
4074  /* Only free the block if it is still allocated to
4075  the same file page. */
4076 
4077  if (buf_block_get_state(block)
4078  == BUF_BLOCK_FILE_PAGE
4079  && buf_block_get_space(block) == space
4080  && buf_block_get_page_no(block) == page_no) {
4081 
4082  if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
4083  && all && block->page.zip.data) {
4084  /* Attempt to deallocate the uncompressed page
4085  if the whole block cannot be deallocated. */
4086 
4087  buf_LRU_free_block(&block->page, FALSE);
4088  }
4089  }
4090 
4091  buf_pool_mutex_exit(buf_pool);
4092  mutex_exit(&block->mutex);
4093 }
4094 
4095 /*******************************************************************/
4101 UNIV_INTERN
4102 ulint
4103 btr_store_big_rec_extern_fields_func(
4104 /*=================================*/
4105  dict_index_t* index,
4107  buf_block_t* rec_block,
4108  rec_t* rec,
4109  const ulint* offsets,
4113 #ifdef UNIV_DEBUG
4114  mtr_t* local_mtr,
4116 #endif /* UNIV_DEBUG */
4117 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
4118  ibool update_in_place,
4120 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4121  const big_rec_t*big_rec_vec)
4123 {
4124  ulint rec_page_no;
4125  byte* field_ref;
4126  ulint extern_len;
4127  ulint store_len;
4128  ulint page_no;
4129  ulint space_id;
4130  ulint zip_size;
4131  ulint prev_page_no;
4132  ulint hint_page_no;
4133  ulint i;
4134  mtr_t mtr;
4135  mem_heap_t* heap = NULL;
4136  page_zip_des_t* page_zip;
4137  z_stream c_stream;
4138 
4139  ut_ad(rec_offs_validate(rec, index, offsets));
4140  ut_ad(rec_offs_any_extern(offsets));
4141  ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4142  MTR_MEMO_X_LOCK));
4143  ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
4144  ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
4145  ut_a(dict_index_is_clust(index));
4146 
4147  page_zip = buf_block_get_page_zip(rec_block);
4148  ut_a(dict_table_zip_size(index->table)
4149  == buf_block_get_zip_size(rec_block));
4150 
4151  space_id = buf_block_get_space(rec_block);
4152  zip_size = buf_block_get_zip_size(rec_block);
4153  rec_page_no = buf_block_get_page_no(rec_block);
4154  ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
4155 
4156  if (UNIV_LIKELY_NULL(page_zip)) {
4157  int err;
4158 
4159  /* Zlib deflate needs 128 kilobytes for the default
4160  window size, plus 512 << memLevel, plus a few
4161  kilobytes for small objects. We use reduced memLevel
4162  to limit the memory consumption, and preallocate the
4163  heap, hoping to avoid memory fragmentation. */
4164  heap = mem_heap_create(250000);
4165  page_zip_set_alloc(&c_stream, heap);
4166 
4167  err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
4168  Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
4169  ut_a(err == Z_OK);
4170  }
4171 
4172 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
4173  /* All pointers to externally stored columns in the record
4174  must either be zero or they must be pointers to inherited
4175  columns, owned by this record or an earlier record version. */
4176  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
4177  if (!rec_offs_nth_extern(offsets, i)) {
4178  continue;
4179  }
4180  field_ref = btr_rec_get_field_ref(rec, offsets, i);
4181 
4182  ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
4183  /* Either this must be an update in place,
4184  or the BLOB must be inherited, or the BLOB pointer
4185  must be zero (will be written in this function). */
4186  ut_a(update_in_place
4187  || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
4188  || !memcmp(field_ref, field_ref_zero,
4189  BTR_EXTERN_FIELD_REF_SIZE));
4190  }
4191 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4192  /* We have to create a file segment to the tablespace
4193  for each field and put the pointer to the field in rec */
4194 
4195  for (i = 0; i < big_rec_vec->n_fields; i++) {
4196  field_ref = btr_rec_get_field_ref(
4197  rec, offsets, big_rec_vec->fields[i].field_no);
4198 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
4199  /* A zero BLOB pointer should have been initially inserted. */
4200  ut_a(!memcmp(field_ref, field_ref_zero,
4201  BTR_EXTERN_FIELD_REF_SIZE));
4202 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4203  extern_len = big_rec_vec->fields[i].len;
4204  UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
4205  extern_len);
4206 
4207  ut_a(extern_len > 0);
4208 
4209  prev_page_no = FIL_NULL;
4210 
4211  if (UNIV_LIKELY_NULL(page_zip)) {
4212  int err = deflateReset(&c_stream);
4213  ut_a(err == Z_OK);
4214 
4215  c_stream.next_in = (Bytef *) big_rec_vec->fields[i].data;
4216  c_stream.avail_in = extern_len;
4217  }
4218 
4219  for (;;) {
4220  buf_block_t* block;
4221  page_t* page;
4222 
4223  mtr_start(&mtr);
4224 
4225  if (prev_page_no == FIL_NULL) {
4226  hint_page_no = 1 + rec_page_no;
4227  } else {
4228  hint_page_no = prev_page_no + 1;
4229  }
4230 
4231  block = btr_page_alloc(index, hint_page_no,
4232  FSP_NO_DIR, 0, &mtr);
4233  if (UNIV_UNLIKELY(block == NULL)) {
4234 
4235  mtr_commit(&mtr);
4236 
4237  if (UNIV_LIKELY_NULL(page_zip)) {
4238  deflateEnd(&c_stream);
4239  mem_heap_free(heap);
4240  }
4241 
4242  return(DB_OUT_OF_FILE_SPACE);
4243  }
4244 
4245  page_no = buf_block_get_page_no(block);
4246  page = buf_block_get_frame(block);
4247 
4248  if (prev_page_no != FIL_NULL) {
4249  buf_block_t* prev_block;
4250  page_t* prev_page;
4251 
4252  prev_block = buf_page_get(space_id, zip_size,
4253  prev_page_no,
4254  RW_X_LATCH, &mtr);
4255  buf_block_dbg_add_level(prev_block,
4256  SYNC_EXTERN_STORAGE);
4257  prev_page = buf_block_get_frame(prev_block);
4258 
4259  if (UNIV_LIKELY_NULL(page_zip)) {
4260  mlog_write_ulint(
4261  prev_page + FIL_PAGE_NEXT,
4262  page_no, MLOG_4BYTES, &mtr);
4263  memcpy(buf_block_get_page_zip(
4264  prev_block)
4265  ->data + FIL_PAGE_NEXT,
4266  prev_page + FIL_PAGE_NEXT, 4);
4267  } else {
4268  mlog_write_ulint(
4269  prev_page + FIL_PAGE_DATA
4270  + BTR_BLOB_HDR_NEXT_PAGE_NO,
4271  page_no, MLOG_4BYTES, &mtr);
4272  }
4273 
4274  }
4275 
4276  if (UNIV_LIKELY_NULL(page_zip)) {
4277  int err;
4278  page_zip_des_t* blob_page_zip;
4279 
4280  /* Write FIL_PAGE_TYPE to the redo log
4281  separately, before logging any other
4282  changes to the page, so that the debug
4283  assertions in
4284  recv_parse_or_apply_log_rec_body() can
4285  be made simpler. Before InnoDB Plugin
4286  1.0.4, the initialization of
4287  FIL_PAGE_TYPE was logged as part of
4288  the mlog_log_string() below. */
4289 
4290  mlog_write_ulint(page + FIL_PAGE_TYPE,
4291  prev_page_no == FIL_NULL
4292  ? FIL_PAGE_TYPE_ZBLOB
4293  : FIL_PAGE_TYPE_ZBLOB2,
4294  MLOG_2BYTES, &mtr);
4295 
4296  c_stream.next_out = page
4297  + FIL_PAGE_DATA;
4298  c_stream.avail_out
4299  = page_zip_get_size(page_zip)
4300  - FIL_PAGE_DATA;
4301 
4302  err = deflate(&c_stream, Z_FINISH);
4303  ut_a(err == Z_OK || err == Z_STREAM_END);
4304  ut_a(err == Z_STREAM_END
4305  || c_stream.avail_out == 0);
4306 
4307  /* Write the "next BLOB page" pointer */
4308  mlog_write_ulint(page + FIL_PAGE_NEXT,
4309  FIL_NULL, MLOG_4BYTES, &mtr);
4310  /* Initialize the unused "prev page" pointer */
4311  mlog_write_ulint(page + FIL_PAGE_PREV,
4312  FIL_NULL, MLOG_4BYTES, &mtr);
4313  /* Write a back pointer to the record
4314  into the otherwise unused area. This
4315  information could be useful in
4316  debugging. Later, we might want to
4317  implement the possibility to relocate
4318  BLOB pages. Then, we would need to be
4319  able to adjust the BLOB pointer in the
4320  record. We do not store the heap
4321  number of the record, because it can
4322  change in page_zip_reorganize() or
4323  btr_page_reorganize(). However, also
4324  the page number of the record may
4325  change when B-tree nodes are split or
4326  merged. */
4327  mlog_write_ulint(page
4328  + FIL_PAGE_FILE_FLUSH_LSN,
4329  space_id,
4330  MLOG_4BYTES, &mtr);
4331  mlog_write_ulint(page
4332  + FIL_PAGE_FILE_FLUSH_LSN + 4,
4333  rec_page_no,
4334  MLOG_4BYTES, &mtr);
4335 
4336  /* Zero out the unused part of the page. */
4337  memset(page + page_zip_get_size(page_zip)
4338  - c_stream.avail_out,
4339  0, c_stream.avail_out);
4340  mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
4341  page_zip_get_size(page_zip)
4342  - FIL_PAGE_FILE_FLUSH_LSN,
4343  &mtr);
4344  /* Copy the page to compressed storage,
4345  because it will be flushed to disk
4346  from there. */
4347  blob_page_zip = buf_block_get_page_zip(block);
4348  ut_ad(blob_page_zip);
4349  ut_ad(page_zip_get_size(blob_page_zip)
4350  == page_zip_get_size(page_zip));
4351  memcpy(blob_page_zip->data, page,
4352  page_zip_get_size(page_zip));
4353 
4354  if (err == Z_OK && prev_page_no != FIL_NULL) {
4355 
4356  goto next_zip_page;
4357  }
4358 
4359  rec_block = buf_page_get(space_id, zip_size,
4360  rec_page_no,
4361  RW_X_LATCH, &mtr);
4362  buf_block_dbg_add_level(rec_block,
4363  SYNC_NO_ORDER_CHECK);
4364 
4365  if (err == Z_STREAM_END) {
4366  mach_write_to_4(field_ref
4367  + BTR_EXTERN_LEN, 0);
4368  mach_write_to_4(field_ref
4369  + BTR_EXTERN_LEN + 4,
4370  c_stream.total_in);
4371  } else {
4372  memset(field_ref + BTR_EXTERN_LEN,
4373  0, 8);
4374  }
4375 
4376  if (prev_page_no == FIL_NULL) {
4377  btr_blob_dbg_add_blob(
4378  rec, big_rec_vec->fields[i]
4379  .field_no, page_no, index,
4380  "store");
4381 
4382  mach_write_to_4(field_ref
4383  + BTR_EXTERN_SPACE_ID,
4384  space_id);
4385 
4386  mach_write_to_4(field_ref
4387  + BTR_EXTERN_PAGE_NO,
4388  page_no);
4389 
4390  mach_write_to_4(field_ref
4391  + BTR_EXTERN_OFFSET,
4392  FIL_PAGE_NEXT);
4393  }
4394 
4395  page_zip_write_blob_ptr(
4396  page_zip, rec, index, offsets,
4397  big_rec_vec->fields[i].field_no, &mtr);
4398 
4399 next_zip_page:
4400  prev_page_no = page_no;
4401 
4402  /* Commit mtr and release the
4403  uncompressed page frame to save memory. */
4404  btr_blob_free(block, FALSE, &mtr);
4405 
4406  if (err == Z_STREAM_END) {
4407  break;
4408  }
4409  } else {
4410  mlog_write_ulint(page + FIL_PAGE_TYPE,
4411  FIL_PAGE_TYPE_BLOB,
4412  MLOG_2BYTES, &mtr);
4413 
4414  if (extern_len > (UNIV_PAGE_SIZE
4415  - FIL_PAGE_DATA
4416  - BTR_BLOB_HDR_SIZE
4417  - FIL_PAGE_DATA_END)) {
4418  store_len = UNIV_PAGE_SIZE
4419  - FIL_PAGE_DATA
4420  - BTR_BLOB_HDR_SIZE
4421  - FIL_PAGE_DATA_END;
4422  } else {
4423  store_len = extern_len;
4424  }
4425 
4426  mlog_write_string(page + FIL_PAGE_DATA
4427  + BTR_BLOB_HDR_SIZE,
4428  (const byte*)
4429  big_rec_vec->fields[i].data
4430  + big_rec_vec->fields[i].len
4431  - extern_len,
4432  store_len, &mtr);
4433  mlog_write_ulint(page + FIL_PAGE_DATA
4434  + BTR_BLOB_HDR_PART_LEN,
4435  store_len, MLOG_4BYTES, &mtr);
4436  mlog_write_ulint(page + FIL_PAGE_DATA
4437  + BTR_BLOB_HDR_NEXT_PAGE_NO,
4438  FIL_NULL, MLOG_4BYTES, &mtr);
4439 
4440  extern_len -= store_len;
4441 
4442  rec_block = buf_page_get(space_id, zip_size,
4443  rec_page_no,
4444  RW_X_LATCH, &mtr);
4445  buf_block_dbg_add_level(rec_block,
4446  SYNC_NO_ORDER_CHECK);
4447 
4448  mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
4449  MLOG_4BYTES, &mtr);
4450  mlog_write_ulint(field_ref
4451  + BTR_EXTERN_LEN + 4,
4452  big_rec_vec->fields[i].len
4453  - extern_len,
4454  MLOG_4BYTES, &mtr);
4455 
4456  if (prev_page_no == FIL_NULL) {
4457  btr_blob_dbg_add_blob(
4458  rec, big_rec_vec->fields[i]
4459  .field_no, page_no, index,
4460  "store");
4461 
4462  mlog_write_ulint(field_ref
4463  + BTR_EXTERN_SPACE_ID,
4464  space_id,
4465  MLOG_4BYTES, &mtr);
4466 
4467  mlog_write_ulint(field_ref
4468  + BTR_EXTERN_PAGE_NO,
4469  page_no,
4470  MLOG_4BYTES, &mtr);
4471 
4472  mlog_write_ulint(field_ref
4473  + BTR_EXTERN_OFFSET,
4474  FIL_PAGE_DATA,
4475  MLOG_4BYTES, &mtr);
4476  }
4477 
4478  prev_page_no = page_no;
4479 
4480  mtr_commit(&mtr);
4481 
4482  if (extern_len == 0) {
4483  break;
4484  }
4485  }
4486  }
4487  }
4488 
4489  if (UNIV_LIKELY_NULL(page_zip)) {
4490  deflateEnd(&c_stream);
4491  mem_heap_free(heap);
4492  }
4493 
4494 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
4495  /* All pointers to externally stored columns in the record
4496  must be valid. */
4497  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
4498  if (!rec_offs_nth_extern(offsets, i)) {
4499  continue;
4500  }
4501 
4502  field_ref = btr_rec_get_field_ref(rec, offsets, i);
4503 
4504  /* The pointer must not be zero. */
4505  ut_a(0 != memcmp(field_ref, field_ref_zero,
4506  BTR_EXTERN_FIELD_REF_SIZE));
4507  /* The column must not be disowned by this record. */
4508  ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
4509  }
4510 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4511  return(DB_SUCCESS);
4512 }
4513 
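The 20-byte reference that the function above writes at the end of each externally stored column has a fixed layout: a 4-byte space id, a 4-byte page number, a 4-byte offset of the first part, and an 8-byte length whose upper half is unused. Below is a minimal, self-contained sketch of decoding such a reference; the offsets mirror the BTR_EXTERN_* constants from btr0cur.h, be_read_4() stands in for mach_read_from_4() (these fields are stored big-endian), and the sample bytes are made up.

#include <stdio.h>

enum {
	EXT_SPACE_ID = 0,	/* BTR_EXTERN_SPACE_ID */
	EXT_PAGE_NO  = 4,	/* BTR_EXTERN_PAGE_NO */
	EXT_OFFSET   = 8,	/* BTR_EXTERN_OFFSET */
	EXT_LEN      = 12,	/* BTR_EXTERN_LEN, 8 bytes wide */
	EXT_REF_SIZE = 20	/* BTR_EXTERN_FIELD_REF_SIZE */
};

/* Big-endian 4-byte read, like mach_read_from_4(). */
static unsigned long be_read_4(const unsigned char* b)
{
	return ((unsigned long) b[0] << 24) | ((unsigned long) b[1] << 16)
		| ((unsigned long) b[2] << 8) | (unsigned long) b[3];
}

int main(void)
{
	/* Hypothetical reference: space 0, page 7, offset 38
	(FIL_PAGE_DATA), stored length 1000 bytes. */
	unsigned char ref[EXT_REF_SIZE] = {
		0, 0, 0, 0,  0, 0, 0, 7,  0, 0, 0, 38,
		0, 0, 0, 0,  0, 0, 0x03, 0xe8
	};

	printf("space=%lu page=%lu offset=%lu len=%lu\n",
	       be_read_4(ref + EXT_SPACE_ID),
	       be_read_4(ref + EXT_PAGE_NO),
	       be_read_4(ref + EXT_OFFSET),
	       be_read_4(ref + EXT_LEN + 4));
	return 0;
}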
4514 /*******************************************************************/
4516 static
4517 void
4518 btr_check_blob_fil_page_type(
4519 /*=========================*/
4520  ulint space_id,
4521  ulint page_no,
4522  const page_t* page,
4523  ibool read)
4524 {
4525  ulint type = fil_page_get_type(page);
4526 
4527  ut_a(space_id == page_get_space_id(page));
4528  ut_a(page_no == page_get_page_no(page));
4529 
4530  if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
4531  ulint flags = fil_space_get_flags(space_id);
4532 
4533 #ifndef UNIV_DEBUG /* Improve debug test coverage */
4534  if (UNIV_LIKELY
4535  ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
4536  /* Old versions of InnoDB did not initialize
4537  FIL_PAGE_TYPE on BLOB pages. Do not print
4538  anything about the type mismatch when reading
4539  a BLOB page that is in Antelope format. */
4540  return;
4541  }
4542 #endif /* !UNIV_DEBUG */
4543 
4544  ut_print_timestamp(stderr);
4545  fprintf(stderr,
4546  " InnoDB: FIL_PAGE_TYPE=%lu"
4547  " on BLOB %s space %lu page %lu flags %lx\n",
4548  (ulong) type, read ? "read" : "purge",
4549  (ulong) space_id, (ulong) page_no, (ulong) flags);
4550  ut_error;
4551  }
4552 }
4553 
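The check itself is just a two-byte big-endian read of the FIL_PAGE_TYPE field in the page header. A toy version over a raw page image follows; the byte offset 24 (FIL_PAGE_TYPE) and the type code 10 (FIL_PAGE_TYPE_BLOB) are assumptions mirroring fil0fil.h, not values this listing defines.

#include <stdio.h>

#define TOY_PAGE_SIZE		16384
#define TOY_FIL_PAGE_TYPE	24	/* assumed: FIL_PAGE_TYPE offset */
#define TOY_PAGE_TYPE_BLOB	10	/* assumed: FIL_PAGE_TYPE_BLOB */

static int page_is_blob(const unsigned char* page)
{
	/* The field is written with MLOG_2BYTES, most significant first. */
	unsigned type = ((unsigned) page[TOY_FIL_PAGE_TYPE] << 8)
		| page[TOY_FIL_PAGE_TYPE + 1];
	return type == TOY_PAGE_TYPE_BLOB;
}

int main(void)
{
	static unsigned char page[TOY_PAGE_SIZE];	/* zero-filled */
	page[TOY_FIL_PAGE_TYPE + 1] = TOY_PAGE_TYPE_BLOB;
	printf("blob page: %s\n", page_is_blob(page) ? "yes" : "no");
	return 0;
}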
4554 /*******************************************************************/
4559 UNIV_INTERN
4560 void
4561 btr_free_externally_stored_field(
4562 /*=============================*/
4563  dict_index_t* index,
4571  byte* field_ref,
4572  const rec_t* rec,
4574  const ulint* offsets,
4576  page_zip_des_t* page_zip,
4578  ulint i,
4580  enum trx_rb_ctx rb_ctx,
4581  mtr_t* local_mtr __attribute__((unused)))
4584 {
4585  page_t* page;
4586  ulint space_id;
4587  ulint rec_zip_size = dict_table_zip_size(index->table);
4588  ulint ext_zip_size;
4589  ulint page_no;
4590  ulint next_page_no;
4591  mtr_t mtr;
4592 
4593  ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4594  MTR_MEMO_X_LOCK));
4595  ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4596  MTR_MEMO_PAGE_X_FIX));
4597  ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4598  ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
4599 
4600  if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
4601  BTR_EXTERN_FIELD_REF_SIZE))) {
4602  /* In the rollback of uncommitted transactions, we may
4603  encounter a clustered index record whose BLOBs have
4604  not been written. There is nothing to free then. */
4605  ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
4606  return;
4607  }
4608 
4609  space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
4610 
4611  if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
4612  ext_zip_size = fil_space_get_zip_size(space_id);
4613  /* This must be an undo log record in the system tablespace,
4614  that is, in row_purge_upd_exist_or_extern().
4615  Currently, externally stored records are stored in the
4616  same tablespace as the referring records. */
4617  ut_ad(!page_get_space_id(page_align(field_ref)));
4618  ut_ad(!rec);
4619  ut_ad(!page_zip);
4620  } else {
4621  ext_zip_size = rec_zip_size;
4622  }
4623 
4624  if (!rec) {
4625  /* This is a call from row_purge_upd_exist_or_extern(). */
4626  ut_ad(!page_zip);
4627  rec_zip_size = 0;
4628  }
4629 
4630 #ifdef UNIV_BLOB_DEBUG
4631  if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
4632  && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
4633  && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
4634  /* This off-page column will be freed.
4635  Check that no references remain. */
4636 
4637  btr_blob_dbg_t b;
4638 
4639  b.blob_page_no = mach_read_from_4(
4640  field_ref + BTR_EXTERN_PAGE_NO);
4641 
4642  if (rec) {
4643  /* Remove the reference from the record to the
4644  BLOB. If the BLOB were not freed, the
4645  reference would be removed when the record is
4646  removed. Freeing the BLOB will overwrite the
4647  BTR_EXTERN_PAGE_NO in the field_ref of the
4648  record with FIL_NULL, which would make the
4649  btr_blob_dbg information inconsistent with the
4650  record. */
4651  b.ref_page_no = page_get_page_no(page_align(rec));
4652  b.ref_heap_no = page_rec_get_heap_no(rec);
4653  b.ref_field_no = i;
4654  btr_blob_dbg_rbt_delete(index, &b, "free");
4655  }
4656 
4657  btr_blob_dbg_assert_empty(index, b.blob_page_no);
4658  }
4659 #endif /* UNIV_BLOB_DEBUG */
4660 
4661  for (;;) {
4662 #ifdef UNIV_SYNC_DEBUG
4663  buf_block_t* rec_block;
4664 #endif /* UNIV_SYNC_DEBUG */
4665  buf_block_t* ext_block;
4666 
4667  mtr_start(&mtr);
4668 
4669 #ifdef UNIV_SYNC_DEBUG
4670  rec_block =
4671 #endif /* UNIV_SYNC_DEBUG */
4672  buf_page_get(page_get_space_id(
4673  page_align(field_ref)),
4674  rec_zip_size,
4675  page_get_page_no(
4676  page_align(field_ref)),
4677  RW_X_LATCH, &mtr);
4678  buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
4679  page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
4680 
4681  if (/* There is no external storage data */
4682  page_no == FIL_NULL
4683  /* This field does not own the externally stored field */
4684  || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4685  & BTR_EXTERN_OWNER_FLAG)
4686  /* Rollback and inherited field */
4687  || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
4688  && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4689  & BTR_EXTERN_INHERITED_FLAG))) {
4690 
4691  /* Do not free */
4692  mtr_commit(&mtr);
4693 
4694  return;
4695  }
4696 
4697  ext_block = buf_page_get(space_id, ext_zip_size, page_no,
4698  RW_X_LATCH, &mtr);
4699  buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
4700  page = buf_block_get_frame(ext_block);
4701 
4702  if (ext_zip_size) {
4703  /* Note that page_zip will be NULL
4704  in row_purge_upd_exist_or_extern(). */
4705  switch (fil_page_get_type(page)) {
4706  case FIL_PAGE_TYPE_ZBLOB:
4707  case FIL_PAGE_TYPE_ZBLOB2:
4708  break;
4709  default:
4710  ut_error;
4711  }
4712  next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
4713 
4714  btr_page_free_low(index, ext_block, 0, &mtr);
4715 
4716  if (UNIV_LIKELY(page_zip != NULL)) {
4717  mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
4718  next_page_no);
4719  mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
4720  0);
4721  page_zip_write_blob_ptr(page_zip, rec, index,
4722  offsets, i, &mtr);
4723  } else {
4724  mlog_write_ulint(field_ref
4725  + BTR_EXTERN_PAGE_NO,
4726  next_page_no,
4727  MLOG_4BYTES, &mtr);
4728  mlog_write_ulint(field_ref
4729  + BTR_EXTERN_LEN + 4, 0,
4730  MLOG_4BYTES, &mtr);
4731  }
4732  } else {
4733  ut_a(!page_zip);
4734  btr_check_blob_fil_page_type(space_id, page_no, page,
4735  FALSE);
4736 
4737  next_page_no = mach_read_from_4(
4738  page + FIL_PAGE_DATA
4739  + BTR_BLOB_HDR_NEXT_PAGE_NO);
4740 
4741  /* We must supply the page level (= 0) as an argument
4742  because we did not store it on the page (we save the
4743  space overhead from an index page header). */
4744 
4745  btr_page_free_low(index, ext_block, 0, &mtr);
4746 
4747  mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
4748  next_page_no,
4749  MLOG_4BYTES, &mtr);
4750  /* Zero out the BLOB length. If the server
4751  crashes during the execution of this function,
4752  trx_rollback_or_clean_all_recovered() could
4753  dereference the half-deleted BLOB, fetching a
4754  wrong prefix for the BLOB. */
4755  mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
4756  0,
4757  MLOG_4BYTES, &mtr);
4758  }
4759 
4760  /* Commit mtr and release the BLOB block to save memory. */
4761  btr_blob_free(ext_block, TRUE, &mtr);
4762  }
4763 }
4764 
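Note the shape of the loop above: every iteration re-reads BTR_EXTERN_PAGE_NO from the field reference, frees that one page in its own mini-transaction, and stores the successor page number back into the reference before committing, so a crash part-way through leaves a shorter but still consistent chain. A toy model of that traversal, with an array standing in for the tablespace and 0xFFFFFFFF playing the role of FIL_NULL:

#include <stdio.h>

#define TOY_FIL_NULL 0xFFFFFFFFu

int main(void)
{
	/* next[i] models the next-page pointer of toy page i. */
	unsigned next[4] = {1, 2, 3, TOY_FIL_NULL};
	unsigned head = 0;	/* as read from BTR_EXTERN_PAGE_NO */

	while (head != TOY_FIL_NULL) {
		unsigned successor = next[head];
		/* btr_page_free_low() would free the page here; the
		field reference is then updated to the successor
		before the mini-transaction commits. */
		printf("freeing page %u\n", head);
		head = successor;
	}
	return 0;
}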
4765 /***********************************************************/
4767 static
4768 void
4769 btr_rec_free_externally_stored_fields(
4770 /*==================================*/
4771  dict_index_t* index,
4773  rec_t* rec,
4774  const ulint* offsets,
4775  page_zip_des_t* page_zip,
4777  enum trx_rb_ctx rb_ctx,
4778  mtr_t* mtr)
4781 {
4782  ulint n_fields;
4783  ulint i;
4784 
4785  ut_ad(rec_offs_validate(rec, index, offsets));
4786  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
4787  /* Free possible externally stored fields in the record */
4788 
4789  ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
4790  n_fields = rec_offs_n_fields(offsets);
4791 
4792  for (i = 0; i < n_fields; i++) {
4793  if (rec_offs_nth_extern(offsets, i)) {
4794  btr_free_externally_stored_field(
4795  index, btr_rec_get_field_ref(rec, offsets, i),
4796  rec, offsets, page_zip, i, rb_ctx, mtr);
4797  }
4798  }
4799 }
4800 
4801 /***********************************************************/
4804 static
4805 void
4806 btr_rec_free_updated_extern_fields(
4807 /*===============================*/
4808  dict_index_t* index,
4810  rec_t* rec,
4811  page_zip_des_t* page_zip,
4813  const ulint* offsets,
4814  const upd_t* update,
4815  enum trx_rb_ctx rb_ctx,
4816  mtr_t* mtr)
4818 {
4819  ulint n_fields;
4820  ulint i;
4821 
4822  ut_ad(rec_offs_validate(rec, index, offsets));
4823  ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
4824 
4825  /* Free possible externally stored fields in the record */
4826 
4827  n_fields = upd_get_n_fields(update);
4828 
4829  for (i = 0; i < n_fields; i++) {
4830  const upd_field_t* ufield = upd_get_nth_field(update, i);
4831 
4832  if (rec_offs_nth_extern(offsets, ufield->field_no)) {
4833  ulint len;
4834  byte* data = rec_get_nth_field(
4835  rec, offsets, ufield->field_no, &len);
4836  ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4837 
4838  btr_free_externally_stored_field(
4839  index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
4840  rec, offsets, page_zip,
4841  ufield->field_no, rb_ctx, mtr);
4842  }
4843  }
4844 }
4845 
4846 /*******************************************************************/
4850 static
4851 ulint
4852 btr_copy_blob_prefix(
4853 /*=================*/
4854  byte* buf,
4856  ulint len,
4857  ulint space_id,
4858  ulint page_no,
4859  ulint offset)
4860 {
4861  ulint copied_len = 0;
4862 
4863  for (;;) {
4864  mtr_t mtr;
4865  buf_block_t* block;
4866  const page_t* page;
4867  const byte* blob_header;
4868  ulint part_len;
4869  ulint copy_len;
4870 
4871  mtr_start(&mtr);
4872 
4873  block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
4874  buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
4875  page = buf_block_get_frame(block);
4876 
4877  btr_check_blob_fil_page_type(space_id, page_no, page, TRUE);
4878 
4879  blob_header = page + offset;
4880  part_len = btr_blob_get_part_len(blob_header);
4881  copy_len = ut_min(part_len, len - copied_len);
4882 
4883  memcpy(buf + copied_len,
4884  blob_header + BTR_BLOB_HDR_SIZE, copy_len);
4885  copied_len += copy_len;
4886 
4887  page_no = btr_blob_get_next_page_no(blob_header);
4888 
4889  mtr_commit(&mtr);
4890 
4891  if (page_no == FIL_NULL || copy_len != part_len) {
4892  UNIV_MEM_ASSERT_RW(buf, copied_len);
4893  return(copied_len);
4894  }
4895 
4896  /* On all BLOB pages except the first, the BLOB header
4897  is at the start of the page data: */
4898 
4899  offset = FIL_PAGE_DATA;
4900 
4901  ut_ad(copied_len <= len);
4902  }
4903 }
4904 
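Each part is parsed through the 8-byte header defined near the top of this file: BTR_BLOB_HDR_PART_LEN at offset 0 and BTR_BLOB_HDR_NEXT_PAGE_NO at offset 4, the header sitting at page + FIL_PAGE_DATA (byte 38) on a real page. A self-contained miniature of the copy loop, chaining two in-memory "pages" whose payload immediately follows the header:

#include <stdio.h>
#include <string.h>

#define HDR_PART_LEN	0	/* BTR_BLOB_HDR_PART_LEN */
#define HDR_NEXT	4	/* BTR_BLOB_HDR_NEXT_PAGE_NO */
#define HDR_SIZE	8	/* BTR_BLOB_HDR_SIZE */
#define TOY_FIL_NULL	0xFFFFFFFFu

static unsigned be4(const unsigned char* b)
{
	return ((unsigned) b[0] << 24) | ((unsigned) b[1] << 16)
		| ((unsigned) b[2] << 8) | b[3];
}

int main(void)
{
	/* Part 1: "hello " chains to page 1; part 2: "world" ends. */
	unsigned char pages[2][HDR_SIZE + 16] = {
		{0, 0, 0, 6,  0, 0, 0, 1,  'h', 'e', 'l', 'l', 'o', ' '},
		{0, 0, 0, 5,  0xFF, 0xFF, 0xFF, 0xFF,  'w', 'o', 'r', 'l', 'd'}
	};
	char buf[32];
	unsigned page_no = 0, copied = 0;

	while (page_no != TOY_FIL_NULL) {
		const unsigned char* hdr = pages[page_no];
		unsigned part_len = be4(hdr + HDR_PART_LEN);
		memcpy(buf + copied, hdr + HDR_SIZE, part_len);
		copied += part_len;
		page_no = be4(hdr + HDR_NEXT);
	}
	buf[copied] = '\0';
	printf("prefix: \"%s\" (%u bytes)\n", buf, copied);
	return 0;
}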
4905 /*******************************************************************/
4909 static
4910 ulint
4911 btr_copy_zblob_prefix(
4912 /*==================*/
4913  byte* buf,
4915  ulint len,
4916  ulint zip_size,
4917  ulint space_id,
4918  ulint page_no,
4919  ulint offset)
4920 {
4921  ulint page_type = FIL_PAGE_TYPE_ZBLOB;
4922  mem_heap_t* heap;
4923  int err;
4924  z_stream d_stream;
4925 
4926  d_stream.next_out = buf;
4927  d_stream.avail_out = len;
4928  d_stream.next_in = Z_NULL;
4929  d_stream.avail_in = 0;
4930 
4931  /* Zlib inflate needs 32 kilobytes for the default
4932  window size, plus a few kilobytes for small objects. */
4933  heap = mem_heap_create(40000);
4934  page_zip_set_alloc(&d_stream, heap);
4935 
4936  ut_ad(ut_is_2pow(zip_size));
4937  ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4938  ut_ad(zip_size <= UNIV_PAGE_SIZE);
4939  ut_ad(space_id);
4940 
4941  err = inflateInit(&d_stream);
4942  ut_a(err == Z_OK);
4943 
4944  for (;;) {
4945  buf_page_t* bpage;
4946  ulint next_page_no;
4947 
4948  /* There is no latch on bpage directly. Instead,
4949  bpage is protected by the B-tree page latch that
4950  is being held on the clustered index record, or,
4951  in row_merge_copy_blobs(), by an exclusive table lock. */
4952  bpage = buf_page_get_zip(space_id, zip_size, page_no);
4953 
4954  if (UNIV_UNLIKELY(!bpage)) {
4955  ut_print_timestamp(stderr);
4956  fprintf(stderr,
4957  " InnoDB: Cannot load"
4958  " compressed BLOB"
4959  " page %lu space %lu\n",
4960  (ulong) page_no, (ulong) space_id);
4961  goto func_exit;
4962  }
4963 
4964  if (UNIV_UNLIKELY
4965  (fil_page_get_type(bpage->zip.data) != page_type)) {
4966  ut_print_timestamp(stderr);
4967  fprintf(stderr,
4968  " InnoDB: Unexpected type %lu of"
4969  " compressed BLOB"
4970  " page %lu space %lu\n",
4971  (ulong) fil_page_get_type(bpage->zip.data),
4972  (ulong) page_no, (ulong) space_id);
4973  goto end_of_blob;
4974  }
4975 
4976  next_page_no = mach_read_from_4(bpage->zip.data + offset);
4977 
4978  if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
4979  /* When the BLOB begins at page header,
4980  the compressed data payload does not
4981  immediately follow the next page pointer. */
4982  offset = FIL_PAGE_DATA;
4983  } else {
4984  offset += 4;
4985  }
4986 
4987  d_stream.next_in = bpage->zip.data + offset;
4988  d_stream.avail_in = zip_size - offset;
4989 
4990  err = inflate(&d_stream, Z_NO_FLUSH);
4991  switch (err) {
4992  case Z_OK:
4993  if (!d_stream.avail_out) {
4994  goto end_of_blob;
4995  }
4996  break;
4997  case Z_STREAM_END:
4998  if (next_page_no == FIL_NULL) {
4999  goto end_of_blob;
5000  }
5001  /* fall through */
5002  default:
5003 inflate_error:
5004  ut_print_timestamp(stderr);
5005  fprintf(stderr,
5006  " InnoDB: inflate() of"
5007  " compressed BLOB"
5008  " page %lu space %lu returned %d (%s)\n",
5009  (ulong) page_no, (ulong) space_id,
5010  err, d_stream.msg);
5011  case Z_BUF_ERROR:
5012  goto end_of_blob;
5013  }
5014 
5015  if (next_page_no == FIL_NULL) {
5016  if (!d_stream.avail_in) {
5017  ut_print_timestamp(stderr);
5018  fprintf(stderr,
5019  " InnoDB: unexpected end of"
5020  " compressed BLOB"
5021  " page %lu space %lu\n",
5022  (ulong) page_no,
5023  (ulong) space_id);
5024  } else {
5025  err = inflate(&d_stream, Z_FINISH);
5026  switch (err) {
5027  case Z_STREAM_END:
5028  case Z_BUF_ERROR:
5029  break;
5030  default:
5031  goto inflate_error;
5032  }
5033  }
5034 
5035 end_of_blob:
5036  buf_page_release_zip(bpage);
5037  goto func_exit;
5038  }
5039 
5040  buf_page_release_zip(bpage);
5041 
5042  /* On all BLOB pages except the first, the next-page
5043  pointer sits in the page header: */
5044 
5045  page_no = next_page_no;
5046  offset = FIL_PAGE_NEXT;
5047  page_type = FIL_PAGE_TYPE_ZBLOB2;
5048  }
5049 
5050 func_exit:
5051  inflateEnd(&d_stream);
5052  mem_heap_free(heap);
5053  UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
5054  return(d_stream.total_out);
5055 }
5056 
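The decompression above is ordinary zlib streaming: one inflate() call with Z_NO_FLUSH per chain page while more input remains, then Z_FINISH (tolerating Z_BUF_ERROR) once the chain ends. The same pattern in a self-contained miniature; compress2() only fabricates test input, and the 8-byte slices stand in for successive ZBLOB page payloads:

#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	const char* text = "externally stored column payload";
	unsigned char comp[128];
	unsigned char out[128];
	uLongf comp_len = sizeof(comp);
	uLongf pos = 0;
	z_stream d;
	int err;

	compress2(comp, &comp_len, (const Bytef*) text,
		  strlen(text) + 1, Z_DEFAULT_COMPRESSION);

	memset(&d, 0, sizeof(d));	/* zalloc/zfree/opaque = Z_NULL */
	err = inflateInit(&d);
	d.next_out = out;
	d.avail_out = sizeof(out);

	/* Feed the input in small slices, as if each slice were the
	payload of one FIL_PAGE_TYPE_ZBLOB/ZBLOB2 page. */
	while (pos < comp_len && err == Z_OK) {
		uLongf chunk = comp_len - pos < 8 ? comp_len - pos : 8;
		d.next_in = comp + pos;
		d.avail_in = (uInt) chunk;
		err = inflate(&d, Z_NO_FLUSH);
		pos += chunk;
	}
	if (err == Z_OK) {
		err = inflate(&d, Z_FINISH);	/* end of the chain */
	}
	inflateEnd(&d);
	printf("inflate: %d, output: %s\n", err, (const char*) out);
	return 0;
}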
5057 /*******************************************************************/
5062 static
5063 ulint
5064 btr_copy_externally_stored_field_prefix_low(
5065 /*========================================*/
5066  byte* buf,
5068  ulint len,
5069  ulint zip_size,
5071  ulint space_id,
5072  ulint page_no,
5073  ulint offset)
5074 {
5075  if (UNIV_UNLIKELY(len == 0)) {
5076  return(0);
5077  }
5078 
5079  if (UNIV_UNLIKELY(zip_size)) {
5080  return(btr_copy_zblob_prefix(buf, len, zip_size,
5081  space_id, page_no, offset));
5082  } else {
5083  return(btr_copy_blob_prefix(buf, len, space_id,
5084  page_no, offset));
5085  }
5086 }
5087 
5088 /*******************************************************************/
5093 UNIV_INTERN
5094 ulint
5095 btr_copy_externally_stored_field_prefix(
5096 /*====================================*/
5097  byte* buf,
5098  ulint len,
5099  ulint zip_size,
5101  const byte* data,
5105  ulint local_len)
5106 {
5107  ulint space_id;
5108  ulint page_no;
5109  ulint offset;
5110 
5111  ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5112 
5113  local_len -= BTR_EXTERN_FIELD_REF_SIZE;
5114 
5115  if (UNIV_UNLIKELY(local_len >= len)) {
5116  memcpy(buf, data, len);
5117  return(len);
5118  }
5119 
5120  memcpy(buf, data, local_len);
5121  data += local_len;
5122 
5123  ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
5124 
5125  if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
5126  /* The externally stored part of the column has been
5127  (partially) deleted. Signal the half-deleted BLOB
5128  to the caller. */
5129 
5130  return(0);
5131  }
5132 
5133  space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
5134 
5135  page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
5136 
5137  offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
5138 
5139  return(local_len
5140  + btr_copy_externally_stored_field_prefix_low(buf + local_len,
5141  len - local_len,
5142  zip_size,
5143  space_id, page_no,
5144  offset));
5145 }
5146 
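The split performed above is worth making concrete: the last BTR_EXTERN_FIELD_REF_SIZE (20) bytes of the stored field are the reference itself, everything before them is the locally stored prefix, and only the shortfall beyond that prefix is fetched from BLOB pages. A toy run of the arithmetic with made-up lengths:

#include <stdio.h>

#define REF_SIZE 20	/* BTR_EXTERN_FIELD_REF_SIZE */

int main(void)
{
	unsigned local_len = 768 + REF_SIZE;	/* field length in record */
	unsigned want = 1024;			/* requested prefix length */

	local_len -= REF_SIZE;			/* usable in-record bytes */
	if (local_len >= want) {
		printf("whole prefix is local\n");
	} else {
		printf("copy %u local bytes, fetch %u from BLOB pages\n",
		       local_len, want - local_len);
	}
	return 0;
}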
5147 /*******************************************************************/
5151 static
5152 byte*
5153 btr_copy_externally_stored_field(
5154 /*=============================*/
5155  ulint* len,
5156  const byte* data,
5160  ulint zip_size,
5162  ulint local_len,
5163  mem_heap_t* heap)
5164 {
5165  ulint space_id;
5166  ulint page_no;
5167  ulint offset;
5168  ulint extern_len;
5169  byte* buf;
5170 
5171  ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5172 
5173  local_len -= BTR_EXTERN_FIELD_REF_SIZE;
5174 
5175  space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
5176 
5177  page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
5178 
5179  offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
5180 
5181  /* Currently a BLOB cannot be bigger than 4 GB; we
5182  leave the 4 upper bytes in the length field unused */
5183 
5184  extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
5185 
5186  buf = (unsigned char *)mem_heap_alloc(heap, local_len + extern_len);
5187 
5188  memcpy(buf, data, local_len);
5189  *len = local_len
5190  + btr_copy_externally_stored_field_prefix_low(buf + local_len,
5191  extern_len,
5192  zip_size,
5193  space_id,
5194  page_no, offset);
5195 
5196  return(buf);
5197 }
5198 
5199 /*******************************************************************/
5202 UNIV_INTERN
5203 byte*
5204 btr_rec_copy_externally_stored_field(
5205 /*=================================*/
5206  const rec_t* rec,
5208  const ulint* offsets,
5209  ulint zip_size,
5211  ulint no,
5212  ulint* len,
5213  mem_heap_t* heap)
5214 {
5215  ulint local_len;
5216  const byte* data;
5217 
5218  ut_a(rec_offs_nth_extern(offsets, no));
5219 
5220  /* An externally stored field can contain some initial
5221  data from the field, and in the last 20 bytes it has the
5222  space id, page number, and offset where the rest of the
5223  field data is stored, and the data length in addition to
5224  the data stored locally. We may need to store some data
5225  locally to get the local record length above the 128 byte
5226  limit so that field offsets are stored in two bytes, and
5227  the extern bit is available in those two bytes. */
5228 
5229  data = rec_get_nth_field(rec, offsets, no, &local_len);
5230 
5231  ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5232 
5233  if (UNIV_UNLIKELY
5234  (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
5235  field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
5236  /* The externally stored field was not written yet.
5237  This record should only be seen by
5238  recv_recovery_rollback_active() or any
5239  TRX_ISO_READ_UNCOMMITTED transactions. */
5240  return(NULL);
5241  }
5242 
5243  return(btr_copy_externally_stored_field(len, data,
5244  zip_size, local_len, heap));
5245 }
5246 #endif /* !UNIV_HOTBACKUP */