mysql チュートリアル

MySQL Internals-Index Merge优化_MySQL

WBOYWBOYWBOYWBOYWBOYWBOYWBOYWBOYWBOYWBOYWBOYWBOYWB

Jun 01, 2016 pm 01:36 PM

歴史

bitsCN.com

MySQL Internals-Index Merge优化

Louis Hust

0 前言

之前搞错了，以为Index Merge是MySQL5.6的新特性，原来不是，发现5.5也有，看了下manual，发现5.0的manual就已经存在了，可以说是一个历史悠久的优化手段了，好吧，不管怎么样，今天就拨开其神秘的面纱，看看其内部到底如何生成这种Index Merge的计划的。这里只详细介绍Intersect操作，对于Union和Sort-Union的具体代码，还没开始研究。

1 Index Merge理论基础

Index Merge——索引归并，即针对一张表，同时使用多个索引进行查询，然后将各个索引查出来的结果进行进一步的操作，可以是求交 ——Intersect，也可以是求和——Union，针对union还有一种补充算法——Sort-Union，很奇怪为什么没有Sort-Intersect，按道理也是可以做的。

什么情况下，同时使用多个索引会有利呢？比如说WHERE条件是C1=10 AND C2 =100，但是只有分别针对C1和C2的索引，而没有(C1,C2)这种索引，两个索引同时使用才有意义,通过两个索引都可以快速定位到一批数据，然后对这一批数据进行进一步的求交或求和操作即可,这样的效率可能比全表扫描或者只使用其中一个索引进行扫描然后再去主索引查询要快。

Intersect和Union都需要使用的索引是ROR的，也就时ROWID ORDERED，即针对不同的索引扫描出来的数据必须是同时按照ROWID排序的，这里的 ROWID其实也就是InnoDB的主键(如果不定义主键，InnoDB会隐式添加ROWID列作为主键)。只有每个索引是ROR的，才能进行归并排序，你懂的。当然你可能会有疑惑，查不记录后内部进行一次sort不一样么，何必必须要ROR呢，不错，所以有了SORT-UNION。SORT-UNION就是每个非ROR的索引排序后再进行Merge。至于为什么没有SORT-INTERSECT，我也很是迷茫。

2 初始化数据

mysql> show create table im/G*************************** 1. row ***************************       Table: imCreate Table: CREATE TABLE `im` (  `c1` int(11) DEFAULT NULL,  `c2` int(11) DEFAULT NULL,  `c3` int(11) DEFAULT NULL,  KEY `c1` (`c1`,`c3`),  KEY `c2` (`c2`,`c1`)) ENGINE=InnoDB DEFAULT CHARSET=latin11 row in set (0.00 sec)mysql> show create procedure fill_im1/G*************************** 1. row ***************************           Procedure: fill_im1            sql_mode: NO_ENGINE_SUBSTITUTION    Create Procedure: CREATE DEFINER=`root`@`127.0.0.1` PROCEDURE `fill_im1`(cnt int)begin declare i int default 0; repeat insert into im values(100, 50, 100); set i=i+1; until i > cnt end repeat; endcharacter_set_client: utf8collation_connection: utf8_general_ci  Database Collation: latin1_swedish_ci1 row in set (0.07 sec)mysql> show create procedure fill_im2/G*************************** 1. row ***************************           Procedure: fill_im2            sql_mode: NO_ENGINE_SUBSTITUTION    Create Procedure: CREATE DEFINER=`root`@`127.0.0.1` PROCEDURE `fill_im2`(cnt int)begin declare i int default 0; repeat insert into im values(100, 100, 50); set i=i+1; until i > cnt end repeat; endcharacter_set_client: utf8collation_connection: utf8_general_ci  Database Collation: latin1_swedish_ci1 row in set (0.00 sec)mysql> call fill_im1(2000)mysql> call fill_im2(2000)mysql> insert into im values(100,50,50);Query OK, 1 row affected (0.00 sec)mysql> insert into im values(100,50,50);Query OK, 1 row affected (0.00 sec)mysql> commit;Query OK, 0 rows affected (0.05 sec)mysql> select * from im where c1=100 and c2 = 50 and c3 = 50/G*************************** 1. row ***************************c1: 100c2: 50c3: 50*************************** 2. row ***************************c1: 100c2: 50c3: 502 rows in set (0.13 sec)

3 执行计划

mysql> explain select * from im where c1=100 and c2 = 50 and c3 = 50/G*************************** 1. row ***************************           id: 1  select_type: SIMPLE        table: im         type: index_mergepossible_keys: c1,c2          key: c1,c2      key_len: 10,10          ref: NULL         rows: 1001        Extra: Using intersect(c1,c2); Using where; Using index1 row in set (0.00 sec)

4 代码分析

从生成数据的方法可以看出来，是专门针对查询的语句进行构造的。无论是根据(c1,c3)的索引查询还是根据(c2,c1)的索引查询，都会查出一般的数据，即效率接近于全表扫描的一半。但是如果利用两个索引同时进行过滤，那么过滤出来的数据就很少了,也就是结果中的两条。

也就是说如果单独查询各个索引，过滤效果不明显，但是如果联合两个索引进行MERGE过滤，那么效果可能很明显，这里所说的过滤，用更专业的词来说是选择因子——selectivity。而计划的选择时代价的计算，便是计算这个选择因子。如果综合多个索引，导致选择因子很小，从而达到索引merge出来的结果集很小的话，那么计划就更倾向于Index Merge，反之则不然。

下面是选择子计算的代码：

static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info,                                    const ROR_SCAN_INFO *scan){  double selectivity_mult= 1.0;  const TABLE * const table= info->param->table;  const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part;  /**    key values tuple, used to store both min_range.key and    max_range.key. This function is only called for equality ranges;    open ranges (e.g. "min_value covered_fields,                                        key_part->fieldnr-1));  key_range min_range;  key_range max_range;  min_range.key= key_val;  min_range.flag= HA_READ_KEY_EXACT;  max_range.key= key_val;  max_range.flag= HA_READ_AFTER_KEY;  ha_rows prev_records= table->file->stats.records;  DBUG_ENTER("ror_scan_selectivity");  for (sel_arg= scan->sel_arg; sel_arg;       sel_arg= sel_arg->next_key_part)  {    DBUG_PRINT("info",("sel_arg step"));    cur_covered= test(bitmap_is_set(&info->covered_fields,                                    key_part[sel_arg->part].fieldnr-1));    if (cur_covered != prev_covered)    {      /* create (part1val, ..., part{n-1}val) tuple. */      bool is_null_range= false;      ha_rows records;      if (!tuple_arg)      {        tuple_arg= scan->sel_arg;        /* Here we use the length of the first key part */        tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0);        is_null_range|= tuple_arg->is_null_interval();        keypart_map= 1;      }      while (tuple_arg->next_key_part != sel_arg)      {        tuple_arg= tuple_arg->next_key_part;        tuple_arg->store_min(key_part[tuple_arg->part].store_length,                             &key_ptr, 0);        is_null_range|= tuple_arg->is_null_interval();        keypart_map= (keypart_map param->use_index_statistics ||        // (1)          is_null_range ||                             // (2)          !(records= table->key_info[scan->keynr].                     rec_per_key[tuple_arg->part]))    // (3)      {        DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););        DBUG_ASSERT(min_range.length > 0);        records= (table->file->                  records_in_range(scan->keynr, &min_range, &max_range));      }      if (cur_covered)      {        /* uncovered -> covered */        double tmp= rows2double(records)/rows2double(prev_records);        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));        selectivity_mult *= tmp;        prev_records= HA_POS_ERROR;      }      else      {        /* covered -> uncovered */        prev_records= records;      }    }    prev_covered= cur_covered;  }  if (!prev_covered)  {    double tmp= rows2double(table->quick_rows[scan->keynr]) /                rows2double(prev_records);    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));    selectivity_mult *= tmp;  }  // Todo: This assert fires in PB sysqa RQG tests.  // DBUG_ASSERT(selectivity_mult  <p>刚看到这段代码时，确实有点犯懵，代码的注释给了很大的帮助：</p><pre class="brush:php;toolbar:false">/*  Get selectivity of adding a ROR scan to the ROR-intersection.  SYNOPSIS    ror_scan_selectivity()      info  ROR-interection, an intersection of ROR index scans       scan  ROR scan that may or may not improve the selectivity            of 'info'        NOTES    Suppose we have conditions on several keys    cond=k_11=c_11 AND k_12=c_12 AND ...  // key_parts of first key in 'info'         k_21=c_21 AND k_22=c_22 AND ...  // key_parts of second key in 'info'          ...         k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //key_parts of 'scan'    where k_ij may be the same as any k_pq (i.e. keys may have common parts).    Note that for ROR retrieval, only equality conditions are usable so there    are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info'    A full row is retrieved if entire condition holds.    The recursive procedure for finding P(cond) is as follows:    First step:    Pick 1st part of 1st key and break conjunction (1) into two parts:      cond= (k_11=c_11 AND R)    Here R may still contain condition(s) equivalent to k_11=c_11.    Nevertheless, the following holds:      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).    Mark k_11 as fixed field (and satisfied condition) F, save P(F),    save R to be cond and proceed to recursion step.    Recursion step:    We have a set of fixed fields/satisfied conditions) F, probability P(F),    and remaining conjunction R    Pick next key part on current key and its condition "k_ij=c_ij".    We will add "k_ij=c_ij" into F and update P(F).    Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)    (where '|' mean conditional probability, not "or")    Consider the first multiplier in (2). One of the following holds:    a) F contains condition on field used in t (i.e. t AND F = F).      Then P(t|F) = 1    b) F doesn't contain condition on field used in t. Then F and t are     considered independent.     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =          = P(t|fields_before_t_in_key).     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /                                   #records(fields_before_t_in_key, t)    The second multiplier is calculated by applying this step recursively.  IMPLEMENTATION    This function calculates the result of application of the "recursion step"    described above for all fixed key members of a single key, accumulating set    of covered fields, selectivity, etc.    The calculation is conducted as follows:    Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate     n_{k1}      n_{k2}    --------- * ---------  * .... (3)     n_{k1-1}    n_{k2-1}    where k1,k2,... are key parts which fields were not yet marked as fixed    ( this is result of application of option b) of the recursion step for      parts of a single key).    Since it is reasonable to expect that most of the fields are not marked    as fixed, we calculate (3) as                                  n_{i1}      n_{i2}    (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )                                  n_{i1-1}    n_{i2-1}    where i1,i2, .. are key parts that were already marked as fixed.    In order to minimize number of expensive records_in_range calls we    group and reduce adjacent fractions. Note that on the optimizer's    request, index statistics may be used instead of records_in_range    @see RANGE_OPT_PARAM::use_index_statistics.  RETURN    Selectivity of given ROR scan, a number between 0 and 1. 1 means that    adding 'scan' to the intersection does not improve the selectivity.*/

注释想说明的就是选择因子的概率如何进行计算，其实就是不同INDEX之间差异性的索引列会引起选择因子不断变小，即 Index之间差异性越大，过滤的记录就越多，选择出来的数据集就会越少。INDEX的差异性就是INdex之间索引列列是否重复出现在不同索引之间，两个INDEX约相似，那么MERGE的结果集越大。具体的实现大家自己看看吧，明白了原理，实现都是浮云了。

BTW, 5.6的Optimizer trace十分好用，对于想要跟踪Optimizer内部的同学来说，可以先把详细的计划生成流程通过Optimizer trace 打印出来，对照优化流程，就能更好的定位到代码。

References

[1]: index-merge-optimization

File translated fromT_EXby T_TH,version 4.03.
On 28 Jan 2013, 22:35.

bitsCN.com

声明

この記事の内容はネチズンが自主的に寄稿したものであり、著作権は原著者に帰属します。このサイトは、それに相当する法的責任を負いません。盗作または侵害の疑いのあるコンテンツを見つけた場合は、admin@php.cn までご連絡ください。

MySQLと他のSQL方言の構文の違いは何ですか？Apr 27, 2025 am 12:26 AM

MySQLdiffersfromotherSQLdialectsinsyntaxforLIMIT,auto-increment,stringcomparison,subqueries,andperformanceanalysis.1)MySQLusesLIMIT,whileSQLServerusesTOPandOracleusesROWNUM.2)MySQL'sAUTO_INCREMENTcontrastswithPostgreSQL'sSERIALandOracle'ssequenceandt

MySQLパーティションは何ですか？Apr 27, 2025 am 12:23 AM

MySQLパーティション化により、パフォーマンスが向上し、メンテナンスが簡素化されます。 1）大きなテーブルを特定の基準（日付範囲など）、2）物理的に独立したファイルに物理的に分割する、3）MySQLはクエリするときに関連するパーティションに焦点を合わせることができます。

MySQLで特権をどのように許可して取り消しますか？Apr 27, 2025 am 12:21 AM

mysqlで許可を許可および取り消す方法は？ 1。grantallprivilegesondatabase_name.to'username'@'host 'などの許可を付与するために付与ステートメントを使用してください。 2。Revokeallprivilegesondatabase_name.from'username'@'host 'など、Revoke Statementを使用して、許可のタイムリーな通信を確保します。

InnoDBとMyisamストレージエンジンの違いを説明してください。Apr 27, 2025 am 12:20 AM

INNODBは、トランザクションサポートと高い並行性を必要とするアプリケーションに適していますが、Myisamはより多くの読み取りとより少ない書き込みを必要とするアプリケーションに適しています。 1.INNODBは、eコマースおよび銀行システムに適したトランザクションおよび銀行レベルのロックをサポートしています。 2. Myisamは、ブログやコンテンツ管理システムに適した、迅速な読み取りとインデックス作成を提供します。

MySQLの結合のさまざまなタイプは何ですか？Apr 27, 2025 am 12:13 AM

MySQLには4つのメイン結合タイプがあります：innerjoin、leftjoin、rightjoin、fullouterjoin。 1.InnerJoinは、結合条件を満たす2つのテーブルのすべての行を返します。 2.右のテーブルに一致する行がない場合でも、Leftjoinは左のテーブルのすべての行を返します。 3。右joinはleftjoinに反しており、右のテーブルのすべての行を返します。 4.fullouterjoinは、結合条件を満たしている、または満たさない2つのテーブルのすべての行を返します。

MySQLで利用可能なさまざまなストレージエンジンは何ですか？Apr 26, 2025 am 12:27 AM

mysqloffersvariousstorageEngines、それぞれのfordifferentusecases：1）Innodbisidealforapplicationsingingidcomplianceanceandhighconcurrency、support transactions andforeignkeys.2）myisamisbestforread-havyworkloads、transactionsupptort.3）

MySQLの一般的なセキュリティの脆弱性は何ですか？Apr 26, 2025 am 12:27 AM

MySQLの一般的なセキュリティの脆弱性には、SQLインジェクション、弱いパスワード、不適切な許可構成、および非合事ソフトウェアが含まれます。 1。SQL注射は、前処理ステートメントを使用することで防ぐことができます。 2。強力なパスワード戦略を強制的に使用することにより、弱いパスワードを回避できます。 3.不適切な許可構成は、ユーザー許可の定期的なレビューと調整を通じて解決できます。 4.未使用のソフトウェアは、MySQLバージョンを定期的にチェックして更新することでパッチを適用できます。

MySQLでスロークエリをどのように識別できますか？Apr 26, 2025 am 12:15 AM

MySQLの遅いクエリを識別することは、遅いクエリログを有効にし、しきい値を設定することで実現できます。 1.スロークエリログを有効にし、しきい値を設定します。 2.スロークエリログファイルを表示および分析し、詳細な分析のためにMySQLDumpSlowやPT-Query-Digestなどのツールを使用します。 3.インデックスの最適化、クエリの書き換え、およびselect*の使用を回避することで、遅いクエリの最適化を実現できます。

See all articles

ホットAIツール

Undresser.AI Undress

リアルなヌード写真を作成する AI 搭載アプリ

AI Clothes Remover

写真から衣服を削除するオンライン AI ツール。

Undress AI Tool

脱衣画像を無料で

Clothoff.io

AI衣類リムーバー

Video Face Swap

完全無料の AI 顔交換ツールを使用して、あらゆるビデオの顔を簡単に交換できます。

ホットツール

メモ帳++7.3.1

使いやすく無料のコードエディター

MantisBT

Mantis は、製品の欠陥追跡を支援するために設計された、導入が簡単な Web ベースの欠陥追跡ツールです。 PHP、MySQL、Web サーバーが必要です。デモおよびホスティングサービスをチェックしてください。

DVWA

Damn Vulnerable Web App (DVWA) は、非常に脆弱な PHP/MySQL Web アプリケーションです。その主な目的は、セキュリティ専門家が法的環境でスキルとツールをテストするのに役立ち、Web 開発者が Web アプリケーションを保護するプロセスをより深く理解できるようにし、教師/生徒が教室環境で Web アプリケーションを教え/学習できるようにすることです。安全。 DVWA の目標は、シンプルでわかりやすいインターフェイスを通じて、さまざまな難易度で最も一般的な Web 脆弱性のいくつかを実践することです。このソフトウェアは、

mPDF

mPDF は、UTF-8 でエンコードされた HTML から PDF ファイルを生成できる PHP ライブラリです。オリジナルの作者である Ian Back は、Web サイトから「オンザフライ」で PDF ファイルを出力し、さまざまな言語を処理するために mPDF を作成しました。 HTML2FPDF などのオリジナルのスクリプトよりも遅く、Unicode フォントを使用すると生成されるファイルが大きくなりますが、CSS スタイルなどをサポートし、多くの機能強化が施されています。 RTL (アラビア語とヘブライ語) や CJK (中国語、日本語、韓国語) を含むほぼすべての言語をサポートします。ネストされたブロックレベル要素 (P、DIV など) をサポートします。