Quantcast
Channel: Planet MySQL
Viewing all articles
Browse latest Browse all 18783

MySQL Workbench Plugin: mforms example and slow query log statistics

$
0
0

As an update to my prior post, I’ve added a form to the workbench plugin.  Now, the user can select a slow query log file and generate statistics from it.  The plugin provides information to answer the following questions:

Figure 1. Sample plugin form

  • What type of queries run most often?
  • What type of queries are the slowest?
  • Which queries access the most rows?
  • Which queries send the most data?

The plugin scans the slow query log, aggregates similar queries, and provides summary statistics for each group.  It’s very similar to the mysqldumpslow perl utility, which is included in the mysql bin folder.  However, as a plugin, it’s easier to use on a Windows server as it doesn’t require you to install perl.

If you’re working on a MySQL Workbench plugin of your own, the mforms code below provides a working example.

This form prompts the user for the output type of the slow query log (file or table), for the location of the slow query log file, and for the sort order: count, query_time, rows_examined, rows_sent. See Figure 1. Sample plugin form.

The plugin generates output similar to the following for queries in the slow query log:

Statement: DELETE FROM cache_form WHERE expire != DIGIT AND expire < DIGIT
count: 21
average time: 0:00:00.003677
slowest sql: DELETE FROM cache_form WHERE expire != 0 AND expire < 1281722403
slowest time: 0:00:00.075859
average rows sent: 0
average rows examined: 0
average lock time: 0:00:00.003625

Statement: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = 'STRING'
count: 116
average time: 0:00:00.001269
slowest sql: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = 'links:secondary-links:tree-data:5d6d3aaaaef5fba302ce62698fa37bbe'
slowest time: 0:00:00.024912
average rows sent: 0
average rows examined: 0
average lock time: 0:00:00.000071

. . .
. . .
. . .


To add the plugin to your Workbench install, create a file named slowquerystats_grt.py, click on the “show source” magnifying glass icon below, and copy the code block into the file.  From the Scripting menu in Workbench, choose Install Plugin/Module File and select the file to be installed.  This plugin requires Workbench version 2.7 or newer.  Once you’ve installed the plugin, you can run it by opening the SQL Editor and selecting Slow Log Stats from the Plugins|Utilities menu.

import os
import re
from wb import *
import grt
from operator import itemgetter
from mforms import newBox, newButton, newPanel, newTextBox, newRadioButton, newLabel, newSelector, newTextEntry, newCheckBox, SelectorCombobox
from mforms import Utilities, FileChooser
import mforms
import datetime

ModuleInfo = DefineModule(name= "slowlogstats", author= "lstigile", version="0.5")

#---------------------------------------------
#  Entry point, creates form instance, sets choice for sourcetype (table or file), file location
#  and sort order of results
#---------------------------------------------
@ModuleInfo.plugin("slowlogstats", caption= "Slow Log Stats", input= [wbinputs.currentQueryBuffer()], pluginMenu= "SQL/Utilities")
@ModuleInfo.export(grt.INT, grt.classes.db_query_QueryBuffer)
def optionPrompt(qbuffer):
  """Prompt for the log source and sort order, then print slow query statistics.

  Shows the LogParamFrm dialog.  When it is confirmed, the slow query entries
  are read either from the chosen log file or from the mysql.slow_log table,
  aggregated by parseSlowQueryTable, sorted by the selected statistic
  (descending) and written to the output of the editor owning *qbuffer*.

  qbuffer -- the query buffer handed in by Workbench; only its ``owner``
             (the SQL editor) is used.

  Always returns 0.
  """
  # position of each sortable statistic inside the tuples built by parseSlowQueryTable
  sort = {"query_time":1, "count":0, "rows_sent":3,"rows_examined":4, "lock_time":2}

  f = LogParamFrm()

  editor = qbuffer.owner
  editor.addToOutput("\n", 1)

  if f.result != mforms.ResultOk:
    return 0

  if f.rbfile.get_active():
    resultsets = LogFileResultset(f.logfile_path)
    ok = resultsets.open()
    if ok != 1:
      # open() signals failure by returning a (message, exception) pair instead of 1
      if isinstance(ok, tuple):
        message = ''.join([str(part) for part in ok])
      else:
        message = str(ok)
      editor.addToOutput(message, 1)
      return 0
    try:
      tuples = parseSlowQueryTable(resultsets)
    finally:
      # the parser consumes the whole file, so it can be released on every path
      resultsets.close()
  else:
    sql = "SELECT * FROM mysql.slow_log order by start_time desc limit 10000;"
    resultsets = editor.executeScript(sql)
    tuples = parseSlowQueryTable(resultsets[0])

  # On failure the parser hands back a list holding one message string; on
  # success a tuple of 12-field tuples.  Look at the element type rather than
  # the number of groups, so that small logs are not mistaken for errors.
  if not tuples or not isinstance(tuples[0], tuple):
    if tuples:
      editor.addToOutput(tuples[0], 1)
    return 0

  tuples = sorted(tuples, key=itemgetter(sort[f.sort_method]), reverse=True)
  editor.addToOutput('\n'.join(printFormat(tuples)), 0)

  return 0

#-----------------------------------
# format info to print to screen
#------------------------------------
def printFormat(tuples):
  """Render aggregated slow query statistics as printable text blocks.

  tuples -- iterable of 12-field records in the order
            (count, total time, total lock time, total rows sent,
             total rows examined, slowest time, slowest sql, average time,
             average rows sent, average rows examined, average lock time,
             normalized statement).

  Returns a list with one multi-line string per record; each string ends
  with a blank line.
  """
  lines = []
  for item in tuples:
    (count, ttime, tlock, tsent, texam, maxtime, maxstmt,
     atime, asent, aexam, alock, statement) = item
    # every label is followed by ': ' so all rows of the report line up the same way
    lines.append('Statement: ' + statement + '\n' +
                 'count: ' + str(count) + '\n' +
                 'average time: ' + str(atime) + '\n' +
                 'slowest sql: ' + maxstmt + '\n' +
                 'slowest time: ' + str(maxtime) + '\n' +
                 'average rows sent: ' + str(asent) + '\n' +
                 'average rows examined: ' + str(aexam) + '\n' +
                 'average lock time: ' + str(alock) + '\n\n')
  return lines

#----------------------------
def _parseDuration(value):
  """Convert a duration string from a slow log result set into a timedelta.

  Three layouts are understood:
    'MM:SS:uuuuu'      -- more than 9 characters, no '.'; minutes, seconds and
                          microseconds (the layout filetimeformat produces)
    'HH:MM:SS'         -- hours, minutes, seconds
    'HH:MM:SS.ffffff'  -- as above with fractional seconds
                          (presumably what a TIME(6) column yields -- TODO confirm)
  """
  if '.' in value:
    clock, fraction = value.split('.', 1)
    hours, minutes, seconds = clock.split(':')
    return datetime.timedelta(hours=int(hours), minutes=int(minutes),
                              seconds=int(seconds),
                              microseconds=int((fraction + '000000')[:6]))
  if value[9:] != '':
    return datetime.timedelta(minutes=int(value[0:2]), seconds=int(value[3:5]),
                              microseconds=int(value[6:]))
  return datetime.timedelta(hours=int(value[0:2]), minutes=int(value[3:5]),
                            seconds=int(value[6:8]))


def parseSlowQueryTable(result):
  """Aggregate slow query rows into per-statement statistics.

  result -- any object offering goToFirstRow(), nextRow() and
            stringFieldValueByName(name) with the fields 'sql_text',
            'query_time', 'lock_time', 'rows_sent' and 'rows_examined'.

  Literal strings and numbers in each statement are replaced by STRING and
  DIGIT so that statements differing only in their values are grouped.

  Returns a tuple of 12-field tuples
    (count, total time, total lock time, total rows sent, total rows examined,
     slowest time, slowest sql, average time, average rows sent,
     average rows examined, average lock time, normalized statement)
  or, when goToFirstRow() reports 0, a list holding a single error message.
  """
  row_dict = {}

  ok = result.goToFirstRow()

  if ok == 0:
    return ['invalid file format, User@Host not found in first 20 entries']

  while ok:
    rawsql = result.stringFieldValueByName('sql_text')
    time = _parseDuration(result.stringFieldValueByName('query_time'))
    timelock = _parseDuration(result.stringFieldValueByName('lock_time'))
    sent = int(result.stringFieldValueByName('rows_sent'))
    examined = int(result.stringFieldValueByName('rows_examined'))

    # Replace quoted values with STRING.  Modify this expression if SQL
    # statements are not correctly grouped.
    # NOTE(review): the trailing [FROM|ORDER|GROUP|AND|WHERE] is a character
    # class, not an alternation of keywords; left untouched because changing
    # it would regroup statements.
    sql = re.sub(r'(?<=[\s\(,<>=][\'\"]).*?(?=[\'\"]\)$|[\'\"]$|[\'\"][\,\)]\s|[\'\"]\s[FROM|' +
                         'ORDER|GROUP|AND|WHERE])', 'STRING', rawsql)

    # replace all digits with 'DIGIT'
    sql = re.sub(r'(?<=[-+=\s\(,<>])\d.*?(?=[\s\),\)]|$)','DIGIT', sql)

    if sql not in row_dict:
      # first occurrence: totals, maximum and averages all equal this row
      row_dict[sql] = [1, time, timelock, sent, examined, time, rawsql, time,
                       sent, examined, timelock, sql]
    else:
      (count, ttime, tlock, tsent, texam, maxtime, maxstmt) = row_dict[sql][:7]
      count = count + 1
      ttime = ttime + time
      tlock = tlock + timelock
      tsent = tsent + sent
      texam = texam + examined

      if maxtime < time:
        maxtime = time
        maxstmt = rawsql

      # floor division keeps whole-number averages on every Python version
      row_dict[sql] = [count, ttime, tlock, tsent, texam, maxtime, maxstmt,
                       ttime // count, tsent // count, texam // count,
                       tlock // count, sql]

    ok = result.nextRow()

  return tuple([tuple(row) for row in row_dict.values()])

#------function to format time coming from file into 00:00:00000"
def filetimeformat(s):
  """Format a seconds value from the slow log file as 'MM:SS:uuuuu'.

  s -- decimal seconds as text, e.g. '0.003677'; a value without a
       fractional part (e.g. '2') is accepted as well.

  Returns minutes and seconds zero-padded to two digits followed by the
  microseconds zero-padded to at least five digits.
  """
  whole, _, fraction = s.strip().partition('.')
  totalseconds = int(whole) if whole else 0
  # right-pad the fraction so that '.5' means 500000 microseconds, not 5
  microseconds = int((fraction.strip() + '000000')[:6])
  return '%02d:%02d:%05d' % (totalseconds // 60, totalseconds % 60, microseconds)

########################################################################
##
##  LogFileResultset class
##
##
########################################################################

class LogFileResultset:
  """Read a slow query log file through a small result-set style interface.

  Offers goToFirstRow(), nextRow() and stringFieldValueByName(), so a log
  file can be handed to the same code that walks a mysql.slow_log query
  result.  The fields of the current record are kept in ``self.row``.
  """

  # header line that starts every record in the log
  _USER_PATTERN = r'#\sUser@Host:\s(.*)\n'
  # statistics line: query time, lock time, rows sent, rows examined
  _STATS_PATTERN = (r'Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' +
                    r'\s\sRows_examined:\s(.*)')
  _TIMESTAMP_PATTERN = r'SET\stimestamp=(\d*);'
  _TIME_PATTERN = r'#\sTime:\s(.*)\n'
  # number of lines searched for the first User@Host header
  _MAX_HEADER_LINES = 20

  def __init__(self, location ):
    """location -- path of the slow query log file."""
    self.location = location
    self.file = None
    self.row = {}
    # User@Host value of the record about to be read; '' until the first header is found
    self.__user = ''

  def open(self):
    """Open the log file.

    Returns 1 on success, otherwise the pair ("Error in file IO: ", exception).
    """
    try:
      self.file = open(self.location, 'r')
      return 1
    except IOError as e:
      return "Error in file IO: ", e

  def goToFirstRow(self):
    """Read the first record; same return value as nextRow()."""
    return self.nextRow()

  def __matchTest(self, pattern, text, groupno):
    """Return group *groupno* of the first match of *pattern* in *text*, or ''."""
    match = re.search(pattern, text)
    if match:
      return match.group(groupno)
    return ''

  def nextRow(self):
    """Read the next record from the file into ``self.row``.

    Returns 1 when a complete record was read.  Returns 0 when no User@Host
    header is found within the first 20 lines, or when any field of the
    record is empty (which includes reaching the end of the file).
    """
    sqlline = []
    self.row = {}

    # walk through at most 20 lines looking for the first instance of User@Host
    scanned = 0
    while self.__user == '':
      if scanned == self._MAX_HEADER_LINES:
        return 0
      scanned = scanned + 1
      self.__user = self.__matchTest(self._USER_PATTERN, self.file.readline(), 1)

    line = self.file.readline()

    stats = re.search(self._STATS_PATTERN, line)
    if stats:
      query_time, lock_time, rows_sent, rows_examined = stats.groups()
    else:
      query_time = lock_time = rows_sent = rows_examined = ''

    if query_time != '':
      query_time = filetimeformat(query_time)
    if lock_time != '':
      lock_time = filetimeformat(lock_time)

    # if "use databasename" is in the log, the timestamp is one line further down
    timestamp = self.__matchTest(self._TIMESTAMP_PATTERN, self.file.readline(), 1)
    if timestamp == '':
      timestamp = self.__matchTest(self._TIMESTAMP_PATTERN, self.file.readline(), 1)

    # The user of the record being built; self.__user is overwritten below as
    # soon as the header of the following record is reached.
    user = self.__user

    # Collect statement lines until the next User@Host header, which marks a
    # new record.  Its value is kept in self.__user since it belongs to the
    # next record.
    while True:
      line = self.file.readline()
      if re.search(self._TIME_PATTERN, line):
        # skip Time: entry for SHOW STATUS type statements
        line = self.file.readline()

      header = re.search(self._USER_PATTERN, line)
      if header:
        self.__user = header.group(1)
        break
      if line == '':
        break
      sqlline.append(line)
    sql = ' '.join(sqlline)

    # 'user' keeps the value the original code stored: the header read last
    # (the next record's user, or the current one at end of file).
    del user
    self.row = {'user': self.__user, 'query_time': query_time, 'lock_time':lock_time, 'rows_sent':rows_sent,
                  'rows_examined':rows_examined, 'timestamp':timestamp, 'sql_text':sql.rstrip(';\n')}

    # a record is only usable when every field was found
    if any([value == '' for value in self.row.values()]):
      return 0

    return 1

  def close(self):
    """Close the log file if it was opened."""
    if self.file is not None:
      self.file.close()

  def stringFieldValueByName(self, name):
    """Return field *name* of the current record (KeyError if it is absent)."""
    return self.row[name]

########################################################################
##
##  Form class
##
##
########################################################################

class LogParamFrm(mforms.Form):
  """Modal dialog asking where the slow query log lives and how to sort the results.

  The dialog is built and run from the constructor.  Afterwards the caller reads:
    result       -- value returned by run_modal()
    rbfile       -- radio button that is active when a log file should be analyzed
    logfile_path -- path picked with the file chooser ('' when none was picked)
    sort_method  -- selected sort key, 'query_time' unless changed
  """

  def __init__(self):
    mforms.Form.__init__(self, None)
    self.logfile_path = ''
    self.sort_method = 'query_time'
    self.result = None

    self.set_title("Log Parameters")

    #bx -- first box as container for others
    bx = mforms.newBox(False)
    bx.set_padding(12)
    bx.set_spacing(12)
    self.set_content(bx)

    #bx components
    lbtitle = mforms.newLabel()
    lbtitle.set_text("Slow Query Log Analyze Options")
    self.rbfile = newRadioButton(1)
    self.rbfile.add_clicked_callback(lambda: self.set_box())
    self.rbfile.set_text("Analyze log file on disk (output=file)")
    self.rbfile.set_active(True)
    self.lbdetail = newLabel("Select an option to analyze a log file for the slow_log table:")
    self.lbdetail.set_enabled(True)
#    self.lbdetail.set_style(mforms.SmallStyle)
    self.rbtable = newRadioButton(1)
    self.rbtable.add_clicked_callback(lambda: self.set_box())
    self.rbtable.set_text("Analyze slow_log table (output=table)")
    bx.add(self.lbdetail, False, True)
    bx.add(self.rbfile, False, True)
    bx.add(self.rbtable, False, True)
    bx.add(lbtitle, False, True)

    #bxfile_path -- label, text entry and browse button for the log file path
    bxfile_path = newBox(True)
    bxfile_path.set_spacing(4)
    self.tbfile = newTextEntry()
    self.lbfile = newLabel("   File Path")
    self.btfile = newButton()
    self.btfile.set_text("...")
    self.btfile.enable_internal_padding(False)
    self.btfile.set_enabled(True)
    self.btfile.add_clicked_callback(self.open_file_chooser)

    bxfile_path.add(self.lbfile, False, False)
    bxfile_path.add(self.tbfile, True, True)
    bxfile_path.add(self.btfile, False, False)
    bx.add(bxfile_path, False, True)

    #bxOptions -- sorting and other options
    bxoptions = mforms.newBox(True)
    bxoptions.set_spacing(4)
    bx.add(bxoptions, False, True)
    lbsort = newLabel("  Sort")
    self.cbsort = newSelector()
    self.cbsort.set_size(150, -1)
    sort_items = ("query_time", "count", "rows_sent","rows_examined", "lock_time")
    for item in sort_items:
      self.cbsort.add_item(item)
    self.cbsort.add_changed_callback(lambda: self.get_sort_method())
    bxoptions.add(lbsort, False, False)
    bxoptions.add(self.cbsort, False, False)

    #bxOK --- last items on window
    bxOK = mforms.newBox(True)
    bx.add(bxOK, False, True)
    bxOK.set_spacing(4)
    self.ok = mforms.newButton()
    self.ok.set_text("OK")
    # file mode is the initial choice, so OK stays disabled until a file is picked
    self.ok.set_enabled(False)
    bxOK.add_end(self.ok, False, False)
    cancel = mforms.newButton()
    cancel.set_text("Cancel")
    bxOK.add_end(cancel, False, True)

    self.set_size(400, 300)
    self.center()

    self.result = self.run_modal(self.ok, cancel)

  #-------open_file_chooser method---------
  def open_file_chooser(self):
    """Let the user pick the log file; remember the path and enable OK."""
    filechooser = FileChooser(mforms.OpenFile)
    # start in the folder of the previously chosen file ('' on first use)
    filechooser.set_directory(os.path.dirname(self.logfile_path))
    if filechooser.run_modal():
      self.logfile_path = filechooser.get_path()
      self.tbfile.set_value(self.logfile_path)
      if self.logfile_path != '':
        self.ok.set_enabled(True)

  #-------file path box visibility toggle method------
  def set_box(self):
    """Show or hide the file path controls to match the selected radio button."""
    if self.rbfile.get_active():
      self.tbfile.show(True)
      self.lbfile.show(True)
      self.btfile.show(True)
      # a log file is required in this mode: OK is usable only once one was chosen
      self.ok.set_enabled(self.logfile_path != '')
    else:
      self.ok.set_enabled(True)
      self.tbfile.show(False)
      self.lbfile.show(False)
      self.btfile.show(False)

  #---------------------------------------------------------------------------
  def get_sort_method(self):
    """Store the sort key currently selected in the selector."""
    self.sort_method = self.cbsort.get_string_value()


PlanetMySQL Voting: Vote UP / Vote DOWN

Viewing all articles
Browse latest Browse all 18783

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>