As an update to my prior post, I’ve added a form to the workbench plugin. Now, the user can select a slow query log file and generate statistics from it. The plugin provides information to answer the following questions:
- What type of queries run most often?
- What type of queries are the slowest?
- Which queries access the most rows?
- Which queries send the most data?
The plugin scans the slow query log, aggregates similar queries, and provides summary statistics for each group. It’s very similar to the mysqldumpslow perl utility, which is included in the mysql bin folder. However, as a plugin, it’s easier to use on a Windows server as it doesn’t require you to install perl.
If you’re working to create a MySQL Workbench plugin, this mform code will help provide an example.
This form prompts the user for the output type of the slow query log (file or table), for the location of the slow query log file, and for the sort order: count, query_time, rows_examined, rows_sent. See Figure 1. Sample plugin form.
The plugin generates output similar to the following for queries in the slow query log:
Statement: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = ‘STRING’
count: 116
average time: 0:00:00.001269
slowest sql: SELECT data, created, headers, expire, serialized FROM cache_menu WHERE cid = ‘links:secondary-links:tree-data:5d6d3aaaaef5fba302ce62698fa37bbe’
slowest time: 0:00:00.024912<
average rows sent: 0
average rows examined: 0
average lock time: 0:00:00.000071
. . .
. . .
. . .
To add the plugin to your workbench install, create a file named slowquerystats_grt.py, click on the “show source” magnifying glass icon below, and copy the code block into the file. From the Scripting menu in Workbench, call install Plugin/Module File and select the file to be installed. This plugin requires Workbench version 2.7 or newer. Once you’ve installed the plugin, you can run it by opening the SQL Editor and selecting Slow Log Stats from the Plugins|Utilities menu.
import os import re from wb import * import grt from operator import itemgetter from mforms import newBox, newButton, newPanel, newTextBox, newRadioButton, newLabel, newSelector, newTextEntry, newCheckBox, SelectorCombobox from mforms import Utilities, FileChooser import mforms import datetime ModuleInfo = DefineModule(name= "slowlogstats", author= "lstigile", version="0.5") @ModuleInfo.plugin("slowlogstats", caption= "Slow Log Stats", input= [wbinputs.currentQueryBuffer()], pluginMenu= "SQL/Utilities") @ModuleInfo.export(grt.INT, grt.classes.db_query_QueryBuffer) #--------------------------------------------- # Entry point, creates form instance, sets choice for sourcetype (table or file), file location # and sort order of results #--------------------------------------------- def optionPrompt(qbuffer): lines = [] sort = {"query_time":1, "count":0, "rows_sent":3,"rows_examined":4, "lock_time":2} f = LogParamFrm() editor= qbuffer.owner editor.addToOutput("\n", 1) if f.result == mforms.ResultOk: if f.rbfile.get_active(): resultsets = LogFileResultset(f.logfile_path) ok= resultsets.open() tuples = parseSlowQueryTable(resultsets) else: editor= qbuffer.owner sql = "SELECT * FROM mysql.slow_log order by start_time desc limit 10000;" resultsets = editor.executeScript(sql) tuples = parseSlowQueryTable(resultsets[0]) if len(tuples) < 12: editor.addToOutput( tuples[0], 1) return 0 tuples = sorted(tuples, key=itemgetter(sort[f.sort_method]), reverse=True) lines = printFormat(tuples) editor.addToOutput('\n'.join(lines), 0) if f.rbfile.get_active(): resultsets.close() return 0 #----------------------------------- # format info to print to screen #------------------------------------ def printFormat(tuples): lines=[] for item in tuples: (count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam, alock, statement ) = item lines.append('Statement: ' + statement + '\n' + 'count: ' + str(count) + '\n' + 'average time:' + str(atime) + '\n' + 'slowest sql: ' + maxstmt + '\n' + 'slowest time: ' + str(maxtime) + '\n' 'average rows sent: ' + str(asent) + '\n' 'average rows examined: ' + str(aexam) + '\n' 'average lock time: ' + str(alock) + '\n\n' ) return lines #---------------------------- def parseSlowQueryTable(result): row_dict = {} lines= [] ok = result.goToFirstRow() if ok == 0: lines.append('invalid file format, User@Host not found in first 20 entries') return lines while ok: sql = result.stringFieldValueByName('sql_text') time = result.stringFieldValueByName('query_time') if time[9:]!='': time = datetime.timedelta(minutes=int(time[0:2]), seconds=int(time[3:5]), microseconds=int(time[6:])) else: time = datetime.timedelta(hours=int(time[0:2]), minutes=int(time[3:5]), seconds=int(time[6:8])) timelock = result.stringFieldValueByName('lock_time') if timelock[9:]!='': timelock = datetime.timedelta(minutes=int(timelock[0:2]), seconds=int(timelock[3:5]), microseconds=int(timelock[6:])) else: timelock = datetime.timedelta(hours=int(timelock[0:2]), minutes=int(timelock[3:5]), seconds=int(timelock[6:8])) sent = int(result.stringFieldValueByName('rows_sent')) examined = int(result.stringFieldValueByName('rows_examined')) #Use python regular expression engine to replace variables with STRING or #DIGIT For readability, notes on expression are in comments at bottom of this #script. Modify this expression if SQL statements are not correctly grouped. sql = re.sub(r'(?<=[\s\(,<>=][\'\"]).*?(?=[\'\"]\)$|[\'\"]$|[\'\"][\,\)]\s|[\'\"]\s[FROM|' + 'ORDER|GROUP|AND|WHERE])', 'STRING', sql) # replace all digits with 'DIGIT' sql = re.sub(r'(?<=[-+=\s\(,<>])\d.*?(?=[\s\),\)]|$)','DIGIT', sql) #determine if SQL is in row_count dict, if not add statement and count, else increment #existing entries if not sql in row_dict: avgtime = time avgrowssent = sent avgrowsexamined = examined avglocktime = timelock totaltime = time maxsql = result.stringFieldValueByName('sql_text') row = [1,totaltime, timelock, sent, examined, time, maxsql, avgtime, avgrowssent, avgrowsexamined, avglocktime, sql] row_dict[sql]=row else: (count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam, alock, sql) = tuple(row_dict[sql]) count = count + 1 ttime = ttime + time tlock = tlock + timelock tsent = tsent + sent texam = texam + examined if maxtime < time: maxtime = time maxstmt = result.stringFieldValueByName('sql_text') atime = ttime/count asent = tsent/count aexam = texam/count alock = tlock/count row = [count, ttime, tlock, tsent, texam, maxtime, maxstmt, atime, asent, aexam, alock, sql] row_dict[sql] = row ok = result.nextRow() for row in row_dict.values(): lines.append(tuple(row)) return tuple(lines) #------function to format time coming from file into 00:00:00000" def filetimeformat(s): parts = s.split('.') sformat = ('%(minutes)02d:%(seconds)02d:%(microseconds)05d' %{'minutes':int(parts[0])//60,'seconds':int(parts[0])%60, 'microseconds':int(parts[1])}) return sformat ######################################################################## ## ## LogFileResultset class ## ## ######################################################################## class LogFileResultset: def __init__(self, location ): self.location = location self.__user = '' def open(self): try: self.file = open(self.location, 'r') return 1 except IOError, e: return "Error in file IO: ", e def goToFirstRow(self): ok = self.nextRow() return ok def __matchTest(self, pattern, text, groupno): match = re.search(pattern, text) if match: return match.group(groupno) else: return '' def nextRow(self): x = 0 sqlline = [] self.row = {} # walk through 20 lines looking for first instance of User@Host # if no entry, return 0 while self.__user == '': x = x+1 self.__user = self.__matchTest('#\sUser@Host:\s(.*)\n', self.file.readline(),1) if x == 20: return 0 line = self.file.readline() query_time = self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' + '\s\sRows_examined:\s(.*)', line,1) if query_time != '': query_time = filetimeformat(query_time) lock_time= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' + '\s\sRows_examined:\s(.*)', line,2) if lock_time != '': lock_time = filetimeformat(lock_time) rows_sent= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' + '\s\sRows_examined:\s(.*)', line,3) rows_examined= self.__matchTest('Query_time:\s(.*)\s\sLock_time:\s(.*)\sRows_sent:\s(.*)' + '\s\sRows_examined:\s(.*)', line,4) # if use databasename in log, the timestamp skips a line timestamp = self.__matchTest('SET\stimestamp=(\d*);', self.file.readline(),1) if timestamp == '': timestamp = self.__matchTest('SET\stimestamp=(\d*);', self.file.readline(),1) ##this pseudo do..until loop is designed to find the first User@Host entry, which marks a new record. ##When we hit User@Host, we save the record in a class variable since it belongs to the next ##record. while True: line = self.file.readline() sqlmatch = re.search(r'#\sTime:\s(.*)\n', line) if sqlmatch: #skip Time: entry for SHOW STATUS type statements line = self.file.readline() sqlmatch = re.search(r'#\sUser@Host:\s(.*)\n', line) if sqlmatch: self.__user = sqlmatch.group(1) break if line == '': break sqlline.append(line) sql = ' '.join(sqlline) self.row = {'user': self.__user, 'query_time': query_time, 'lock_time':lock_time, 'rows_sent':rows_sent, 'rows_examined':rows_examined, 'timestamp':timestamp, 'sql_text':sql.rstrip(';\n')} #if any value is missing, empty the dict for r in self.row: if self.row[r] == '': empty = True else: empty = False if empty: return 0 # if empty: self.row.clear() return 1 def close(self): self.file.close() def stringFieldValueByName(self, name): return self.row[name] ######################################################################## ## ## Form class ## ## ######################################################################## class LogParamFrm(mforms.Form): def __init__(self): mforms.Form.__init__(self, None) self.logfile_path = '' self.sort_method = 'query_time' self.result = None self.set_title("Log Parameters") #bx -- first box as container for others bx = mforms.newBox(False) bx.set_padding(12) bx.set_spacing(12) self.set_content(bx) #bx components lbtitle = mforms.newLabel() lbtitle.set_text("Slow Query Log Analyze Options") self.rbfile = newRadioButton(1) self.rbfile.add_clicked_callback(lambda: self.set_box()) self.rbfile.set_text("Analyze log file on disk (output=file)") self.rbfile.set_active(True) self.lbdetail = newLabel("Select an option to analyze a log file for the slow_log table:") self.lbdetail.set_enabled(True) # self.lbdetail.set_style(mforms.SmallStyle) self.rbtable = newRadioButton(1) self.rbtable.add_clicked_callback(lambda: self.set_box()) self.rbtable.set_text("Analyze slow_log table (output=table)") bx.add(self.lbdetail, False, True) bx.add(self.rbfile, False, True) bx.add(self.rbtable, False, True) bx.add(lbtitle, False, True) #bxfile_path -- list path selection and radio buttons bxfile_path = newBox(True) bxfile_path.set_spacing(4) self.tbfile = newTextEntry() self.lbfile = newLabel(" File Path") self.btfile = newButton() self.btfile.set_text("...") self.btfile.enable_internal_padding(False) self.btfile.set_enabled(True) self.btfile.add_clicked_callback(self.open_file_chooser) bxfile_path.add(self.lbfile, False, False) bxfile_path.add(self.tbfile, True, True) bxfile_path.add(self.btfile, False, False) bx.add(bxfile_path, False, True) #bxOptions -- sorting and other obtions bxoptions = mforms.newBox(True) bxoptions.set_spacing(4) bx.add(bxoptions, False, True) lbsort = newLabel(" Sort") self.cbsort = newSelector() self.cbsort.set_size(150, -1) sort_items = ("query_time", "count", "rows_sent","rows_examined", "lock_time") for item in sort_items: self.cbsort.add_item(item) self.cbsort.add_changed_callback(lambda: self.get_sort_method()) bxoptions.add(lbsort, False, False) bxoptions.add(self.cbsort, False, False) #bxOK --- last items on window bxOK = mforms.newBox(True) bx.add(bxOK, False, True) bxOK.set_spacing(4) self.ok = mforms.newButton() self.ok.set_text("OK") self.ok.set_enabled(False) bxOK.add_end(self.ok, False, False) cancel = mforms.newButton() cancel.set_text("Cancel") bxOK.add_end(cancel, False, True) self.set_size(400, 300) self.center() self.result = self.run_modal(self.ok, cancel) #-------open_file_chooser method--------- def open_file_chooser(self): filechooser = FileChooser(mforms.OpenFile) filechooser.set_directory(self.logfile_path) if filechooser.run_modal(): self.logfile_path = filechooser.get_path() # self.tbfile.set_value(self.logfile_path) self.tbfile.set_value(self.logfile_path) if self.logfile_path != '': self.ok.set_enabled(True) #-------file path box visibility toggle method------ def set_box(self): if self.rbfile.get_active(): self.tbfile.show(True) self.lbfile.show(True) self.btfile.show(True) self.ok.set_enabled(True) self.ok.set_enabled(False) else: self.ok.set_enabled(True) self.tbfile.show(False) self.lbfile.show(False) self.btfile.show(False) #--------------------------------------------------------------------------- def get_sort_method(self): self.sort_method = self.cbsort.get_string_value()

PlanetMySQL Voting: Vote UP / Vote DOWN