Appendix / Advanced Report Usage Notes |
# Find all sessions that contained file "foo.txt" # List both session info and file info # ASSUMES NO SESSIONS WERE BETWEEN TWO MANAGED NODES SET @report_period_start = '$REPORT_PERIOD_START'; SET @report_period_end = '$REPORT_PERIOD_END'; CREATE TABLE $FINAL_RESULT_TABLE SELECT DISTINCT ts.session_id , ts.source_ip , ts.dest_ip , ts.started_at , ts.stopped_at , ts.status , tsf.file_fullpath , tsf.size , tsf.started_at AS file_started_at , tsf.stopped_at AS file_stopped_at , tsf.status AS file_status FROM $TBL_TRANSFER_SESSIONS ts JOIN $TBL_TRANSFER_SESSION_FILES tsf ON ts.id = tsf.transfer_session_id WHERE tsf.started_at < @report_period_end AND ( tsf.stopped_at >= @report_period_start OR tsf.stopped_at IS NULL ) AND tsf.file_basename = "foo.txt" ORDER BY ts.started_at , tsf.started_at ;
Although the above report uses SELECT DISTINCT, contains no aggregate functions such as COUNT and SUM, and generates a correct final result (unless any of the transfer sessions were between two managed nodes), it is potentially slow. For greater speed (and to prevent query misoptimization from MySQL), it is better to decompose the above query into smaller steps, and join your temporary tables to the report views instead of joining the report views together directly.
SET @report_period_start = '$REPORT_PERIOD_START'; SET @report_period_end = '$REPORT_PERIOD_END'; #------------------------------------------------ # Create tables to prefilter base table record ids #------------------------------------------------ CREATE TABLE $TMP_TRANSFER_SESSION_IDS ( id INT NOT NULL PRIMARY KEY ); CREATE TABLE $TMP_TRANSFER_SESSION_FILE_IDS ( id INT NOT NULL PRIMARY KEY , transfer_session_id INT NOT NULL ); #------------------------------------------------ # Create table to hold all desired fields from # transfer_sessions #------------------------------------------------ CREATE TABLE $TMP_TRANSFER_SESSION_DATA ( `id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY , `session_id` VARCHAR(36) , `source_ip` VARCHAR(255) , `dest_ip` VARCHAR(255) , `started_at` DATETIME , `stopped_at` DATETIME , `status` VARCHAR(255) ); #------------------------------------------------ # Create table to hold all desired fields from # transfer_session_files #------------------------------------------------ CREATE TABLE $TMP_TRANSFER_SESSION_FILE_DATA ( `id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY , `transfer_session_id` INT(11) , `started_at` DATETIME , `stopped_at` DATETIME , `status` VARCHAR(255) , `file_fullpath` TEXT , `size` BIGINT(20) ); #======================================== # PRE-FILTER BASE TABLE IDS #======================================== #------------------------------------------------ # For this report, we know we are # filtering on file name and can use # the index on that column, so it is # faster to find the records from # transfer_session_files first #------------------------------------------------ #------------------------------------------------ # Transfer Session Files #------------------------------------------------ INSERT INTO $TMP_TRANSFER_SESSION_FILE_IDS SELECT DISTINCT tsf.id , tsf.transfer_session_id FROM $TBL_TRANSFER_SESSION_FILES tsf WHERE tsf.started_at < @report_period_end AND ( tsf.stopped_at >= @report_period_start OR tsf.stopped_at IS NULL ) AND tsf.file_basename = "foo.txt" ; #------------------------------------------------ # Create an index on the join field - # for speed, we wait until table is # populated instead of defining the index # during initial creation of table #------------------------------------------------ CREATE INDEX idx_transfer_session_id ON $TMP_TRANSFER_SESSION_FILE_IDS (transfer_session_id); #------------------- # Transfer Sessions #------------------- INSERT INTO $TMP_TRANSFER_SESSION_IDS SELECT DISTINCT ts.id FROM $TBL_TRANSFER_SESSIONS ts JOIN $TMP_TRANSFER_SESSION_FILE_IDS tsf ON ts.id = tsf.transfer_session_id WHERE ts.started_at < @report_period_end AND ( ts.stopped_at >= @report_period_start OR ts.stopped_at IS NULL ) ; #------------------------------------------------ # Remove transfer file sessions that don't have an # associated transfer_session record # (normally not supposed to happen, but we want # to protect against bad data that might be # caused by system crash, logger errors, console # purge errors, Canonicalizer shutdown, etc.) # For this particular report, this is not needed # since the final join will weed out such records, # but it is a good habit to maintain, this report # could be modified later into one where # it would make a difference. #------------------------------------------------ DELETE tsf.* FROM $TMP_TRANSFER_SESSION_FILE_IDS tsf LEFT JOIN $TMP_TRANSFER_SESSION_IDS ts ON tsf.transfer_session_id = ts.id WHERE ts.id IS NULL; #======================== # Get all desired fields #======================== #------------------------ # transfer_session_files #------------------------ INSERT INTO $TMP_TRANSFER_SESSION_FILE_DATA ( id , `transfer_session_id` , `started_at` , `stopped_at` , `status` , `file_fullpath` , `size` ) SELECT DISTINCT tsf.id , tsf.transfer_session_id , tsf.started_at , tsf.stopped_at , tsf.status , tsf.file_fullpath , tsf.size FROM $TMP_TRANSFER_SESSION_FILE_IDS tsf_ids STRAIGHT_JOIN $TBL_TRANSFER_SESSION_FILES tsf ON tsf_ids.id = tsf.id ; #------------------------- # Add index to speed joins #------------------------- CREATE INDEX idx_transfer_session_id ON $TMP_TRANSFER_SESSION_FILE_DATA (`transfer_session_id`); #------------------- # transfer_sessions #------------------- INSERT INTO $TMP_TRANSFER_SESSION_DATA ( id , `session_id` , `source_ip` , `dest_ip` , `started_at` , `stopped_at` , `status` ) SELECT DISTINCT ts.id , ts.session_id , ts.source_ip , ts.dest_ip , ts.started_at , ts.stopped_at , ts.status FROM $TMP_TRANSFER_SESSION_IDS ts_ids STRAIGHT_JOIN $TBL_TRANSFER_SESSIONS ts ON ts_ids.id = ts.id ; #=========================== # Create final result table #=========================== CREATE TABLE $FINAL_RESULT_TABLE SELECT ts.session_id , ts.source_ip , ts.dest_ip , ts.started_at , ts.stopped_at , ts.status , tsf.file_fullpath , tsf.size , tsf.started_at AS file_started_at , tsf.stopped_at AS file_stopped_at , tsf.status AS file_status FROM $TMP_TRANSFER_SESSION_DATA ts JOIN $TMP_TRANSFER_SESSION_FILE_DATA tsf ON ts.id = tsf.transfer_session_id ORDER BY ts.started_at , tsf.started_at ;