Advanced Report Usage Notes: Avoid Joining Reporting Views

EXAMPLE:

MySQL often mis-optimizes queries that join reporting views directly to each other. The fact that the views can show the same record multiple times can cause a geometric explosion in the number of temporary records inspected.
# Find all sessions that contained file "foo.txt"
# List both session info and file info
# ASSUMES NO SESSIONS WERE BETWEEN TWO MANAGED NODES
SET @report_period_start = '$REPORT_PERIOD_START';
SET @report_period_end = '$REPORT_PERIOD_END';
CREATE TABLE $FINAL_RESULT_TABLE
SELECT DISTINCT
	ts.session_id
	, ts.source_ip
	, ts.dest_ip
	, ts.started_at
	, ts.stopped_at
	, ts.status
	, tsf.file_fullpath
	, tsf.size
	, tsf.started_at AS file_started_at
	, tsf.stopped_at AS file_stopped_at
	, tsf.status AS file_status
FROM
	$TBL_TRANSFER_SESSIONS ts
	JOIN $TBL_TRANSFER_SESSION_FILES tsf
		ON ts.id = tsf.transfer_session_id
WHERE
	tsf.started_at < @report_period_end
	AND (
		tsf.stopped_at >= @report_period_start
		OR tsf.stopped_at IS NULL
	)
	AND tsf.file_basename = "foo.txt"
ORDER BY
	ts.started_at
	, tsf.started_at
;

EXAMPLE

Although the above report uses SELECT DISTINCT, contains no aggregate functions such as COUNT and SUM, and generates a correct final result (unless any of the transfer sessions were between two managed nodes), it is potentially slow. For greater speed (and to prevent query misoptimization from MySQL), it is better to decompose the above query into smaller steps, and join your temporary tables to the report views instead of joining the report views together directly.

Note: In order to avoid complexity in the SQL, the example below assumes no sessions were between two managed nodes. Therefore, the code for dealing with this has been left out (see Advanced Report Usage Notes: Avoid Duplicating Redundant Records).
SET @report_period_start = '$REPORT_PERIOD_START';
SET @report_period_end = '$REPORT_PERIOD_END';

#------------------------------------------------
# Create tables to prefilter base table record ids
#------------------------------------------------

CREATE TABLE $TMP_TRANSFER_SESSION_IDS (
	id INT NOT NULL PRIMARY KEY
	);
CREATE TABLE $TMP_TRANSFER_SESSION_FILE_IDS (
	id INT NOT NULL PRIMARY KEY
	, transfer_session_id INT NOT NULL
	);

#------------------------------------------------
# Create table to hold all desired fields from
# transfer_sessions
#------------------------------------------------

CREATE TABLE $TMP_TRANSFER_SESSION_DATA (
	`id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY
	, `session_id` VARCHAR(36)
	, `source_ip` VARCHAR(255)
	, `dest_ip` VARCHAR(255)
	, `started_at` DATETIME
	, `stopped_at` DATETIME
	, `status` VARCHAR(255)
	);

#------------------------------------------------
# Create table to hold all desired fields from
# transfer_session_files
#------------------------------------------------

CREATE TABLE $TMP_TRANSFER_SESSION_FILE_DATA (
`id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY
	, `transfer_session_id` INT(11)
	, `started_at` DATETIME
	, `stopped_at` DATETIME
	, `status` VARCHAR(255)
	, `file_fullpath` TEXT
	, `size` BIGINT(20)
	);

#========================================
# PRE-FILTER BASE TABLE IDS
#========================================

#------------------------------------------------
# For this report, we know we are
# filtering on file name and can use
# the index on that column, so it is
# faster to find the records from
# transfer_session_files first
#------------------------------------------------

#------------------------------------------------
# Transfer Session Files
#------------------------------------------------

INSERT INTO $TMP_TRANSFER_SESSION_FILE_IDS
SELECT DISTINCT
	tsf.id
	, tsf.transfer_session_id
FROM $TBL_TRANSFER_SESSION_FILES tsf
WHERE
	tsf.started_at < @report_period_end
	AND (
		tsf.stopped_at >= @report_period_start
		OR tsf.stopped_at IS NULL
	)
	AND tsf.file_basename = "foo.txt"
;

#------------------------------------------------
# Create an index on the join field -
# for speed, we wait until table is
# populated instead of defining the index
# during initial creation of table
#------------------------------------------------

CREATE INDEX idx_transfer_session_id ON
$TMP_TRANSFER_SESSION_FILE_IDS (transfer_session_id);

#-------------------
# Transfer Sessions
#-------------------

INSERT INTO $TMP_TRANSFER_SESSION_IDS
SELECT DISTINCT ts.id
FROM $TBL_TRANSFER_SESSIONS ts
	JOIN $TMP_TRANSFER_SESSION_FILE_IDS tsf
		ON ts.id = tsf.transfer_session_id
WHERE
	ts.started_at < @report_period_end
	AND (
		ts.stopped_at >= @report_period_start
		OR ts.stopped_at IS NULL
	)
;

#------------------------------------------------
# Remove transfer file sessions that don't have an
# associated transfer_session record
# (normally not supposed to happen, but we want
# to protect against bad data that might be
# caused by system crash, logger errors, console
# purge errors, Canonicalizer shutdown, etc.)
# For this particular report, this is not needed
# since the final join will weed out such records,
# but it is a good habit to maintain, this report
# could be modified later into one where
# it would make a difference.
#------------------------------------------------

DELETE tsf.*
FROM $TMP_TRANSFER_SESSION_FILE_IDS tsf
	LEFT JOIN $TMP_TRANSFER_SESSION_IDS ts
		ON tsf.transfer_session_id = ts.id
WHERE ts.id IS NULL;

#========================
# Get all desired fields
#========================

#------------------------
# transfer_session_files
#------------------------

INSERT INTO $TMP_TRANSFER_SESSION_FILE_DATA (
	id
	, `transfer_session_id`
	, `started_at`
	, `stopped_at`
	, `status`
	, `file_fullpath`
	, `size`
	)

SELECT DISTINCT
	tsf.id
	, tsf.transfer_session_id
	, tsf.started_at
	, tsf.stopped_at
	, tsf.status
	, tsf.file_fullpath
	, tsf.size
FROM
	$TMP_TRANSFER_SESSION_FILE_IDS tsf_ids
	STRAIGHT_JOIN $TBL_TRANSFER_SESSION_FILES tsf
		ON tsf_ids.id = tsf.id
;

#-------------------------
# Add index to speed joins
#-------------------------

CREATE INDEX idx_transfer_session_id ON
$TMP_TRANSFER_SESSION_FILE_DATA (`transfer_session_id`);

#-------------------
# transfer_sessions
#-------------------

INSERT INTO $TMP_TRANSFER_SESSION_DATA (
	id
	, `session_id`
	, `source_ip`
	, `dest_ip`
	, `started_at`
	, `stopped_at`
	, `status`
	)
SELECT DISTINCT
	ts.id
	, ts.session_id
	, ts.source_ip
	, ts.dest_ip
	, ts.started_at
	, ts.stopped_at
	, ts.status
FROM
	$TMP_TRANSFER_SESSION_IDS ts_ids
	STRAIGHT_JOIN $TBL_TRANSFER_SESSIONS ts
		ON ts_ids.id = ts.id
;

#===========================
# Create final result table
#===========================

CREATE TABLE $FINAL_RESULT_TABLE
SELECT
	ts.session_id
	, ts.source_ip
	, ts.dest_ip
	, ts.started_at
	, ts.stopped_at
	, ts.status
	, tsf.file_fullpath
	, tsf.size
	, tsf.started_at AS file_started_at
	, tsf.stopped_at AS file_stopped_at
	, tsf.status AS file_status
FROM
	$TMP_TRANSFER_SESSION_DATA ts
	JOIN $TMP_TRANSFER_SESSION_FILE_DATA tsf
		ON ts.id = tsf.transfer_session_id
ORDER BY
	ts.started_at
	, tsf.started_at
;