Advanced Report Usage Notes: Avoid Joining Reporting Views
EXAMPLE:
# Find all sessions that contained file "foo.txt"
# List both session info and file info
# ASSUMES NO SESSIONS WERE BETWEEN TWO MANAGED NODES
SET @report_period_start = '$REPORT_PERIOD_START';
SET @report_period_end = '$REPORT_PERIOD_END';
CREATE TABLE $FINAL_RESULT_TABLE
SELECT DISTINCT
ts.session_id
, ts.source_ip
, ts.dest_ip
, ts.started_at
, ts.stopped_at
, ts.status
, tsf.file_fullpath
, tsf.size
, tsf.started_at AS file_started_at
, tsf.stopped_at AS file_stopped_at
, tsf.status AS file_status
FROM
$TBL_TRANSFER_SESSIONS ts
JOIN $TBL_TRANSFER_SESSION_FILES tsf
ON ts.id = tsf.transfer_session_id
WHERE
tsf.started_at < @report_period_end
AND (
tsf.stopped_at >= @report_period_start
OR tsf.stopped_at IS NULL
)
AND tsf.file_basename = "foo.txt"
ORDER BY
ts.started_at
, tsf.started_at
;
Although the above report uses SELECT DISTINCT, contains no aggregate functions such as COUNT and SUM, and generates a correct final result (unless any of the transfer sessions were between two managed nodes), it is potentially slow. For greater speed (and to prevent query misoptimization from MySQL), it is better to decompose the above query into smaller steps, and join your temporary tables to the report views instead of joining the report views together directly.
Note: In order to avoid complexity in the SQL, the example below
assumes no sessions were between two managed nodes. Therefore, the code for dealing
with this has been left out (see Advanced Report Usage Notes: Avoid Duplicating Redundant Records).
EXAMPLE
SET @report_period_start = '$REPORT_PERIOD_START';
SET @report_period_end = '$REPORT_PERIOD_END';
#------------------------------------------------
# Create tables to prefilter base table record ids
#------------------------------------------------
CREATE TABLE $TMP_TRANSFER_SESSION_IDS (
id INT NOT NULL PRIMARY KEY
);
CREATE TABLE $TMP_TRANSFER_SESSION_FILE_IDS (
id INT NOT NULL PRIMARY KEY
, transfer_session_id INT NOT NULL
);
#------------------------------------------------
# Create table to hold all desired fields from
# transfer_sessions
#------------------------------------------------
CREATE TABLE $TMP_TRANSFER_SESSION_DATA (
`id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY
, `session_id` VARCHAR(36)
, `source_ip` VARCHAR(255)
, `dest_ip` VARCHAR(255)
, `started_at` DATETIME
, `stopped_at` DATETIME
, `status` VARCHAR(255)
);
#------------------------------------------------
# Create table to hold all desired fields from
# transfer_session_files
#------------------------------------------------
CREATE TABLE $TMP_TRANSFER_SESSION_FILE_DATA (
`id` INT(11) NOT NULL AUTO_INCREMENT PRIMARY KEY
, `transfer_session_id` INT(11)
, `started_at` DATETIME
, `stopped_at` DATETIME
, `status` VARCHAR(255)
, `file_fullpath` TEXT
, `size` BIGINT(20)
);
#========================================
# PRE-FILTER BASE TABLE IDS
#========================================
#------------------------------------------------
# For this report, we know we are
# filtering on file name and can use
# the index on that column, so it is
# faster to find the records from
# transfer_session_files first
#------------------------------------------------
#------------------------------------------------
# Transfer Session Files
#------------------------------------------------
INSERT INTO $TMP_TRANSFER_SESSION_FILE_IDS
SELECT DISTINCT
tsf.id
, tsf.transfer_session_id
FROM $TBL_TRANSFER_SESSION_FILES tsf
WHERE
tsf.started_at < @report_period_end
AND (
tsf.stopped_at >= @report_period_start
OR tsf.stopped_at IS NULL
)
AND tsf.file_basename = "foo.txt"
;
#------------------------------------------------
# Create an index on the join field -
# for speed, we wait until table is
# populated instead of defining the index
# during initial creation of table
#------------------------------------------------
CREATE INDEX idx_transfer_session_id ON
$TMP_TRANSFER_SESSION_FILE_IDS (transfer_session_id);
#-------------------
# Transfer Sessions
#-------------------
INSERT INTO $TMP_TRANSFER_SESSION_IDS
SELECT DISTINCT ts.id
FROM $TBL_TRANSFER_SESSIONS ts
JOIN $TMP_TRANSFER_SESSION_FILE_IDS tsf
ON ts.id = tsf.transfer_session_id
WHERE
ts.started_at < @report_period_end
AND (
ts.stopped_at >= @report_period_start
OR ts.stopped_at IS NULL
)
;
#------------------------------------------------
# Remove transfer file sessions that don't have an
# associated transfer_session record
# (normally not supposed to happen, but we want
# to protect against bad data that might be
# caused by system crash, logger errors, console
# purge errors, Canonicalizer shutdown, etc.)
# For this particular report, this is not needed
# since the final join will weed out such records,
# but it is a good habit to maintain, this report
# could be modified later into one where
# it would make a difference.
#------------------------------------------------
DELETE tsf.*
FROM $TMP_TRANSFER_SESSION_FILE_IDS tsf
LEFT JOIN $TMP_TRANSFER_SESSION_IDS ts
ON tsf.transfer_session_id = ts.id
WHERE ts.id IS NULL;
#========================
# Get all desired fields
#========================
#------------------------
# transfer_session_files
#------------------------
INSERT INTO $TMP_TRANSFER_SESSION_FILE_DATA (
id
, `transfer_session_id`
, `started_at`
, `stopped_at`
, `status`
, `file_fullpath`
, `size`
)
SELECT DISTINCT
tsf.id
, tsf.transfer_session_id
, tsf.started_at
, tsf.stopped_at
, tsf.status
, tsf.file_fullpath
, tsf.size
FROM
$TMP_TRANSFER_SESSION_FILE_IDS tsf_ids
STRAIGHT_JOIN $TBL_TRANSFER_SESSION_FILES tsf
ON tsf_ids.id = tsf.id
;
#-------------------------
# Add index to speed joins
#-------------------------
CREATE INDEX idx_transfer_session_id ON
$TMP_TRANSFER_SESSION_FILE_DATA (`transfer_session_id`);
#-------------------
# transfer_sessions
#-------------------
INSERT INTO $TMP_TRANSFER_SESSION_DATA (
id
, `session_id`
, `source_ip`
, `dest_ip`
, `started_at`
, `stopped_at`
, `status`
)
SELECT DISTINCT
ts.id
, ts.session_id
, ts.source_ip
, ts.dest_ip
, ts.started_at
, ts.stopped_at
, ts.status
FROM
$TMP_TRANSFER_SESSION_IDS ts_ids
STRAIGHT_JOIN $TBL_TRANSFER_SESSIONS ts
ON ts_ids.id = ts.id
;
#===========================
# Create final result table
#===========================
CREATE TABLE $FINAL_RESULT_TABLE
SELECT
ts.session_id
, ts.source_ip
, ts.dest_ip
, ts.started_at
, ts.stopped_at
, ts.status
, tsf.file_fullpath
, tsf.size
, tsf.started_at AS file_started_at
, tsf.stopped_at AS file_stopped_at
, tsf.status AS file_status
FROM
$TMP_TRANSFER_SESSION_DATA ts
JOIN $TMP_TRANSFER_SESSION_FILE_DATA tsf
ON ts.id = tsf.transfer_session_id
ORDER BY
ts.started_at
, tsf.started_at
;