From 309bf094bfd0a7d4ea6643188297b26a71840aa1 Mon Sep 17 00:00:00 2001 From: Stefan Brass <stefan.brass@informatik.uni-halle.de> Date: Fri, 11 Oct 2019 17:55:37 +0200 Subject: [PATCH] Improved Runtime Estimation in DB --- db/bench_cost.sql | 6 ++++- db/drop_db.sql | 2 ++ db/v_check.sql | 2 +- db/v_estimate.sql | 64 ++++++++++++++++++++++++++++++++++------------- 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/db/bench_cost.sql b/db/bench_cost.sql index c3069dc..7a7c0e8 100644 --- a/db/bench_cost.sql +++ b/db/bench_cost.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/bench_cost.sql -- Purpose: Database for Benchmark Runs: Cost Measure Data for Benchmarks --- Last Change: 04.10.2019 +-- Last Change: 10.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -200,6 +200,10 @@ INSERT INTO BENCH_COST VALUES('path2','y500_8k',8498,1,8498); INSERT INTO BENCH_COST VALUES('path2','y1k_4k',4998,1,4998); INSERT INTO BENCH_COST VALUES('path2','y1k_8k',8998,1,8998); +INSERT INTO BENCH_COST VALUES('j1axy','j1k10k',1825612,1,61988581); +INSERT INTO BENCH_COST VALUES('j1axy','j1k50k',3856026,1,981332692); +INSERT INTO BENCH_COST VALUES('j1axy','j1k250k',4000000,1,1375000127); + -- ============================================================================ -- Automatically Added Benchmark Cost Data: -- ============================================================================ diff --git a/db/drop_db.sql b/db/drop_db.sql index 82d0d29..31c09a4 100644 --- a/db/drop_db.sql +++ b/db/drop_db.sql @@ -53,6 +53,8 @@ DROP VIEW LOAD_EFFICIENCY; DROP VIEW ESTIMATE_DATA_CSV; DROP VIEW ESTIMATE_DATA; +DROP VIEW EVAL_ESTIMATES; +DROP VIEW NUM_GOOD_ESTIMATES; DROP VIEW NUM_BAD_ESTIMATES; DROP VIEW ESTIMATE_ERR; DROP VIEW ESTIMATE_FORMULA; diff --git a/db/v_check.sql b/db/v_check.sql index ba70184..770640d 100644 --- a/db/v_check.sql +++ b/db/v_check.sql @@ -148,7 +148,7 @@ ORDER BY SYS, MACHINE, BENCH, FILE_ID; CREATE VIEW INTEGRITY_ERROR AS SELECT 'Number of lines in INPUT_FILE and edges in INPUT_GRAPH differ for: ' || - F.FILE_ID + F.FILE_ID AS ERROR_MESSAGE FROM INPUT_FILE F, INPUT_GRAPH G WHERE F.FILE_ID = G.FILE_ID AND F.LINES <> G.NUM_EDGEs; diff --git a/db/v_estimate.sql b/db/v_estimate.sql index ff87e66..ceb7891 100644 --- a/db/v_estimate.sql +++ b/db/v_estimate.sql @@ -70,44 +70,74 @@ ORDER BY SYS, BYTES_PER_FACT; -- ============================================================================ CREATE VIEW ESTIMATE_FORMULA AS -SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, - G.FILE_ID, +SELECT R.SYS, R.BENCH, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, + E.BENCH AS FORMULA_FOR, + F.FILE_ID, ROUND(CAST( E.STARTUP/1000 + - (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) + + (F.LINES * (LOG(F.LINES)/LOG(2)) * E.LOAD_IDX / 1000000000) + (C.INST * E.RULE_APP / 1000000000) + (C.SIZE * E.RESULT / 1000000000) AS NUMERIC), 3) AS ESTIMATE, R.REAL_T -FROM BEST_RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E -WHERE R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff' -AND C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID -AND R.FILE_ID = G.FILE_ID -ORDER BY R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, ESTIMATE; +FROM BEST_RESULT R, INPUT_FILE F, BENCH_COST C, ESTIMATE_PAR E +WHERE R.SYS = E.SYS +AND C.BENCH = R.BENCH +AND C.FILE_ID = F.FILE_ID +AND R.FILE_ID = F.FILE_ID +ORDER BY R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, R.BENCH, E.RESULT, + ESTIMATE; -- ============================================================================ -- Check System Speed Estimates (Real Time): -- ============================================================================ CREATE VIEW ESTIMATE_ERR AS -SELECT E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, - E.FILE_ID, E.ESTIMATE, E.REAL_T, +SELECT E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, E.FORMULA_FOR, + E.BENCH, E.FILE_ID, E.ESTIMATE, E.REAL_T, E.REAL_T - E.ESTIMATE AS DIFF, - ROUND((E.REAL_T - E.ESTIMATE) * 100 / E.ESTIMATE) AS DIFF_PCT + ROUND((E.REAL_T - E.ESTIMATE) * 100 / E.ESTIMATE, 3) AS DIFF_PCT FROM ESTIMATE_FORMULA E -ORDER BY E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, DIFF_PCT; +ORDER BY E.BENCH, E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, + E.RESULT, DIFF_PCT; -- ============================================================================ --- Count the Number of Wrong Estimates (more than 2s wrong and more than 20%): +-- Count the Number of Wrong Estimates (more than 20%): -- ============================================================================ CREATE VIEW NUM_BAD_ESTIMATES AS -SELECT SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT, - COUNT(*) AS BAD_ESTIMATES, SUM(ABS(DIFF)) AS ABS_ERR +SELECT BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT, + COUNT(*) AS BAD, SUM(ABS(DIFF)) AS ABS_ERR FROM ESTIMATE_ERR WHERE ABS(DIFF_PCT) > 20 -GROUP BY SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT -ORDER BY SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT; +GROUP BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT +ORDER BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT; + +-- ============================================================================ +-- Count the Number of Good Estimates (max. 20% error): +-- ============================================================================ + +CREATE VIEW NUM_GOOD_ESTIMATES AS +SELECT BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT, + COUNT(*) AS GOOD, SUM(ABS(DIFF)) AS ABS_ERR +FROM ESTIMATE_ERR +WHERE ABS(DIFF_PCT) <= 20 +GROUP BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT +ORDER BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT; + +-- ============================================================================ +-- Evaluate Estimates: +-- ============================================================================ + +CREATE VIEW EVAL_ESTIMATES AS +SELECT BENCH, SYS, STARTUP AS S, LOAD_IDX AS L, RULE_APP AS A, RESULT AS R, + SUM(CASE WHEN ABS(DIFF_PCT) <= 20 THEN 1 ELSE 0 END) AS GOOD, + SUM(CASE WHEN DIFF_PCT < -20 THEN 1 ELSE 0 END) AS LESS, + SUM(CASE WHEN DIFF_PCT > 20 THEN 1 ELSE 0 END) AS MORE, + ROUND(AVG(ABS(DIFF_PCT))) AS AVG_ERR +FROM ESTIMATE_ERR +GROUP BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT +ORDER BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT; -- ============================================================================ -- Generate C++ Objects with Graph Data and Runtime (for Optimization): -- GitLab