From 309bf094bfd0a7d4ea6643188297b26a71840aa1 Mon Sep 17 00:00:00 2001
From: Stefan Brass <stefan.brass@informatik.uni-halle.de>
Date: Fri, 11 Oct 2019 17:55:37 +0200
Subject: [PATCH] Improved Runtime Estimation in DB

---
 db/bench_cost.sql |  6 ++++-
 db/drop_db.sql    |  2 ++
 db/v_check.sql    |  2 +-
 db/v_estimate.sql | 64 ++++++++++++++++++++++++++++++++++-------------
 4 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/db/bench_cost.sql b/db/bench_cost.sql
index c3069dc..7a7c0e8 100644
--- a/db/bench_cost.sql
+++ b/db/bench_cost.sql
@@ -2,7 +2,7 @@
 -- Project:	rbench - Logic Programming and Database Benchmarks
 -- Filename:	db/bench_cost.sql
 -- Purpose:	Database for Benchmark Runs: Cost Measure Data for Benchmarks
--- Last Change:	04.10.2019
+-- Last Change:	10.10.2019
 -- Language:	SQL (Tested with PostgreSQL)
 -- Author:	Stefan Brass
 -- EMail:	brass@informatik.uni-halle.de
@@ -200,6 +200,10 @@ INSERT INTO BENCH_COST VALUES('path2','y500_8k',8498,1,8498);
 INSERT INTO BENCH_COST VALUES('path2','y1k_4k',4998,1,4998);
 INSERT INTO BENCH_COST VALUES('path2','y1k_8k',8998,1,8998);
 
+INSERT INTO BENCH_COST VALUES('j1axy','j1k10k',1825612,1,61988581);
+INSERT INTO BENCH_COST VALUES('j1axy','j1k50k',3856026,1,981332692);
+INSERT INTO BENCH_COST VALUES('j1axy','j1k250k',4000000,1,1375000127);
+
 -- ============================================================================
 -- Automatically Added Benchmark Cost Data:
 -- ============================================================================
diff --git a/db/drop_db.sql b/db/drop_db.sql
index 82d0d29..31c09a4 100644
--- a/db/drop_db.sql
+++ b/db/drop_db.sql
@@ -53,6 +53,8 @@ DROP VIEW  LOAD_EFFICIENCY;
 DROP VIEW  ESTIMATE_DATA_CSV;
 DROP VIEW  ESTIMATE_DATA;
 
+DROP VIEW  EVAL_ESTIMATES;
+DROP VIEW  NUM_GOOD_ESTIMATES;
 DROP VIEW  NUM_BAD_ESTIMATES;
 DROP VIEW  ESTIMATE_ERR;
 DROP VIEW  ESTIMATE_FORMULA;
diff --git a/db/v_check.sql b/db/v_check.sql
index ba70184..770640d 100644
--- a/db/v_check.sql
+++ b/db/v_check.sql
@@ -148,7 +148,7 @@ ORDER  BY SYS, MACHINE, BENCH, FILE_ID;
 
 CREATE VIEW INTEGRITY_ERROR AS
 SELECT 'Number of lines in INPUT_FILE and edges in INPUT_GRAPH differ for: ' ||
-	F.FILE_ID
+	F.FILE_ID AS ERROR_MESSAGE
 FROM  INPUT_FILE F, INPUT_GRAPH G
 WHERE F.FILE_ID = G.FILE_ID
 AND   F.LINES <> G.NUM_EDGEs;
diff --git a/db/v_estimate.sql b/db/v_estimate.sql
index ff87e66..ceb7891 100644
--- a/db/v_estimate.sql
+++ b/db/v_estimate.sql
@@ -70,44 +70,74 @@ ORDER  BY SYS, BYTES_PER_FACT;
 -- ============================================================================
 
 CREATE VIEW ESTIMATE_FORMULA AS
-SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT,
-       G.FILE_ID,
+SELECT R.SYS, R.BENCH, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT,
+       E.BENCH AS FORMULA_FOR,
+       F.FILE_ID,
        ROUND(CAST(
          E.STARTUP/1000 +
-         (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) +
+         (F.LINES * (LOG(F.LINES)/LOG(2)) * E.LOAD_IDX / 1000000000) +
          (C.INST * E.RULE_APP / 1000000000) +
          (C.SIZE * E.RESULT / 1000000000) AS NUMERIC),
        3) AS ESTIMATE,
        R.REAL_T
-FROM   BEST_RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E
-WHERE  R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff'
-AND    C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID
-AND    R.FILE_ID = G.FILE_ID
-ORDER  BY R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, ESTIMATE;
+FROM   BEST_RESULT R, INPUT_FILE F, BENCH_COST C, ESTIMATE_PAR E
+WHERE  R.SYS = E.SYS
+AND    C.BENCH = R.BENCH
+AND    C.FILE_ID = F.FILE_ID
+AND    R.FILE_ID = F.FILE_ID
+ORDER  BY R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, R.BENCH, E.RESULT,
+       ESTIMATE;
 
 -- ============================================================================
 -- Check System Speed Estimates (Real Time):
 -- ============================================================================
 
 CREATE VIEW ESTIMATE_ERR AS
-SELECT E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT,
-       E.FILE_ID, E.ESTIMATE, E.REAL_T,
+SELECT E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, E.FORMULA_FOR,
+       E.BENCH, E.FILE_ID, E.ESTIMATE, E.REAL_T,
        E.REAL_T - E.ESTIMATE AS DIFF,
-       ROUND((E.REAL_T - E.ESTIMATE) * 100 / E.ESTIMATE) AS DIFF_PCT
+       ROUND((E.REAL_T - E.ESTIMATE) * 100 / E.ESTIMATE, 3) AS DIFF_PCT
 FROM   ESTIMATE_FORMULA E
-ORDER  BY E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, DIFF_PCT;
+ORDER  BY E.BENCH, E.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP,
+       E.RESULT, DIFF_PCT;
 
 -- ============================================================================
--- Count the Number of Wrong Estimates (more than 2s wrong and more than 20%):
+-- Count the Number of Wrong Estimates (more than 20%):
 -- ============================================================================
 
 CREATE VIEW NUM_BAD_ESTIMATES AS
-SELECT SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT,
-	COUNT(*) AS BAD_ESTIMATES, SUM(ABS(DIFF)) AS ABS_ERR
+SELECT BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT,
+	COUNT(*) AS BAD, SUM(ABS(DIFF)) AS ABS_ERR
 FROM   ESTIMATE_ERR
 WHERE  ABS(DIFF_PCT) > 20
-GROUP  BY SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT
-ORDER  BY SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT;
+GROUP  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT
+ORDER  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT;
+
+-- ============================================================================
+-- Count the Number of Good Estimates (max. 20% error):
+-- ============================================================================
+
+CREATE VIEW NUM_GOOD_ESTIMATES AS
+SELECT BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT,
+	COUNT(*) AS GOOD, SUM(ABS(DIFF)) AS ABS_ERR
+FROM   ESTIMATE_ERR
+WHERE  ABS(DIFF_PCT) <= 20
+GROUP  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT
+ORDER  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT;
+
+-- ============================================================================
+-- Evaluate Estimates:
+-- ============================================================================
+
+CREATE VIEW EVAL_ESTIMATES AS
+SELECT BENCH, SYS, STARTUP AS S, LOAD_IDX AS L, RULE_APP AS A, RESULT AS R,
+	SUM(CASE WHEN ABS(DIFF_PCT) <= 20 THEN 1 ELSE 0 END) AS GOOD,
+	SUM(CASE WHEN DIFF_PCT < -20 THEN 1 ELSE 0 END) AS LESS,
+	SUM(CASE WHEN DIFF_PCT > 20 THEN 1 ELSE 0 END) AS MORE,
+	ROUND(AVG(ABS(DIFF_PCT))) AS AVG_ERR
+FROM   ESTIMATE_ERR
+GROUP  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT
+ORDER  BY BENCH, SYS, STARTUP, LOAD_IDX, RULE_APP, RESULT;
 
 -- ============================================================================
 -- Generate C++ Objects with Graph Data and Runtime (for Optimization):
-- 
GitLab