From 494af585b04d55d8153b75b9f19c6d3f7dffe8d3 Mon Sep 17 00:00:00 2001
From: Stefan Brass <stefan.brass@informatik.uni-halle.de>
Date: Fri, 11 Oct 2019 10:53:36 +0200
Subject: [PATCH] Improved runtime estimation tables/views

---
 db/TODO                 |  2 ++
 db/drop_db.sql          |  3 ++-
 db/estimate_par.sql     | 41 +++++++-----------------------
 db/estimate_par_old.sql | 55 +++++++++++++++++++++++++++++++++++++++++
 db/load_data            |  5 +++-
 db/v_check.sql          | 18 ++++++++++++--
 db/v_estimate.sql       | 26 +++++--------------
 graph/bench_cost.sql    | 45 ---------------------------------
 8 files changed, 94 insertions(+), 101 deletions(-)
 create mode 100644 db/estimate_par_old.sql
 delete mode 100644 graph/bench_cost.sql

diff --git a/db/TODO b/db/TODO
index a40e07d..f8c6588 100644
--- a/db/TODO
+++ b/db/TODO
@@ -10,3 +10,5 @@ Columns TC_COST etc. removed from table INPUT_GRAPHS
 Probably, it would be possible new to merge TCFF_PLOT_READ and SGFF_PLOT_REAL
 and so on (the new table BENCH_COST is more general than previous distinct
 columns).
+
+New view INTEGRITY_ERROR
diff --git a/db/drop_db.sql b/db/drop_db.sql
index 66224b7..82d0d29 100644
--- a/db/drop_db.sql
+++ b/db/drop_db.sql
@@ -2,7 +2,7 @@
 -- Project:	ddbench - Logic Programming and Database Benchmarks
 -- Filename:	db/drop_db.sql
 -- Purpose:	Database for Benchmark Runs: Drop all Tables and Views
--- Last Change:	04.10.2019
+-- Last Change:	11.10.2019
 -- Language:	SQL (Tested with PostgreSQL)
 -- Author:	Stefan Brass
 -- EMail:	brass@informatik.uni-halle.de
@@ -160,6 +160,7 @@ DROP VIEW  DATASCRIPT;
 -- Views for Detecting Outliers and other Data Checks (File v_check.sql):
 -- ============================================================================
 
+DROP VIEW  INTEGRITY_ERROR;
 DROP VIEW  PCT_CPU;
 DROP VIEW  PCT_CPU_INPUT;
 DROP VIEW  REAL_TOO_LARGE;
diff --git a/db/estimate_par.sql b/db/estimate_par.sql
index 644a2f3..fbb5a1f 100644
--- a/db/estimate_par.sql
+++ b/db/estimate_par.sql
@@ -2,7 +2,7 @@
 -- Project:	ddbench - Deductive System and Database Benchmarks
 -- Filename:	db/estimate_par.sql
 -- Purpose:	Database for Benchmark Runs: Data for Runtime Estimation
--- Last Change:	02.02.2019
+-- Last Change:	10.10.2019
 -- Language:	SQL (Tested with PostgreSQL)
 -- Author:	Stefan Brass
 -- EMail:	brass@informatik.uni-halle.de
@@ -21,35 +21,12 @@
 -- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS
 
 --                                               START  LOAD R_APP RESULT HITS
-INSERT INTO ESTIMATE_PAR VALUES( 'xsb',   'tcff',  125,  100,  107,   390, 41);
-
-INSERT INTO ESTIMATE_PAR VALUES( 'yap',   'tcff',   30,   72,  263,   350, 32);
-
-INSERT INTO ESTIMATE_PAR VALUES( 'bam',   'tcff',    0,  244,   35,    51, 29);
-
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1510, 2030,  164,  1590, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1605, 1140,  174,  1578, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1705, 1130,  172,  1578, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1805, 1130,  172,  1566, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1905, 1120,  172,  1566, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2005, 1120,  172,  1554, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2105, 1110,  172,  1554, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2205, 1110,  170,  1548, 42);
-INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2305, 1100,  172,  1542, 42);
-
-INSERT INTO ESTIMATE_PAR VALUES('psql',   'tcff',   75,  570,  440,  1020, 38);
-INSERT INTO ESTIMATE_PAR VALUES('psql',   'tcff',  105,  180,  446,  1116, 38);
-
-INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff',   50,  380,  643,  1524, 42);
-INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff',  105,    0,  628,  1818, 42);
-
-INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff',    0, 1210,  422,  1362, 19);
--- InnoDB:                                           0  1210   422   1362  19
--- Memory:                                           5,  410,  507,   822, 18
-
-INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',   96,   68,    6,   909, 24);
-INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  100,   40,    7,   906, 24);
-INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  100,   26,    7,   909, 24);
-INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  105,   30,    7,   894, 24);
-INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  120,   26,    7,   897, 24);
+INSERT INTO ESTIMATE_PAR VALUES( 'xsb',   'tcff',  130,   90,   95,   284, 49);
+INSERT INTO ESTIMATE_PAR VALUES( 'yap',   'tcff',   20,    4,  200,   221, 35);
+INSERT INTO ESTIMATE_PAR VALUES( 'bam',   'tcff',    0,  373,   32,    54, 31);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2076, 1599,  166,  1402, 47);
+INSERT INTO ESTIMATE_PAR VALUES('psql',   'tcff',  369,  102,  419,   711, 48);
+INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff',   90,   93,  593,  1317, 49);
+INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff',   19, 1026,  438,  1164, 24);
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',   25,   12,   42,   671, 32);
 
diff --git a/db/estimate_par_old.sql b/db/estimate_par_old.sql
new file mode 100644
index 0000000..644a2f3
--- /dev/null
+++ b/db/estimate_par_old.sql
@@ -0,0 +1,55 @@
+-- ============================================================================
+-- Project:	ddbench - Deductive System and Database Benchmarks
+-- Filename:	db/estimate_par_old.sql
+-- Purpose:	Database for Benchmark Runs: Data for Runtime Estimation
+-- Last Change:	02.02.2019
+-- Language:	SQL (Tested with PostgreSQL)
+-- Author:	Stefan Brass
+-- EMail:	brass@informatik.uni-halle.de
+-- WWW:		http://www.informatik.uni-halle.de/~brass/
+-- Address:	Feldschloesschen 15, D-06120 Halle (Saale), GERMANY
+-- Copyright:	(c) 2016-2019 by Stefan Brass
+-- License:	See file "LICENSE" for copying conditions.
+-- Note:	There is no warranty at all - this code may contain bugs.
+-- ============================================================================
+
+
+-- ============================================================================
+-- Parameter Values for Runtime Estimation (Experimental Table):
+-- ============================================================================
+
+-- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS
+
+--                                               START  LOAD R_APP RESULT HITS
+INSERT INTO ESTIMATE_PAR VALUES( 'xsb',   'tcff',  125,  100,  107,   390, 41);
+
+INSERT INTO ESTIMATE_PAR VALUES( 'yap',   'tcff',   30,   72,  263,   350, 32);
+
+INSERT INTO ESTIMATE_PAR VALUES( 'bam',   'tcff',    0,  244,   35,    51, 29);
+
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1510, 2030,  164,  1590, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1605, 1140,  174,  1578, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1705, 1130,  172,  1578, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1805, 1130,  172,  1566, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 1905, 1120,  172,  1566, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2005, 1120,  172,  1554, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2105, 1110,  172,  1554, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2205, 1110,  170,  1548, 42);
+INSERT INTO ESTIMATE_PAR VALUES('jena',   'tcff', 2305, 1100,  172,  1542, 42);
+
+INSERT INTO ESTIMATE_PAR VALUES('psql',   'tcff',   75,  570,  440,  1020, 38);
+INSERT INTO ESTIMATE_PAR VALUES('psql',   'tcff',  105,  180,  446,  1116, 38);
+
+INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff',   50,  380,  643,  1524, 42);
+INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff',  105,    0,  628,  1818, 42);
+
+INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff',    0, 1210,  422,  1362, 19);
+-- InnoDB:                                           0  1210   422   1362  19
+-- Memory:                                           5,  410,  507,   822, 18
+
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',   96,   68,    6,   909, 24);
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  100,   40,    7,   906, 24);
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  100,   26,    7,   909, 24);
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  105,   30,    7,   894, 24);
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff',  120,   26,    7,   897, 24);
+
diff --git a/db/load_data b/db/load_data
index 03c1a47..3a34893 100755
--- a/db/load_data
+++ b/db/load_data
@@ -3,7 +3,7 @@
 # Project:	ddbench - Deductive System and Database Benchmarks
 # Filename:	db/load_data
 # Purpose:	Shellscript to create database and load all data
-# Last Change:	04.10.2019
+# Last Change:	10.10.2019
 # Language:	Shellscript (bash)
 # Author:	Stefan Brass
 # EMail:	brass@informatik.uni-halle.de
@@ -98,6 +98,9 @@ psql -f input_join1.sql  | grep -v "INSERT 0 1"
 # Cost measures of a benchmark for an input file:
 psql -f bench_cost.sql  | grep -v "INSERT 0 1"
 
+# For runtime estimation:
+psql -f estimate_par.sql  | grep -v "INSERT 0 1"
+
 # Convert tsv-files with benchmark results to SQL:
 # (might be unnecessary, if the SQL files already exist, but is safer,
 #  because SQL files might be missing or outdated)
diff --git a/db/v_check.sql b/db/v_check.sql
index b0f1def..ba70184 100644
--- a/db/v_check.sql
+++ b/db/v_check.sql
@@ -2,7 +2,7 @@
 -- Project:	rbench - Logic Programming and Database Benchmarks
 -- Filename:	db/v_check.sql
 -- Purpose:	Benchmark DB: Views for Outlier Detection and other Checks
--- Last Change:	04.03.2019
+-- Last Change:	10.10.2019
 -- Language:	SQL (Tested with PostgreSQL)
 -- Author:	Stefan Brass
 -- EMail:	brass@informatik.uni-halle.de
@@ -15,7 +15,7 @@
 
 
 -- ############################################################################
--- Outlier Detection, Checking of Results for Plausibility:
+-- Outlier Detection, Checking of Results for Plausibility, Integrity Check:
 -- ############################################################################
 
 
@@ -141,3 +141,17 @@ AND    WITH_OUTPUT = 'N'
 GROUP  BY SYS, MACHINE, BENCH, FILE_ID
 ORDER  BY SYS, MACHINE, BENCH, FILE_ID;
 
+
+-- ============================================================================
+-- Integrity Constraints / Consistency Check - Query Result must be Empty!
+-- ============================================================================
+
+CREATE VIEW INTEGRITY_ERROR AS  -- one row per detected inconsistency
+SELECT 'Number of lines in INPUT_FILE and edges in INPUT_GRAPH differ for: ' ||
+	F.FILE_ID AS ERROR_MSG  -- explicit name; else PostgreSQL uses "?column?"
+FROM  INPUT_FILE F, INPUT_GRAPH G
+WHERE F.FILE_ID = G.FILE_ID  -- pair each file with its graph entry
+AND   F.LINES <> G.NUM_EDGES;  -- NOTE(review): NULL on either side is not reported
+
+-- Further checks can be added with UNION ALL
+
diff --git a/db/v_estimate.sql b/db/v_estimate.sql
index 9c1a50d..ff87e66 100644
--- a/db/v_estimate.sql
+++ b/db/v_estimate.sql
@@ -2,7 +2,7 @@
 -- Project:	rbench - Logic Programming and Database Benchmarks
 -- Filename:	db/v_estimate.sql
 -- Purpose:	Benchmark DB: Views to Support Research on Runtime Estimation
--- Last Change:	04.10.2019
+-- Last Change:	11.10.2019
 -- Language:	SQL (Tested with PostgreSQL)
 -- Author:	Stefan Brass
 -- EMail:	brass@informatik.uni-halle.de
@@ -73,13 +73,13 @@ CREATE VIEW ESTIMATE_FORMULA AS
 SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT,
        G.FILE_ID,
        ROUND(CAST(
-         E.STARTUP +
+         E.STARTUP/1000.0 +  -- decimal divisor: no truncation if STARTUP is an integer
          (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) +
          (C.INST * E.RULE_APP / 1000000000) +
          (C.SIZE * E.RESULT / 1000000000) AS NUMERIC),
        3) AS ESTIMATE,
        R.REAL_T
-FROM   RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E
+FROM   BEST_RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E
 WHERE  R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff'
 AND    C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID
 AND    R.FILE_ID = G.FILE_ID
@@ -144,24 +144,10 @@ SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO,
 		C.ITER      || '","' ||
 		ROUND(R.REAL_T*1000) || '"' AS CSV
 FROM	INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R
-WHERE   R.BENCH = 'tcff' AND G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID
-AND     C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID
-UNION ALL
-SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO,
-       '"' ||   R.SYS       || '","' ||
-		R.BENCH     || '","' ||
-		R.IMPL      || '","' ||
-		G.FILE_ID   || '","' ||
-		G.NUM_NODES || '","' ||
-		G.NUM_EDGES || '","' ||
-		C.SIZE      || '","' ||
-		C.INST      || '","' ||
-		C.ITER      || '","' ||
-		ROUND(R.REAL_T*1000) || '"' AS CSV
-FROM	INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R
-WHERE   R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID
+WHERE   R.BENCH = C.BENCH
+AND     R.FILE_ID = F.FILE_ID
 AND     G.FILE_ID = F.FILE_ID
-AND     C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID
+AND     C.FILE_ID = F.FILE_ID
 UNION ALL
 SELECT '' AS SYS, '' AS BENCH, '' AS IMPL, '' AS FILE_ID, 0 AS SEQ_NO,
        '"' ||   'SYS'       || '","' ||
diff --git a/graph/bench_cost.sql b/graph/bench_cost.sql
deleted file mode 100644
index 686d8e2..0000000
--- a/graph/bench_cost.sql
+++ /dev/null
@@ -1,45 +0,0 @@
-INSERT INTO BENCH_COST VALUES('tcff','b17',1966082,16,1966082);
-INSERT INTO BENCH_COST VALUES('tcff','b18',4194306,17,4194306);
-INSERT INTO BENCH_COST VALUES('tcff','c1k',1000000,1000,1001000);
-INSERT INTO BENCH_COST VALUES('tcff','c2k',4000000,2000,4002000);
-INSERT INTO BENCH_COST VALUES('tcff','c3k',9000000,3000,9003000);
-INSERT INTO BENCH_COST VALUES('tcff','c4k',16000000,4000,16004000);
-INSERT INTO BENCH_COST VALUES('tcff','k100',10000,1,1010000);
-INSERT INTO BENCH_COST VALUES('tcff','k1k',1000000,1,1001000000);
-INSERT INTO BENCH_COST VALUES('tcff','k1k5',2250000,1,3377250000);
-INSERT INTO BENCH_COST VALUES('tcff','k2k',4000000,1,8004000000);
-INSERT INTO BENCH_COST VALUES('tcff','k50',2500,1,127500);
-INSERT INTO BENCH_COST VALUES('tcff','k500',250000,1,125250000);
-INSERT INTO BENCH_COST VALUES('tcff','m4_2ki',12288,3,12288);
-INSERT INTO BENCH_COST VALUES('tcff','m16_512',61440,15,61440);
-INSERT INTO BENCH_COST VALUES('tcff','m64_128',258048,63,258048);
-INSERT INTO BENCH_COST VALUES('tcff','m256_32',1044480,255,1044480);
-INSERT INTO BENCH_COST VALUES('tcff','m1ki_8',4190208,1023,4190208);
-INSERT INTO BENCH_COST VALUES('tcff','m4ki_2',16773120,4095,16773120);
-INSERT INTO BENCH_COST VALUES('tcff','p1k',499500,999,499500);
-INSERT INTO BENCH_COST VALUES('tcff','p2k',1999000,1999,1999000);
-INSERT INTO BENCH_COST VALUES('tcff','p3k',4498500,2999,4498500);
-INSERT INTO BENCH_COST VALUES('tcff','p4k',7998000,3999,7998000);
-INSERT INTO BENCH_COST VALUES('tcff','s1k_1',1000000,500,2002000);
-INSERT INTO BENCH_COST VALUES('tcff','s1k_3',1000000,250,4004000);
-INSERT INTO BENCH_COST VALUES('tcff','s1k_4',1000000,200,5005000);
-INSERT INTO BENCH_COST VALUES('tcff','s2k_1',4000000,1000,8004000);
-INSERT INTO BENCH_COST VALUES('tcff','s2k_3',4000000,500,16008000);
-INSERT INTO BENCH_COST VALUES('tcff','s2k_4',4000000,400,20010000);
-INSERT INTO BENCH_COST VALUES('tcff','t50',1225,1,20825);
-INSERT INTO BENCH_COST VALUES('tcff','t100',4950,1,166650);
-INSERT INTO BENCH_COST VALUES('tcff','t500',124750,1,20833250);
-INSERT INTO BENCH_COST VALUES('tcff','t1k',499500,1,166666500);
-INSERT INTO BENCH_COST VALUES('tcff','t1k5',1124250,1,562499750);
-INSERT INTO BENCH_COST VALUES('tcff','t2k',1999000,1,1333333000);
-INSERT INTO BENCH_COST VALUES('tcff','v10',8194,9,8194);
-INSERT INTO BENCH_COST VALUES('tcff','v11',18434,10,18434);
-INSERT INTO BENCH_COST VALUES('tcff','v12',40962,11,40962);
-INSERT INTO BENCH_COST VALUES('tcff','v17',1966082,16,1966082);
-INSERT INTO BENCH_COST VALUES('tcff','v18',4194306,17,4194306);
-INSERT INTO BENCH_COST VALUES('tcff','w1k',1000000,1,1000000);
-INSERT INTO BENCH_COST VALUES('tcff','x10k',100020000,2,100020000);
-INSERT INTO BENCH_COST VALUES('tcff','y1k_4k',11998000,4000,11998000);
-INSERT INTO BENCH_COST VALUES('tcff','y1k_8k',39996000,8000,39996000);
-INSERT INTO BENCH_COST VALUES('tcff','y500_4k',9998000,4000,9998000);
-INSERT INTO BENCH_COST VALUES('tcff','y500_8k',35996000,8000,35996000);
-- 
GitLab