From 494af585b04d55d8153b75b9f19c6d3f7dffe8d3 Mon Sep 17 00:00:00 2001 From: Stefan Brass <stefan.brass@informatik.uni-halle.de> Date: Fri, 11 Oct 2019 10:53:36 +0200 Subject: [PATCH] Improved runtime estimation tables/views --- db/TODO | 2 ++ db/drop_db.sql | 3 ++- db/estimate_par.sql | 41 +++++++----------------------- db/estimate_par_old.sql | 55 +++++++++++++++++++++++++++++++++++++++++ db/load_data | 5 +++- db/v_check.sql | 18 ++++++++++++-- db/v_estimate.sql | 26 +++++-------------- graph/bench_cost.sql | 45 --------------------------------- 8 files changed, 94 insertions(+), 101 deletions(-) create mode 100644 db/estimate_par_old.sql delete mode 100644 graph/bench_cost.sql diff --git a/db/TODO b/db/TODO index a40e07d..f8c6588 100644 --- a/db/TODO +++ b/db/TODO @@ -10,3 +10,5 @@ Columns TC_COST etc. removed from table INPUT_GRAPHS Probably, it would be possible new to merge TCFF_PLOT_READ and SGFF_PLOT_REAL and so on (the new table BENCH_COST is more general than previous distinct columns). 
+ +New view INTEGRITY_ERROR diff --git a/db/drop_db.sql b/db/drop_db.sql index 66224b7..82d0d29 100644 --- a/db/drop_db.sql +++ b/db/drop_db.sql @@ -2,7 +2,7 @@ -- Project: ddbench - Logic Programming and Database Benchmarks -- Filename: db/drop_db.sql -- Purpose: Database for Benchmark Runs: Drop all Tables and Views --- Last Change: 04.10.2019 +-- Last Change: 11.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -160,6 +160,7 @@ DROP VIEW DATASCRIPT; -- Views for Detecting Outliers and other Data Checks (File v_check.sql): -- ============================================================================ +DROP VIEW INTEGRITY_ERROR; DROP VIEW PCT_CPU; DROP VIEW PCT_CPU_INPUT; DROP VIEW REAL_TOO_LARGE; diff --git a/db/estimate_par.sql b/db/estimate_par.sql index 644a2f3..fbb5a1f 100644 --- a/db/estimate_par.sql +++ b/db/estimate_par.sql @@ -2,7 +2,7 @@ -- Project: ddbench - Deductive System and Database Benchmarks -- Filename: db/estimate_par.sql -- Purpose: Database for Benchmark Runs: Data for Runtime Estimation --- Last Change: 02.02.2019 +-- Last Change: 10.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -21,35 +21,12 @@ -- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS -- START LOAD R_APP RESULT HITS -INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 125, 100, 107, 390, 41); - -INSERT INTO ESTIMATE_PAR VALUES( 'yap', 'tcff', 30, 72, 263, 350, 32); - -INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 244, 35, 51, 29); - -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1510, 2030, 164, 1590, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1605, 1140, 174, 1578, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1705, 1130, 172, 1578, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1805, 1130, 172, 1566, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1905, 1120, 172, 1566, 42); -INSERT INTO 
ESTIMATE_PAR VALUES('jena', 'tcff', 2005, 1120, 172, 1554, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2105, 1110, 172, 1554, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2205, 1110, 170, 1548, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2305, 1100, 172, 1542, 42); - -INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 75, 570, 440, 1020, 38); -INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 105, 180, 446, 1116, 38); - -INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 50, 380, 643, 1524, 42); -INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 105, 0, 628, 1818, 42); - -INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 0, 1210, 422, 1362, 19); --- InnoDB: 0 1210 422 1362 19 --- Memory: 5, 410, 507, 822, 18 - -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 96, 68, 6, 909, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 40, 7, 906, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 26, 7, 909, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 105, 30, 7, 894, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 120, 26, 7, 897, 24); +INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 130, 90, 95, 284, 49); +INSERT INTO ESTIMATE_PAR VALUES( 'yap', 'tcff', 20, 4, 200, 221, 35); +INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 373, 32, 54, 31); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2076, 1599, 166, 1402, 47); +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 369, 102, 419, 711, 48); +INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 90, 93, 593, 1317, 49); +INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 19, 1026, 438, 1164, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 25, 12, 42, 671, 32); diff --git a/db/estimate_par_old.sql b/db/estimate_par_old.sql new file mode 100644 index 0000000..644a2f3 --- /dev/null +++ b/db/estimate_par_old.sql @@ -0,0 +1,55 @@ +-- ============================================================================ +-- Project: ddbench - Deductive System and Database 
Benchmarks +-- Filename: db/estimate_par_old.sql +-- Purpose: Database for Benchmark Runs: Data for Runtime Estimation +-- Last Change: 02.02.2019 +-- Language: SQL (Tested with PostgreSQL) +-- Author: Stefan Brass +-- EMail: brass@informatik.uni-halle.de +-- WWW: http://www.informatik.uni-halle.de/~brass/ +-- Address: Feldschloesschen 15, D-06120 Halle (Saale), GERMANY +-- Copyright: (c) 2016-2019 by Stefan Brass +-- License: See file "LICENSE" for copying conditions. +-- Note: There is no warranty at all - this code may contain bugs. +-- ============================================================================ + + +-- ============================================================================ +-- Parameter Values for Runtime Estimation (Experimental Table): +-- ============================================================================ + +-- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS + +-- START LOAD R_APP RESULT HITS +INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 125, 100, 107, 390, 41); + +INSERT INTO ESTIMATE_PAR VALUES( 'yap', 'tcff', 30, 72, 263, 350, 32); + +INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 244, 35, 51, 29); + +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1510, 2030, 164, 1590, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1605, 1140, 174, 1578, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1705, 1130, 172, 1578, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1805, 1130, 172, 1566, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1905, 1120, 172, 1566, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2005, 1120, 172, 1554, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2105, 1110, 172, 1554, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2205, 1110, 170, 1548, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2305, 1100, 172, 1542, 42); + +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 75, 570, 440, 1020, 38); +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 105, 
180, 446, 1116, 38); + +INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 50, 380, 643, 1524, 42); +INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 105, 0, 628, 1818, 42); + +INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 0, 1210, 422, 1362, 19); +-- InnoDB: 0 1210 422 1362 19 +-- Memory: 5, 410, 507, 822, 18 + +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 96, 68, 6, 909, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 40, 7, 906, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 26, 7, 909, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 105, 30, 7, 894, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 120, 26, 7, 897, 24); + diff --git a/db/load_data b/db/load_data index 03c1a47..3a34893 100755 --- a/db/load_data +++ b/db/load_data @@ -3,7 +3,7 @@ # Project: ddbench - Deductive System and Database Benchmarks # Filename: db/load_data # Purpose: Shellscript to create database and load all data -# Last Change: 04.10.2019 +# Last Change: 10.10.2019 # Language: Shellscript (bash) # Author: Stefan Brass # EMail: brass@informatik.uni-halle.de @@ -98,6 +98,9 @@ psql -f input_join1.sql | grep -v "INSERT 0 1" # Cost measures of a benchmark for an input file: psql -f bench_cost.sql | grep -v "INSERT 0 1" +# For runtime estimation: +psql -f estimate_par.sql | grep -v "INSERT 0 1" + # Convert tsv-files with benchmark results to SQL: # (might be unnecessary, if the SQL files already exist, but is safer, # because SQL files might be missing or outdated) diff --git a/db/v_check.sql b/db/v_check.sql index b0f1def..ba70184 100644 --- a/db/v_check.sql +++ b/db/v_check.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_check.sql -- Purpose: Benchmark DB: Views for Outlier Detection and other Checks --- Last Change: 04.03.2019 +-- Last Change: 10.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -15,7 +15,7 @@ -- 
############################################################################ --- Outlier Detection, Checking of Results for Plausibility: +-- Outlier Detection, Checking of Results for Plausibility, Integrity Check: -- ############################################################################ @@ -141,3 +141,17 @@ AND WITH_OUTPUT = 'N' GROUP BY SYS, MACHINE, BENCH, FILE_ID ORDER BY SYS, MACHINE, BENCH, FILE_ID; + +-- ============================================================================ +-- Integrity Constraints / Consistency Check - Query Result must be Empty! +-- ============================================================================ + +CREATE VIEW INTEGRITY_ERROR AS +SELECT 'Number of lines in INPUT_FILE and edges in INPUT_GRAPH differ for: ' || + F.FILE_ID +FROM INPUT_FILE F, INPUT_GRAPH G +WHERE F.FILE_ID = G.FILE_ID +AND F.LINES <> G.NUM_EDGES; + +-- Further checks can be added with UNION ALL + diff --git a/db/v_estimate.sql b/db/v_estimate.sql index 9c1a50d..ff87e66 100644 --- a/db/v_estimate.sql +++ b/db/v_estimate.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_estimate.sql -- Purpose: Benchmark DB: Views to Support Research on Runtime Estimation --- Last Change: 04.10.2019 +-- Last Change: 11.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -73,13 +73,13 @@ CREATE VIEW ESTIMATE_FORMULA AS SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, G.FILE_ID, ROUND(CAST( - E.STARTUP + + E.STARTUP/1000.0 + /* decimal literal: keeps the fraction even if STARTUP is an integer column */ (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) + (C.INST * E.RULE_APP / 1000000000) + (C.SIZE * E.RESULT / 1000000000) AS NUMERIC), 3) AS ESTIMATE, R.REAL_T -FROM RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E +FROM BEST_RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E WHERE R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff' AND C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID AND R.FILE_ID = 
G.FILE_ID @@ -144,24 +144,10 @@ SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, C.ITER || '","' || ROUND(R.REAL_T*1000) || '"' AS CSV FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R -WHERE R.BENCH = 'tcff' AND G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID -AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID -UNION ALL -SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, - '"' || R.SYS || '","' || - R.BENCH || '","' || - R.IMPL || '","' || - G.FILE_ID || '","' || - G.NUM_NODES || '","' || - G.NUM_EDGES || '","' || - C.SIZE || '","' || - C.INST || '","' || - C.ITER || '","' || - ROUND(R.REAL_T*1000) || '"' AS CSV -FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R -WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +WHERE R.BENCH = C.BENCH +AND R.FILE_ID = F.FILE_ID AND G.FILE_ID = F.FILE_ID -AND C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID +AND C.FILE_ID = F.FILE_ID UNION ALL SELECT '' AS SYS, '' AS BENCH, '' AS IMPL, '' AS FILE_ID, 0 AS SEQ_NO, '"' || 'SYS' || '","' || diff --git a/graph/bench_cost.sql b/graph/bench_cost.sql deleted file mode 100644 index 686d8e2..0000000 --- a/graph/bench_cost.sql +++ /dev/null @@ -1,45 +0,0 @@ -INSERT INTO BENCH_COST VALUES('tcff','b17',1966082,16,1966082); -INSERT INTO BENCH_COST VALUES('tcff','b18',4194306,17,4194306); -INSERT INTO BENCH_COST VALUES('tcff','c1k',1000000,1000,1001000); -INSERT INTO BENCH_COST VALUES('tcff','c2k',4000000,2000,4002000); -INSERT INTO BENCH_COST VALUES('tcff','c3k',9000000,3000,9003000); -INSERT INTO BENCH_COST VALUES('tcff','c4k',16000000,4000,16004000); -INSERT INTO BENCH_COST VALUES('tcff','k100',10000,1,1010000); -INSERT INTO BENCH_COST VALUES('tcff','k1k',1000000,1,1001000000); -INSERT INTO BENCH_COST VALUES('tcff','k1k5',2250000,1,3377250000); -INSERT INTO BENCH_COST VALUES('tcff','k2k',4000000,1,8004000000); -INSERT INTO BENCH_COST VALUES('tcff','k50',2500,1,127500); -INSERT INTO BENCH_COST VALUES('tcff','k500',250000,1,125250000); -INSERT INTO BENCH_COST 
VALUES('tcff','m4_2ki',12288,3,12288); -INSERT INTO BENCH_COST VALUES('tcff','m16_512',61440,15,61440); -INSERT INTO BENCH_COST VALUES('tcff','m64_128',258048,63,258048); -INSERT INTO BENCH_COST VALUES('tcff','m256_32',1044480,255,1044480); -INSERT INTO BENCH_COST VALUES('tcff','m1ki_8',4190208,1023,4190208); -INSERT INTO BENCH_COST VALUES('tcff','m4ki_2',16773120,4095,16773120); -INSERT INTO BENCH_COST VALUES('tcff','p1k',499500,999,499500); -INSERT INTO BENCH_COST VALUES('tcff','p2k',1999000,1999,1999000); -INSERT INTO BENCH_COST VALUES('tcff','p3k',4498500,2999,4498500); -INSERT INTO BENCH_COST VALUES('tcff','p4k',7998000,3999,7998000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_1',1000000,500,2002000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_3',1000000,250,4004000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_4',1000000,200,5005000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_1',4000000,1000,8004000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_3',4000000,500,16008000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_4',4000000,400,20010000); -INSERT INTO BENCH_COST VALUES('tcff','t50',1225,1,20825); -INSERT INTO BENCH_COST VALUES('tcff','t100',4950,1,166650); -INSERT INTO BENCH_COST VALUES('tcff','t500',124750,1,20833250); -INSERT INTO BENCH_COST VALUES('tcff','t1k',499500,1,166666500); -INSERT INTO BENCH_COST VALUES('tcff','t1k5',1124250,1,562499750); -INSERT INTO BENCH_COST VALUES('tcff','t2k',1999000,1,1333333000); -INSERT INTO BENCH_COST VALUES('tcff','v10',8194,9,8194); -INSERT INTO BENCH_COST VALUES('tcff','v11',18434,10,18434); -INSERT INTO BENCH_COST VALUES('tcff','v12',40962,11,40962); -INSERT INTO BENCH_COST VALUES('tcff','v17',1966082,16,1966082); -INSERT INTO BENCH_COST VALUES('tcff','v18',4194306,17,4194306); -INSERT INTO BENCH_COST VALUES('tcff','w1k',1000000,1,1000000); -INSERT INTO BENCH_COST VALUES('tcff','x10k',100020000,2,100020000); -INSERT INTO BENCH_COST VALUES('tcff','y1k_4k',11998000,4000,11998000); -INSERT INTO BENCH_COST 
VALUES('tcff','y1k_8k',39996000,8000,39996000); -INSERT INTO BENCH_COST VALUES('tcff','y500_4k',9998000,4000,9998000); -INSERT INTO BENCH_COST VALUES('tcff','y500_8k',35996000,8000,35996000); -- GitLab