diff --git a/db/TODO b/db/TODO index a40e07d047ac75c9b3414e2917c628789056a414..f8c65888274e8c8e0f2618ed2366aee5fd53d3a9 100644 --- a/db/TODO +++ b/db/TODO @@ -10,3 +10,5 @@ Columns TC_COST etc. removed from table INPUT_GRAPHS Probably, it would be possible new to merge TCFF_PLOT_READ and SGFF_PLOT_REAL and so on (the new table BENCH_COST is more general than previous distinct columns). + +New view INTEGRITY_ERROR diff --git a/db/drop_db.sql b/db/drop_db.sql index 66224b714b399f82493089af2b3facc246b2528c..82d0d2994a7785fb96839242ddd99d4a7a2d4588 100644 --- a/db/drop_db.sql +++ b/db/drop_db.sql @@ -2,7 +2,7 @@ -- Project: ddbench - Logic Programming and Database Benchmarks -- Filename: db/drop_db.sql -- Purpose: Database for Benchmark Runs: Drop all Tables and Views --- Last Change: 04.10.2019 +-- Last Change: 11.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -160,6 +160,7 @@ DROP VIEW DATASCRIPT; -- Views for Detecting Outliers and other Data Checks (File v_check.sql): -- ============================================================================ +DROP VIEW INTEGRITY_ERROR; DROP VIEW PCT_CPU; DROP VIEW PCT_CPU_INPUT; DROP VIEW REAL_TOO_LARGE; diff --git a/db/estimate_par.sql b/db/estimate_par.sql index 644a2f33a440bf4e60b7f1735a87002349c68598..fbb5a1fda22306f24fb00462c4232d916c046b38 100644 --- a/db/estimate_par.sql +++ b/db/estimate_par.sql @@ -2,7 +2,7 @@ -- Project: ddbench - Deductive System and Database Benchmarks -- Filename: db/estimate_par.sql -- Purpose: Database for Benchmark Runs: Data for Runtime Estimation --- Last Change: 02.02.2019 +-- Last Change: 10.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -21,35 +21,12 @@ -- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS -- START LOAD R_APP RESULT HITS -INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 125, 100, 107, 390, 41); - -INSERT INTO ESTIMATE_PAR 
VALUES( 'yap', 'tcff', 30, 72, 263, 350, 32); - -INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 244, 35, 51, 29); - -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1510, 2030, 164, 1590, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1605, 1140, 174, 1578, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1705, 1130, 172, 1578, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1805, 1130, 172, 1566, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1905, 1120, 172, 1566, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2005, 1120, 172, 1554, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2105, 1110, 172, 1554, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2205, 1110, 170, 1548, 42); -INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2305, 1100, 172, 1542, 42); - -INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 75, 570, 440, 1020, 38); -INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 105, 180, 446, 1116, 38); - -INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 50, 380, 643, 1524, 42); -INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 105, 0, 628, 1818, 42); - -INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 0, 1210, 422, 1362, 19); --- InnoDB: 0 1210 422 1362 19 --- Memory: 5, 410, 507, 822, 18 - -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 96, 68, 6, 909, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 40, 7, 906, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 26, 7, 909, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 105, 30, 7, 894, 24); -INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 120, 26, 7, 897, 24); +INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 130, 90, 95, 284, 49); +INSERT INTO ESTIMATE_PAR VALUES( 'yap', 'tcff', 20, 4, 200, 221, 35); +INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 373, 32, 54, 31); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2076, 1599, 166, 1402, 47); +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 369, 102, 419, 711, 48); +INSERT 
INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 90, 93, 593, 1317, 49); +INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 19, 1026, 438, 1164, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 25, 12, 42, 671, 32); diff --git a/db/estimate_par_old.sql b/db/estimate_par_old.sql new file mode 100644 index 0000000000000000000000000000000000000000..644a2f33a440bf4e60b7f1735a87002349c68598 --- /dev/null +++ b/db/estimate_par_old.sql @@ -0,0 +1,55 @@ +-- ============================================================================ +-- Project: ddbench - Deductive System and Database Benchmarks +-- Filename: db/estimate_par_old.sql +-- Purpose: Database for Benchmark Runs: Old Data for Runtime Estimation +-- Last Change: 02.02.2019 +-- Language: SQL (Tested with PostgreSQL) +-- Author: Stefan Brass +-- EMail: brass@informatik.uni-halle.de +-- WWW: http://www.informatik.uni-halle.de/~brass/ +-- Address: Feldschloesschen 15, D-06120 Halle (Saale), GERMANY +-- Copyright: (c) 2016-2019 by Stefan Brass +-- License: See file "LICENSE" for copying conditions. +-- Note: There is no warranty at all - this code may contain bugs. 
+-- ============================================================================ + + +-- ============================================================================ +-- Parameter Values for Runtime Estimation (Experimental Table): +-- ============================================================================ + +-- Columns: SYS, BENCH, STARTUP, LOAD_IDX, RULE_APP, RESULT, HITS + +-- START LOAD R_APP RESULT HITS +INSERT INTO ESTIMATE_PAR VALUES( 'xsb', 'tcff', 125, 100, 107, 390, 41); + +INSERT INTO ESTIMATE_PAR VALUES( 'yap', 'tcff', 30, 72, 263, 350, 32); + +INSERT INTO ESTIMATE_PAR VALUES( 'bam', 'tcff', 0, 244, 35, 51, 29); + +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1510, 2030, 164, 1590, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1605, 1140, 174, 1578, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1705, 1130, 172, 1578, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1805, 1130, 172, 1566, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 1905, 1120, 172, 1566, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2005, 1120, 172, 1554, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2105, 1110, 172, 1554, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2205, 1110, 170, 1548, 42); +INSERT INTO ESTIMATE_PAR VALUES('jena', 'tcff', 2305, 1100, 172, 1542, 42); + +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 75, 570, 440, 1020, 38); +INSERT INTO ESTIMATE_PAR VALUES('psql', 'tcff', 105, 180, 446, 1116, 38); + +INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 50, 380, 643, 1524, 42); +INSERT INTO ESTIMATE_PAR VALUES('sqlite3','tcff', 105, 0, 628, 1818, 42); + +INSERT INTO ESTIMATE_PAR VALUES('mariadb','tcff', 0, 1210, 422, 1362, 19); +-- InnoDB: 0 1210 422 1362 19 +-- Memory: 5, 410, 507, 822, 18 + +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 96, 68, 6, 909, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 40, 7, 906, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 100, 26, 7, 909, 24); 
+INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 105, 30, 7, 894, 24); +INSERT INTO ESTIMATE_PAR VALUES('souffle','tcff', 120, 26, 7, 897, 24); + diff --git a/db/load_data b/db/load_data index 03c1a47d909cd8c24668e8f8714f461433d5c115..3a34893a51c04cfa88fdef063ca789721fe6c1b6 100755 --- a/db/load_data +++ b/db/load_data @@ -3,7 +3,7 @@ # Project: ddbench - Deductive System and Database Benchmarks # Filename: db/load_data # Purpose: Shellscript to create database and load all data -# Last Change: 04.10.2019 +# Last Change: 10.10.2019 # Language: Shellscript (bash) # Author: Stefan Brass # EMail: brass@informatik.uni-halle.de @@ -98,6 +98,9 @@ psql -f input_join1.sql | grep -v "INSERT 0 1" # Cost measures of a benchmark for an input file: psql -f bench_cost.sql | grep -v "INSERT 0 1" +# For runtime estimation: +psql -f estimate_par.sql | grep -v "INSERT 0 1" + # Convert tsv-files with benchmark results to SQL: # (might be unnecessary, if the SQL files already exist, but is safer, # because SQL files might be missing or outdated) diff --git a/db/v_check.sql b/db/v_check.sql index b0f1def23d40e299964684ff77297f0c4f02f0bd..ba701849fdf51bd7acdc00e5f90be4aad090ea49 100644 --- a/db/v_check.sql +++ b/db/v_check.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_check.sql -- Purpose: Benchmark DB: Views for Outlier Detection and other Checks --- Last Change: 04.03.2019 +-- Last Change: 10.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -15,7 +15,7 @@ -- ############################################################################ --- Outlier Detection, Checking of Results for Plausibility: +-- Outlier Detection, Checking of Results for Plausibility, Integrity Check: -- ############################################################################ @@ -141,3 +141,17 @@ AND WITH_OUTPUT = 'N' GROUP BY SYS, MACHINE, BENCH, FILE_ID ORDER BY SYS, MACHINE, BENCH, 
FILE_ID; + +-- ============================================================================ +-- Integrity Constraints / Consistency Check - Query Result must be Empty! +-- ============================================================================ + +CREATE VIEW INTEGRITY_ERROR AS +SELECT 'Number of lines in INPUT_FILE and edges in INPUT_GRAPH differ for: ' || + F.FILE_ID +FROM INPUT_FILE F, INPUT_GRAPH G +WHERE F.FILE_ID = G.FILE_ID +AND F.LINES <> G.NUM_EDGES; + +-- Further checks can be added with UNION ALL + diff --git a/db/v_estimate.sql b/db/v_estimate.sql index 9c1a50d90f03721f9675a70215ae7a747a0b09e0..ff87e66113dcf7e8dd8660b2c83985076fba8447 100644 --- a/db/v_estimate.sql +++ b/db/v_estimate.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_estimate.sql -- Purpose: Benchmark DB: Views to Support Research on Runtime Estimation --- Last Change: 04.10.2019 +-- Last Change: 11.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -73,13 +73,14 @@ CREATE VIEW ESTIMATE_FORMULA AS SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, G.FILE_ID, ROUND(CAST( - E.STARTUP + + -- Divisor 1000.0, not 1000: avoids truncating integer division if STARTUP is an integer column + E.STARTUP/1000.0 + (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) + (C.INST * E.RULE_APP / 1000000000) + (C.SIZE * E.RESULT / 1000000000) AS NUMERIC), 3) AS ESTIMATE, R.REAL_T -FROM RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E +FROM BEST_RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E WHERE R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff' AND C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID AND R.FILE_ID = G.FILE_ID @@ -144,24 +145,10 @@ SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, C.ITER || '","' || ROUND(R.REAL_T*1000) || '"' AS CSV FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R -WHERE R.BENCH = 'tcff' AND G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID -AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID -UNION ALL -SELECT 
R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, - '"' || R.SYS || '","' || - R.BENCH || '","' || - R.IMPL || '","' || - G.FILE_ID || '","' || - G.NUM_NODES || '","' || - G.NUM_EDGES || '","' || - C.SIZE || '","' || - C.INST || '","' || - C.ITER || '","' || - ROUND(R.REAL_T*1000) || '"' AS CSV -FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R -WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +WHERE R.BENCH = C.BENCH +AND R.FILE_ID = F.FILE_ID AND G.FILE_ID = F.FILE_ID -AND C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID +AND C.FILE_ID = F.FILE_ID UNION ALL SELECT '' AS SYS, '' AS BENCH, '' AS IMPL, '' AS FILE_ID, 0 AS SEQ_NO, '"' || 'SYS' || '","' || diff --git a/graph/bench_cost.sql b/graph/bench_cost.sql deleted file mode 100644 index 686d8e29157cf2239641c7471519426e5fb46a5c..0000000000000000000000000000000000000000 --- a/graph/bench_cost.sql +++ /dev/null @@ -1,45 +0,0 @@ -INSERT INTO BENCH_COST VALUES('tcff','b17',1966082,16,1966082); -INSERT INTO BENCH_COST VALUES('tcff','b18',4194306,17,4194306); -INSERT INTO BENCH_COST VALUES('tcff','c1k',1000000,1000,1001000); -INSERT INTO BENCH_COST VALUES('tcff','c2k',4000000,2000,4002000); -INSERT INTO BENCH_COST VALUES('tcff','c3k',9000000,3000,9003000); -INSERT INTO BENCH_COST VALUES('tcff','c4k',16000000,4000,16004000); -INSERT INTO BENCH_COST VALUES('tcff','k100',10000,1,1010000); -INSERT INTO BENCH_COST VALUES('tcff','k1k',1000000,1,1001000000); -INSERT INTO BENCH_COST VALUES('tcff','k1k5',2250000,1,3377250000); -INSERT INTO BENCH_COST VALUES('tcff','k2k',4000000,1,8004000000); -INSERT INTO BENCH_COST VALUES('tcff','k50',2500,1,127500); -INSERT INTO BENCH_COST VALUES('tcff','k500',250000,1,125250000); -INSERT INTO BENCH_COST VALUES('tcff','m4_2ki',12288,3,12288); -INSERT INTO BENCH_COST VALUES('tcff','m16_512',61440,15,61440); -INSERT INTO BENCH_COST VALUES('tcff','m64_128',258048,63,258048); -INSERT INTO BENCH_COST VALUES('tcff','m256_32',1044480,255,1044480); -INSERT INTO BENCH_COST 
VALUES('tcff','m1ki_8',4190208,1023,4190208); -INSERT INTO BENCH_COST VALUES('tcff','m4ki_2',16773120,4095,16773120); -INSERT INTO BENCH_COST VALUES('tcff','p1k',499500,999,499500); -INSERT INTO BENCH_COST VALUES('tcff','p2k',1999000,1999,1999000); -INSERT INTO BENCH_COST VALUES('tcff','p3k',4498500,2999,4498500); -INSERT INTO BENCH_COST VALUES('tcff','p4k',7998000,3999,7998000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_1',1000000,500,2002000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_3',1000000,250,4004000); -INSERT INTO BENCH_COST VALUES('tcff','s1k_4',1000000,200,5005000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_1',4000000,1000,8004000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_3',4000000,500,16008000); -INSERT INTO BENCH_COST VALUES('tcff','s2k_4',4000000,400,20010000); -INSERT INTO BENCH_COST VALUES('tcff','t50',1225,1,20825); -INSERT INTO BENCH_COST VALUES('tcff','t100',4950,1,166650); -INSERT INTO BENCH_COST VALUES('tcff','t500',124750,1,20833250); -INSERT INTO BENCH_COST VALUES('tcff','t1k',499500,1,166666500); -INSERT INTO BENCH_COST VALUES('tcff','t1k5',1124250,1,562499750); -INSERT INTO BENCH_COST VALUES('tcff','t2k',1999000,1,1333333000); -INSERT INTO BENCH_COST VALUES('tcff','v10',8194,9,8194); -INSERT INTO BENCH_COST VALUES('tcff','v11',18434,10,18434); -INSERT INTO BENCH_COST VALUES('tcff','v12',40962,11,40962); -INSERT INTO BENCH_COST VALUES('tcff','v17',1966082,16,1966082); -INSERT INTO BENCH_COST VALUES('tcff','v18',4194306,17,4194306); -INSERT INTO BENCH_COST VALUES('tcff','w1k',1000000,1,1000000); -INSERT INTO BENCH_COST VALUES('tcff','x10k',100020000,2,100020000); -INSERT INTO BENCH_COST VALUES('tcff','y1k_4k',11998000,4000,11998000); -INSERT INTO BENCH_COST VALUES('tcff','y1k_8k',39996000,8000,39996000); -INSERT INTO BENCH_COST VALUES('tcff','y500_4k',9998000,4000,9998000); -INSERT INTO BENCH_COST VALUES('tcff','y500_8k',35996000,8000,35996000);