diff --git a/db/TODO b/db/TODO index 1cd6194d913d30e1a25ab6efe7b347dc11802f07..a40e07d047ac75c9b3414e2917c628789056a414 100644 --- a/db/TODO +++ b/db/TODO @@ -2,3 +2,11 @@ Enter new table INPUT_JOIN1 into db_doc.md Also work on documentation of views. And of data files, which were split from main create_db.sql + +New view BENCH_COST_MD. +New table BENCH_COST. +Columns TC_COST etc. removed from table INPUT_GRAPH. + +Probably, it would be possible now to merge TCFF_PLOT_READ and SGFF_PLOT_REAL +and so on (the new table BENCH_COST is more general than previous distinct +columns). diff --git a/db/bench_cost.sql b/db/bench_cost.sql new file mode 100644 index 0000000000000000000000000000000000000000..bc55e99cfe15c4d10b9e6a6f98ffe7b35be948cd --- /dev/null +++ b/db/bench_cost.sql @@ -0,0 +1,165 @@ +-- ============================================================================ +-- Project: rbench - Logic Programming and Database Benchmarks +-- Filename: db/bench_cost.sql +-- Purpose: Database for Benchmark Runs: Cost Measure Data for Benchmarks +-- Last Change: 04.10.2019 +-- Language: SQL (Tested with PostgreSQL) +-- Author: Stefan Brass +-- EMail: brass@informatik.uni-halle.de +-- WWW: http://www.informatik.uni-halle.de/~brass/ +-- Address: Feldschloesschen 15, D-06120 Halle (Saale), GERMANY +-- Copyright: (c) 2019 by Stefan Brass +-- License: See file "LICENSE" for copying conditions. +-- Note: There is no warranty at all - this code may contain bugs. 
+-- ============================================================================ + + +-- ============================================================================ +-- Cost Measures for Benchmarks (for Analysis of Runtimes): +-- ============================================================================ + +-- CREATE TABLE BENCH_COST ( +-- BENCH VARCHAR(20) NOT NULL, +-- FILE_ID VARCHAR(10) NOT NULL, +-- SIZE NUMERIC(12) NOT NULL, +-- ITER NUMERIC(12) NOT NULL, +-- INST NUMERIC(12) NOT NULL, +-- CONSTRAINT BENCH_COST_PK +-- PRIMARY KEY(BENCH, FILE_ID), +-- CONSTRAINT BENCH_COST_REF_BENCHMARK +-- FOREIGN KEY (BENCH) REFERENCES BENCHMARK, +-- CONSTRAINT BENCH_COST_REF_INPUT_FILE +-- FOREIGN KEY(FILE_ID) REFERENCES INPUT_FILE, +-- CONSTRAINT BENCH_COST_SIZE_NONNEGATIVE +-- CHECK(SIZE >= 0), +-- CONSTRAINT BENCH_COST_ITER_NONNEGATIVE +-- CHECK(ITER >= 0), +-- CONSTRAINT BENCH_COST_INST_NONNEGATIVE +-- CHECK(INST >= 0)); +-- +-- SIZE is the number of derived tuples of the benchmark (result size). +-- INST is the number of applicable rule instances in the benchmark program. +-- This is also the number of rule instances that semi-naive evaluation would +-- consider. +-- ITER is the number of iterations that the T_P operator needs to compute +-- the entire minimal model (counting only iterations that derive new tuples). +-- The application of the non-recursive rules is the first iteration +-- (if they derive at least one tuple, otherwise it does not count as an +-- iteration). 
+ + +-- ============================================================================ +-- INSERT Statements for Input Graph Data: +-- ============================================================================ + +INSERT INTO BENCH_COST VALUES('tcff','k50',2500,1,127500); +INSERT INTO BENCH_COST VALUES('tcff','k100',10000,1,1010000); +INSERT INTO BENCH_COST VALUES('tcff','k500',250000,1,125250000); +INSERT INTO BENCH_COST VALUES('tcff','k1k',1000000,1,1001000000); +INSERT INTO BENCH_COST VALUES('tcff','k2k',4000000,1,8004000000); +INSERT INTO BENCH_COST VALUES('tcff','t50',1225,1,20825); +INSERT INTO BENCH_COST VALUES('tcff','t100',4950,1,166650); +INSERT INTO BENCH_COST VALUES('tcff','t500',124750,1,20833250); +INSERT INTO BENCH_COST VALUES('tcff','t1k',499500,1,166666500); +INSERT INTO BENCH_COST VALUES('tcff','t2k',1999000,1,1333333000); +INSERT INTO BENCH_COST VALUES('tcff','c1k',1000000,1000,1001000); +INSERT INTO BENCH_COST VALUES('tcff','c2k',4000000,2000,4002000); +INSERT INTO BENCH_COST VALUES('tcff','c3k',9000000,3000,9003000); +INSERT INTO BENCH_COST VALUES('tcff','c4k',16000000,4000,16004000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_1',1000000,500,2002000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_2',1000000,201,3003000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_3',1000000,250,4004000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_4',1000000,200,5005000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_5',1000000,42,6006000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_1',4000000,1000,8004000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_2',4000000,288,12006000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_3',4000000,500,16008000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_4',4000000,400,20010000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_5',4000000,127,24012000); +INSERT INTO BENCH_COST VALUES('tcff','p1k',499500,999,499500); +INSERT INTO BENCH_COST VALUES('tcff','p2k',1999000,1999,1999000); +INSERT INTO BENCH_COST 
VALUES('tcff','p3k',4498500,2999,4498500); +INSERT INTO BENCH_COST VALUES('tcff','p4k',7998000,3999,7998000); +INSERT INTO BENCH_COST VALUES('tcff','m4_2ki',12288,3,12288); +INSERT INTO BENCH_COST VALUES('tcff','m16_512',61440,15,61440); +INSERT INTO BENCH_COST VALUES('tcff','m64_128',258048,63,258048); +INSERT INTO BENCH_COST VALUES('tcff','m256_32',1044480,255,1044480); +INSERT INTO BENCH_COST VALUES('tcff','m1ki_8',4190208,1023,4190208); +INSERT INTO BENCH_COST VALUES('tcff','m4ki_2',16773120,4095,16773120); +INSERT INTO BENCH_COST VALUES('tcff','b17',1966082,16,1966082); +INSERT INTO BENCH_COST VALUES('tcff','b18',4194306,17,4194306); +INSERT INTO BENCH_COST VALUES('tcff','b19',8912898,18,8912898); +INSERT INTO BENCH_COST VALUES('tcff','v10',8194,9,8194); +INSERT INTO BENCH_COST VALUES('tcff','v11',18434,10,18434); +INSERT INTO BENCH_COST VALUES('tcff','v12',40962,11,40962); +INSERT INTO BENCH_COST VALUES('tcff','v17',1966082,16,1966082); +INSERT INTO BENCH_COST VALUES('tcff','v18',4194306,17,4194306); +INSERT INTO BENCH_COST VALUES('tcff','v19',8912898,18,8912898); +INSERT INTO BENCH_COST VALUES('tcff','y500_4k',9998000,4000,9998000); +INSERT INTO BENCH_COST VALUES('tcff','y500_8k',35996000,8000,35996000); +INSERT INTO BENCH_COST VALUES('tcff','y1k_4k',11998000,4000,11998000); +INSERT INTO BENCH_COST VALUES('tcff','y1k_8k',39996000,8000,39996000); +INSERT INTO BENCH_COST VALUES('tcff','u1k_50k',1000000,3,50050000); +INSERT INTO BENCH_COST VALUES('tcff','u1k_125k',1000000,2,125125000); +INSERT INTO BENCH_COST VALUES('tcff','u1k_250k',1000000,2,250250000); +INSERT INTO BENCH_COST VALUES('tcff','u2k_200k',4000000,3,400200000); +INSERT INTO BENCH_COST VALUES('tcff','u2k_500k',4000000,2,1000500000); +INSERT INTO BENCH_COST VALUES('tcff','u2k_1m',4000000,2,2001000000); +INSERT INTO BENCH_COST VALUES('tcff','a1k_50k',471621,8,15269270); +INSERT INTO BENCH_COST VALUES('tcff','a1k_125k',492680,5,40853746); +INSERT INTO BENCH_COST 
VALUES('tcff','a1k_250k',497763,4,82941061); +INSERT INTO BENCH_COST VALUES('tcff','a2k_200k',1944729,8,128420988); +INSERT INTO BENCH_COST VALUES('tcff','a2k_500k',1985321,6,330666855); +INSERT INTO BENCH_COST VALUES('tcff','a2k_1m',1995698,4,666033613); +INSERT INTO BENCH_COST VALUES('tcff','w1k_1k',1000000,1,1000000); +INSERT INTO BENCH_COST VALUES('tcff','x10k',100020000,2,100020000); + +INSERT INTO BENCH_COST VALUES('sgff','k50',2500,2,6252500); +INSERT INTO BENCH_COST VALUES('sgff','k100',10000,2,100010000); +INSERT INTO BENCH_COST VALUES('sgff','k500',250000,2,62500250000); +INSERT INTO BENCH_COST VALUES('sgff','k1k',1000000,2,1000001000000); +INSERT INTO BENCH_COST VALUES('sgff','k2k',4000000,2,16000004000000); +INSERT INTO BENCH_COST VALUES('sgff','t50',2402,2,1386602); +INSERT INTO BENCH_COST VALUES('sgff','t100',9802,2,23546952); +INSERT INTO BENCH_COST VALUES('sgff','t500',249002,2,15438684752); +INSERT INTO BENCH_COST VALUES('sgff','t1k',998002,2,248504744502); +INSERT INTO BENCH_COST VALUES('sgff','t2k',3996002,2,3988018989002); +INSERT INTO BENCH_COST VALUES('sgff','c1k',1000,1,2000); +INSERT INTO BENCH_COST VALUES('sgff','c2k',2000,1,4000); +INSERT INTO BENCH_COST VALUES('sgff','c3k',3000,1,6000); +INSERT INTO BENCH_COST VALUES('sgff','c4k',4000,1,8000); +INSERT INTO BENCH_COST VALUES('sgff','s1k_1',1000000,501,4002000); +INSERT INTO BENCH_COST VALUES('sgff','s1k_4',1000000,101,25005000); +INSERT INTO BENCH_COST VALUES('sgff','s2k_1',4000000,1001,16004000); +INSERT INTO BENCH_COST VALUES('sgff','s2k_3',4000000,251,64008000); +INSERT INTO BENCH_COST VALUES('sgff','s2k_4',4000000,201,100010000); +INSERT INTO BENCH_COST VALUES('sgff','p1k',1000,2,1998); +INSERT INTO BENCH_COST VALUES('sgff','p2k',2000,2,3998); +INSERT INTO BENCH_COST VALUES('sgff','p3k',3000,2,5998); +INSERT INTO BENCH_COST VALUES('sgff','p4k',4000,2,7998); +INSERT INTO BENCH_COST VALUES('sgff','m4_2ki',8192,2,12288); +INSERT INTO BENCH_COST VALUES('sgff','m16_512',8192,2,15360); 
+INSERT INTO BENCH_COST VALUES('sgff','m64_128',8192,2,16128); +INSERT INTO BENCH_COST VALUES('sgff','m256_32',8192,2,16320); +INSERT INTO BENCH_COST VALUES('sgff','m1ki_8',8192,2,16368); +INSERT INTO BENCH_COST VALUES('sgff','m4ki_2',8192,2,16380); +INSERT INTO BENCH_COST VALUES('sgff','b17',131071,2,262140); +INSERT INTO BENCH_COST VALUES('sgff','b18',262143,2,524284); +INSERT INTO BENCH_COST VALUES('sgff','b19',524287,2,1048572); +INSERT INTO BENCH_COST VALUES('sgff','v10',349525,9,350546); +INSERT INTO BENCH_COST VALUES('sgff','v11',1398101,10,1400146); +INSERT INTO BENCH_COST VALUES('sgff','v12',5592405,11,5596498); +INSERT INTO BENCH_COST VALUES('sgff','v17',5726623061,16,5726754130); +INSERT INTO BENCH_COST VALUES('sgff','v18',22906492245,17,22906754386); +INSERT INTO BENCH_COST VALUES('sgff','v19',91625968981,18,91626493266); +INSERT INTO BENCH_COST VALUES('sgff','y500_4k',254000,2,258498); +INSERT INTO BENCH_COST VALUES('sgff','y500_8k',258000,2,266498); +INSERT INTO BENCH_COST VALUES('sgff','y1k_4k',1004000,2,1008998); +INSERT INTO BENCH_COST VALUES('sgff','y1k_8k',1008000,2,1016998); + + +-- ============================================================================ +-- Automatically Added Benchmark Cost Data: +-- ============================================================================ + +-- DO NOT DELETE THIS MARK diff --git a/db/create_db.sql b/db/create_db.sql index c2b39d1ee8f1e6cbddcecbb820b2ca1eb0de5dd3..911e01a50014eadaa03833d2dfbae297d6749733 100644 --- a/db/create_db.sql +++ b/db/create_db.sql @@ -403,12 +403,6 @@ CREATE TABLE INPUT_GRAPH ( MIN_OUT_DEGREE NUMERIC(12) NOT NULL, MAX_OUT_DEGREE NUMERIC(12) NOT NULL, CYCLES CHAR(1) NOT NULL, - TC_SIZE NUMERIC(12) NOT NULL, - TC_ITER NUMERIC(12) NOT NULL, - TC_COST NUMERIC(12) NOT NULL, - SG_SIZE NUMERIC(12) NULL, - SG_ITER NUMERIC(12) NULL, - SG_COST NUMERIC(16) NULL, CONSTRAINT INPUT_GRAPH_PK PRIMARY KEY(FILE_ID), CONSTRAINT INPUT_GRAPH_REF_INPUT_FILE @@ -438,19 +432,7 @@ CREATE TABLE 
INPUT_GRAPH ( CONSTRAINT INPUT_GRAPH_MAX_OUT_DEGREE_NONNEGATIVE CHECK(MAX_OUT_DEGREE >= 0), CONSTRAINT INPUT_GRAPH_CYCLES_YES_NO - CHECK(CYCLES IN ('Y','N')), - CONSTRAINT INPUT_GRAPH_TC_SIZE_NONNEGATIVE - CHECK(TC_SIZE >= NUM_EDGES), - CONSTRAINT INPUT_GRAPH_TC_ITER_NONNEGATIVE - CHECK(TC_ITER >= 0), - CONSTRAINT INPUT_GRAPH_TC_COST_NONNEGATIVE - CHECK(TC_COST >= 0), - CONSTRAINT INPUT_GRAPH_SG_SIZE_NONNEGATIVE - CHECK(SG_SIZE >= 0), - CONSTRAINT INPUT_GRAPH_SG_ITER_NONNEGATIVE - CHECK(SG_ITER >= 0), - CONSTRAINT INPUT_GRAPH_SG_COST_NONNEGATIVE - CHECK(SG_COST >= 0)); + CHECK(CYCLES IN ('Y','N'))); -- GTYPE, PARAM_1, PARAM_2, and PARAM_3 are the parameters for the @@ -467,31 +449,6 @@ CREATE TABLE INPUT_GRAPH ( -- LOOPS='Y' means that there are edges of the form (x,x). --- TC_SIZE, TC_ITER, and TC_COST are for the "Transitive Closure" benchmark: --- tc(X, Y) :- par(X, Y). --- tc(X, Z) :- par(X, Y), tc(Y, Z). --- This is the standard tail-recursive Prolog program for computing the --- transitive closure of the par relation (all pairs of nodes that are --- connected by a path of "par"-edges). --- TC_SIZE is the number of tuples in the derived "tc" relation. --- TC_COST is the number of applicable rule instances in the above program. --- This is also the number of rule instances that semi-naive evaluation would --- consider. --- It can be computed as the sum of the number of edges --- (i.e. the size of the par relation) and --- the size of the join of par and tc. - --- SG_SIZE, SG_ITER, and SG_COST are for the "Same Generation Cousins" --- benchmark: --- sg(_,Y) :- par(_, Y). --- sg(X1, Y1) :- par(X1, X), sg(X, Y), par(Y1, Y). --- SG_SIZE is the size of the so computed sg relation. --- SG_COST is the number of applicable rule instances in the above program. --- SG_ITER is the number of iterations that the T_P operator needs to compute --- the entire sg relation (counting only iterations that derive new tuples). 
--- The application of the non-recursive rule is the first iteration --- (if par is non-empty). - -- The data is in file input_graphs.sql @@ -512,10 +469,10 @@ CREATE TABLE INPUT_JOIN1 ( B1_SIZE NUMERIC(12) NOT NULL, B2_SIZE NUMERIC(12) NOT NULL, C1_SIZE NUMERIC(12) NOT NULL, - A_COST NUMERIC(12) NOT NULL, - B1_COST NUMERIC(12) NOT NULL, - B2_COST NUMERIC(12) NOT NULL, - C1_COST NUMERIC(12) NOT NULL, + A_INST NUMERIC(12) NOT NULL, + B1_INST NUMERIC(12) NOT NULL, + B2_INST NUMERIC(12) NOT NULL, + C1_INST NUMERIC(12) NOT NULL, CONSTRAINT INPUT_JOIN1_PK PRIMARY KEY(FILE_ID), CONSTRAINT INPUT_JOIN1_REF_INPUT_FILE @@ -540,22 +497,22 @@ CREATE TABLE INPUT_JOIN1 ( CHECK(D1_SIZE >= 0), CONSTRAINT INPUT_JOIN1_D2_SIZE_NONNEGATIVE CHECK(D2_SIZE >= 0), - CONSTRAINT INPUT_JOIN1_A_COST_NONNEGATIVE - CHECK(A_COST >= 0), - CONSTRAINT INPUT_JOIN1_B1_COST_NONNEGATIVE - CHECK(B1_COST >= 0), - CONSTRAINT INPUT_JOIN1_B2_COST_NONNEGATIVE - CHECK(B2_COST >= 0), - CONSTRAINT INPUT_JOIN1_C1_COST_NONNEGATIVE - CHECK(C1_COST >= 0), - CONSTRAINT INPUT_JOIN1_A_COST_GE_A_SIZE - CHECK(A_COST >= A_SIZE), - CONSTRAINT INPUT_JOIN1_B1_COST_GE_B1_SIZE - CHECK(B1_COST >= B1_SIZE), - CONSTRAINT INPUT_JOIN1_B2_COST_GE_B2_SIZE - CHECK(B2_COST >= B2_SIZE), - CONSTRAINT INPUT_JOIN1_C1_COST_GE_C1_SIZE - CHECK(C1_COST >= C1_SIZE)); + CONSTRAINT INPUT_JOIN1_A_INST_NONNEGATIVE + CHECK(A_INST >= 0), + CONSTRAINT INPUT_JOIN1_B1_INST_NONNEGATIVE + CHECK(B1_INST >= 0), + CONSTRAINT INPUT_JOIN1_B2_INST_NONNEGATIVE + CHECK(B2_INST >= 0), + CONSTRAINT INPUT_JOIN1_C1_INST_NONNEGATIVE + CHECK(C1_INST >= 0), + CONSTRAINT INPUT_JOIN1_A_INST_GE_A_SIZE + CHECK(A_INST >= A_SIZE), + CONSTRAINT INPUT_JOIN1_B1_INST_GE_B1_SIZE + CHECK(B1_INST >= B1_SIZE), + CONSTRAINT INPUT_JOIN1_B2_INST_GE_B2_SIZE + CHECK(B2_INST >= B2_SIZE), + CONSTRAINT INPUT_JOIN1_C1_INST_GE_C1_SIZE + CHECK(C1_INST >= C1_SIZE)); -- A_SIZE and so on are the number of facts for the predicate. -- The input file contains the predicates D1, D2, C2, C3, C4. 
@@ -565,10 +522,46 @@ CREATE TABLE INPUT_JOIN1 ( -- b2(X, Y) :- c3(X, Z), c4(Z, Y). -- c1(X, Y) :- d1(X, Z), d2(Z, Y). --- A_COST and so on are the number of instances of the rule that derives A. +-- A_INST and so on are the number of instances of the rule that derives A. -- The data is in file input_join1.sql +-- ============================================================================ +-- Cost Measures for Benchmarks (for Analysis of Runtimes): +-- ============================================================================ + +CREATE TABLE BENCH_COST ( + BENCH VARCHAR(20) NOT NULL, + FILE_ID VARCHAR(10) NOT NULL, + SIZE NUMERIC(12) NOT NULL, + ITER NUMERIC(12) NOT NULL, + INST NUMERIC(16) NOT NULL, -- 16 digits (as the former SG_COST): sgff values reach 14 digits, e.g. 16000004000000 for k2k + CONSTRAINT BENCH_COST_PK + PRIMARY KEY(BENCH, FILE_ID), + CONSTRAINT BENCH_COST_REF_BENCHMARK + FOREIGN KEY (BENCH) REFERENCES BENCHMARK, + CONSTRAINT BENCH_COST_REF_INPUT_FILE + FOREIGN KEY(FILE_ID) REFERENCES INPUT_FILE, + CONSTRAINT BENCH_COST_SIZE_NONNEGATIVE + CHECK(SIZE >= 0), + CONSTRAINT BENCH_COST_ITER_NONNEGATIVE + CHECK(ITER >= 0), + CONSTRAINT BENCH_COST_INST_NONNEGATIVE + CHECK(INST >= 0)); + +-- SIZE is the number of derived tuples of the benchmark (result size). +-- INST is the number of applicable rule instances in the benchmark program. +-- This is also the number of rule instances that semi-naive evaluation would +-- consider. +-- ITER is the number of iterations that the T_P operator needs to compute +-- the entire minimal model (counting only iterations that derive new tuples). +-- The application of the non-recursive rules is the first iteration +-- (if they derive at least one tuple, otherwise it does not count as an +-- iteration). 
+ +-- The data is in file bench_cost.sql + + -- ============================================================================ -- Table for Results of Benchmark Runs (Test Runs): -- ============================================================================ diff --git a/db/drop_db.sql b/db/drop_db.sql index 4511220db3534848857655c2511c08f332e94375..66224b714b399f82493089af2b3facc246b2528c 100644 --- a/db/drop_db.sql +++ b/db/drop_db.sql @@ -2,7 +2,7 @@ -- Project: ddbench - Logic Programming and Database Benchmarks -- Filename: db/drop_db.sql -- Purpose: Database for Benchmark Runs: Drop all Tables and Views --- Last Change: 28.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -39,6 +39,7 @@ DROP VIEW BENCH_FILE_LIST; -- Views to Print Table Data in Markdown Format for Docs (File v_doc.sql): -- ============================================================================ +DROP VIEW BENCH_COST_MD; DROP VIEW INPUT_GRAPH_MD; DROP VIEW INPUT_FILE_MD; DROP VIEW BENCH_IMPL_MD; @@ -190,6 +191,7 @@ DROP TABLE DUMMY_TAB; DROP TABLE BENCH_RUN; DROP TABLE BENCH_IMPL; +DROP TABLE BENCH_COST; DROP TABLE INPUT_JOIN1; DROP TABLE INPUT_GRAPH; DROP TABLE GRAPH_TYPE; diff --git a/db/input_graphs.sql b/db/input_graphs.sql index 9770a79aee6860e76f8571ca6245143f34882ab0..2c8fe1b60453a29c66e49d282ab201a020e4463d 100644 --- a/db/input_graphs.sql +++ b/db/input_graphs.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/input_graphs.sql -- Purpose: Database for Benchmark Runs: Data of Input Graphs --- Last Change: 16.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -33,12 +33,6 @@ -- MIN_OUT_DEGREE NUMERIC(12) NOT NULL, -- MAX_OUT_DEGREE NUMERIC(12) NOT NULL, -- CYCLES CHAR(1) NOT NULL, --- TC_SIZE NUMERIC(12) NOT NULL, --- TC_ITER NUMERIC(12) NOT NULL, --- 
TC_COST NUMERIC(12) NOT NULL, --- SG_SIZE NUMERIC(12) NULL, --- SG_ITER NUMERIC(12) NULL, --- SG_COST NUMERIC(12) NULL, -- CONSTRAINT INPUT_GRAPH_PK -- PRIMARY KEY(FILE_ID), -- CONSTRAINT INPUT_GRAPH_REF_INPUT_FILE @@ -68,19 +62,7 @@ -- CONSTRAINT INPUT_GRAPH_MAX_OUT_DEGREE_NONNEGATIVE -- CHECK(MAX_OUT_DEGREE >= 0), -- CONSTRAINT INPUT_GRAPH_CYCLES_YES_NO --- CHECK(CYCLES IN ('Y','N')), --- CONSTRAINT INPUT_GRAPH_TC_SIZE_NONNEGATIVE --- CHECK(TC_SIZE >= NUM_EDGES), --- CONSTRAINT INPUT_GRAPH_TC_ITER_NONNEGATIVE --- CHECK(TC_ITER >= 0), --- CONSTRAINT INPUT_GRAPH_TC_COST_NONNEGATIVE --- CHECK(TC_COST >= 0), --- CONSTRAINT INPUT_GRAPH_SG_SIZE_NONNEGATIVE --- CHECK(SG_SIZE >= 0), --- CONSTRAINT INPUT_GRAPH_SG_ITER_NONNEGATIVE --- CHECK(SG_ITER >= 0), --- CONSTRAINT INPUT_GRAPH_SG_COST_NONNEGATIVE --- CHECK(SG_COST >= 0)); +-- CHECK(CYCLES IN ('Y','N'))); -- GTYPE, PARAM_1, PARAM_2, and PARAM_3 are the parameters for the @@ -97,224 +79,197 @@ -- LOOPS='Y' means that there are edges of the form (x,x). --- TC_SIZE, TC_ITER, and TC_COST are for the "Transitive Closure" benchmark: --- tc(X, Y) :- par(X, Y). --- tc(X, Z) :- par(X, Y), tc(Y, Z). --- This is the standard tail-recursive Prolog program for computing the --- transitive closure of the par relation (all pairs of nodes that are --- connected by a path of "par"-edges). --- TC_SIZE is the number of tuples in the derived "tc" relation. --- TC_COST is the number of applicable rule instances in the above program. --- This is also the number of rule instances that semi-naive evaluation would --- consider. --- It can be computed as the sum of the number of edges --- (i.e. the size of the par relation) and --- the size of the join of par and tc. - --- SG_SIZE, SG_ITER, and SG_COST are for the "Same Generation Cousins" --- benchmark: --- sg(_,Y) :- par(_, Y). --- sg(X1, Y1) :- par(X1, X), sg(X, Y), par(Y1, Y). --- SG_SIZE is the size of the so computed sg relation. 
--- SG_COST is the number of applicable rule instances in the above program. --- SG_ITER is the number of iterations that the T_P operator needs to compute --- the entire sg relation (counting only iterations that derive new tuples). --- The application of the non-recursive rule is the first iteration --- (if par is non-empty). - - -- ============================================================================ -- INSERT Statements for Input Graph Data: -- ============================================================================ INSERT INTO INPUT_GRAPH VALUES('b17','B',17,NULL, NULL, -131071,131070,0,'N',0,1,0,2,'N',1966082,16,1966082,131071,2,262140); +131071,131070,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('b18','B',18,NULL, NULL, -262143,262142,0,'N',0,1,0,2,'N',4194306,17,4194306,262143,2,524284); +262143,262142,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('b19','B',19,NULL, NULL, -524287,524286,0,'N',0,1,0,2,'N',8912898,18,8912898,524287,2,1048572); +524287,524286,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v10','B',10,NULL,'I', -1023,1022,0,'N',0,1,0,2,'N',8194,9,8194,349525,9,350546); +1023,1022,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v11','B',11,NULL,'I', -2047,2046,0,'N',0,1,0,2,'N',18434,10,18434,1398101,10,1400146); +2047,2046,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v12','B',12,NULL,'I', -4095,4094,0,'N',0,1,0,2,'N',40962,11,40962,5592405,11,5596498); +4095,4094,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v17','B',17,NULL,'I', -131071,131070,0,'N',0,1,0,2,'N',1966082,16,1966082,5726623061,16,5726754130); +131071,131070,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v18','B',18,NULL,'I', -262143,262142,0,'N',0,1,0,2,'N',4194306,17,4194306,22906492245,17,22906754386); +262143,262142,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH VALUES('v19','B',19,NULL,'I', -524287,524286,0,'N',0,1,0,2,'N',8912898,18,8912898,91625968981,18,91626493266); +524287,524286,0,'N',0,1,0,2,'N'); INSERT INTO INPUT_GRAPH 
VALUES('c1k','C',1000,NULL, NULL, -1000,1000,0,'N',1,1,1,1,'Y',1000000,1000,1001000,1000,1,2000); +1000,1000,0,'N',1,1,1,1,'Y'); INSERT INTO INPUT_GRAPH VALUES('c2k','C',2000,NULL, NULL, -2000,2000,0,'N',1,1,1,1,'Y',4000000,2000,4002000,2000,1,4000); +2000,2000,0,'N',1,1,1,1,'Y'); INSERT INTO INPUT_GRAPH VALUES('c3k','C',3000,NULL, NULL, -3000,3000,0,'N',1,1,1,1,'Y',9000000,3000,9003000,3000,1,6000); +3000,3000,0,'N',1,1,1,1,'Y'); INSERT INTO INPUT_GRAPH VALUES('c4k','C',4000,NULL, NULL, -4000,4000,0,'N',1,1,1,1,'Y',16000000,4000,16004000,4000,1,8000); +4000,4000,0,'N',1,1,1,1,'Y'); INSERT INTO INPUT_GRAPH VALUES('k50','K',50,NULL, NULL, -50,2500,0,'Y',50,50,50,50,'Y',2500,1,127500,2500,2,6252500); +50,2500,0,'Y',50,50,50,50,'Y'); INSERT INTO INPUT_GRAPH VALUES('k100','K',100,NULL, NULL, -100,10000,0,'Y',100,100,100,100,'Y',10000,1,1010000,10000,2,100010000); +100,10000,0,'Y',100,100,100,100,'Y'); INSERT INTO INPUT_GRAPH VALUES('k500','K',500,NULL, NULL, -500,250000,0,'Y',500,500,500,500,'Y',250000,1,125250000,250000,2,62500250000); +500,250000,0,'Y',500,500,500,500,'Y'); INSERT INTO INPUT_GRAPH VALUES('k1k','K',1000,NULL, NULL, -1000,1000000,0,'Y',1000,1000,1000,1000,'Y',1000000,1,1001000000,1000000,2,1000001000000); +1000,1000000,0,'Y',1000,1000,1000,1000,'Y'); INSERT INTO INPUT_GRAPH VALUES('k2k','K',2000,NULL, NULL, -2000,4000000,0,'Y',2000,2000,2000,2000,'Y',4000000,1,8004000000,4000000,2,16000004000000); +2000,4000000,0,'Y',2000,2000,2000,2000,'Y'); INSERT INTO INPUT_GRAPH VALUES('t50','T',50,NULL, NULL, -50,1225,0,'N',0,49,0,49,'N',1225,1,20825,2402,2,1386602); +50,1225,0,'N',0,49,0,49,'N'); INSERT INTO INPUT_GRAPH VALUES('t100','T',100,NULL, NULL, -100,4950,0,'N',0,99,0,99,'N',4950,1,166650,9802,2,23546952); +100,4950,0,'N',0,99,0,99,'N'); INSERT INTO INPUT_GRAPH VALUES('t500','T',500,NULL, NULL, -500,124750,0,'N',0,499,0,499,'N',124750,1,20833250,249002,2,15438684752); +500,124750,0,'N',0,499,0,499,'N'); INSERT INTO INPUT_GRAPH VALUES('t1k','T',1000,NULL, 
NULL, -1000,499500,0,'N',0,999,0,999,'N',499500,1,166666500,998002,2,248504744502); +1000,499500,0,'N',0,999,0,999,'N'); INSERT INTO INPUT_GRAPH VALUES('t2k','T',2000,NULL, NULL, -2000,1999000,0,'N',0,1999,0,1999,'N',1999000,1,1333333000,3996002,2,3988018989002); +2000,1999000,0,'N',0,1999,0,1999,'N'); INSERT INTO INPUT_GRAPH VALUES('m4ki_2','M',4096,2, NULL, -8192,8190,0,'N',0,1,0,1,'N',16773120,4095,16773120,8192,2,16380); +8192,8190,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('m1ki_8','M',1024,8, NULL, -8192,8184,0,'N',0,1,0,1,'N',4190208,1023,4190208,8192,2,16368); +8192,8184,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('m256_32','M',256,32, NULL, -8192,8160,0,'N',0,1,0,1,'N',1044480,255,1044480,8192,2,16320); +8192,8160,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('m64_128','M',64,128, NULL, -8192,8064,0,'N',0,1,0,1,'N',258048,63,258048,8192,2,16128); +8192,8064,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('m16_512','M', 16, 512, NULL, -8192,7680,0,'N',0,1,0,1,'N',61440,15,61440,8192,2,15360); +8192,7680,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('m4_2ki','M', 4, 2048, NULL, -8192,6144,0,'N',0,1,0,1,'N',12288,3,12288,8192,2,12288); +8192,6144,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('p1k','P',1000,NULL, NULL, -1000,999,0,'N',0,1,0,1,'N',499500,999,499500,1000,2,1998); +1000,999,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('p2k','P',2000,NULL, NULL, -2000,1999,0,'N',0,1,0,1,'N',1999000,1999,1999000,2000,2,3998); +2000,1999,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('p3k','P',3000,NULL, NULL, -3000,2999,0,'N',0,1,0,1,'N',4498500,2999,4498500,3000,2,5998); +3000,2999,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('p4k','P',4000,NULL, NULL, -4000,3999,0,'N',0,1,0,1,'N',7998000,3999,7998000,4000,2,7998); +4000,3999,0,'N',0,1,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('s2k_1','S', 2000, 1, NULL, -2000,4000,0,'N',2,2,2,2,'Y',4000000,1000,8004000,4000000,1001,16004000); +2000,4000,0,'N',2,2,2,2,'Y'); INSERT 
INTO INPUT_GRAPH VALUES('s2k_2','S', 2000, 2, NULL, -2000,6000,0,'N',3,3,3,3,'Y',4000000,288,12006000,4000000,NULL,36006000); +2000,6000,0,'N',3,3,3,3,'Y'); INSERT INTO INPUT_GRAPH VALUES('s2k_3','S', 2000, 3, NULL, -2000,8000,0,'N',4,4,4,4,'Y',4000000,500,16008000,4000000,251,64008000); +2000,8000,0,'N',4,4,4,4,'Y'); INSERT INTO INPUT_GRAPH VALUES('s2k_4','S', 2000, 4, NULL, -2000,10000,0,'N',5,5,5,5,'Y',4000000,400,20010000,4000000,201,100010000); +2000,10000,0,'N',5,5,5,5,'Y'); INSERT INTO INPUT_GRAPH VALUES('s2k_5','S', 2000, 5, NULL, -2000,12000,0,'N',6,6,6,6,'Y',4000000,127,24012000,4000000,NULL,144012000); +2000,12000,0,'N',6,6,6,6,'Y'); INSERT INTO INPUT_GRAPH VALUES('s1k_1','S', 1000, 1, NULL, -1000,2000,0,'N',2,2,2,2,'Y',1000000,500,2002000,1000000,501,4002000); +1000,2000,0,'N',2,2,2,2,'Y'); INSERT INTO INPUT_GRAPH VALUES('s1k_2','S', 1000, 2, NULL, -1000,3000,0,'N',3,3,3,3,'Y',1000000,201,3003000,1000000,NULL,9003000); +1000,3000,0,'N',3,3,3,3,'Y'); INSERT INTO INPUT_GRAPH VALUES('s1k_3','S', 1000, 3, NULL, -1000,4000,0,'N',4,4,4,4,'Y',1000000,250,4004000,1000000,NULL,16004000); +1000,4000,0,'N',4,4,4,4,'Y'); INSERT INTO INPUT_GRAPH VALUES('s1k_4','S', 1000, 4, NULL, -1000,5000,0,'N',5,5,5,5,'Y',1000000,200,5005000,1000000,101,25005000); +1000,5000,0,'N',5,5,5,5,'Y'); INSERT INTO INPUT_GRAPH VALUES('s1k_5','S', 1000, 5, NULL, -1000,6000,0,'N',6,6,6,6,'Y',1000000,42,6006000,1000000,NULL,36006000); +1000,6000,0,'N',6,6,6,6,'Y'); INSERT INTO INPUT_GRAPH VALUES('u1k_50k','U', 1000, 50000, NULL, -1000,50000,0,'N',46,51,46,51,'Y',1000000,3,50050000,1000000,NULL,NULL); +1000,50000,0,'N',46,51,46,51,'Y'); INSERT INTO INPUT_GRAPH VALUES('u1k_125k','U', 1000, 125000, NULL, -1000,125000,0,'N',122,127,122,127,'Y',1000000,2,125125000,NULL,NULL,NULL); +1000,125000,0,'N',122,127,122,127,'Y'); INSERT INTO INPUT_GRAPH VALUES('u1k_250k','U', 1000, 250000, NULL, -1000,250000,0,'N',248,251,248,251,'Y',1000000,2,250250000,NULL,NULL,NULL); 
+1000,250000,0,'N',248,251,248,251,'Y'); INSERT INTO INPUT_GRAPH VALUES('u2k_200k','U', 2000, 200000, NULL, -2000,200000,0,'N',98,101,98,101,'Y',4000000,3,400200000,NULL,NULL,NULL); +2000,200000,0,'N',98,101,98,101,'Y'); INSERT INTO INPUT_GRAPH VALUES('u2k_500k','U', 2000, 500000, NULL, -2000,500000,0,'N',248,251,248,251,'Y',4000000,2,1000500000,NULL,NULL,NULL); +2000,500000,0,'N',248,251,248,251,'Y'); INSERT INTO INPUT_GRAPH VALUES('u2k_1m','U', 2000, 1000000, NULL, -2000,1000000,0,'N',499,502,499,502,'Y',4000000,2,2001000000,NULL,NULL,NULL); +2000,1000000,0,'N',499,502,499,502,'Y'); INSERT INTO INPUT_GRAPH VALUES('a1k_50k','A', 1000, 50000, NULL, -1000,50000,0,'N',0,102,0,102,'N',471621,8,15269270,NULL,NULL,NULL); +1000,50000,0,'N',0,102,0,102,'N'); INSERT INTO INPUT_GRAPH VALUES('a1k_125k','A', 1000, 125000, NULL, -1000,125000,0,'N',0,250,0,250,'N',492680,5,40853746,NULL,NULL,NULL); +1000,125000,0,'N',0,250,0,250,'N'); INSERT INTO INPUT_GRAPH VALUES('a1k_250k','A', 1000, 250000, NULL, -1000,250000,0,'N',0,500,0,501,'N',497763,4,82941061,NULL,NULL,NULL); +1000,250000,0,'N',0,500,0,501,'N'); INSERT INTO INPUT_GRAPH VALUES('a2k_200k','A', 2000, 200000, NULL, -2000,200000,0,'N',0,201,0,201,'N',1944729,8,128420988,NULL,NULL,NULL); +2000,200000,0,'N',0,201,0,201,'N'); INSERT INTO INPUT_GRAPH VALUES('a2k_500k','A', 2000, 500000, NULL, -2000,500000,0,'N',0,500,0,500,'N',1985321,6,330666855,NULL,NULL,NULL); +2000,500000,0,'N',0,500,0,500,'N'); INSERT INTO INPUT_GRAPH VALUES('a2k_1m','A', 2000, 1000000, NULL, -2000,1000000,0,'N',0,1000,0,1000,'N',1995698,4,666033613,NULL,NULL,NULL); +2000,1000000,0,'N',0,1000,0,1000,'N'); INSERT INTO INPUT_GRAPH VALUES('y1k_4k','Y', 1000, 4000, NULL, -5000,4999,0,'N',0,1000,0,1,'N',11998000,4000,11998000,1004000,2,1008998); +5000,4999,0,'N',0,1000,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('y1k_8k','Y', 1000, 8000, NULL, -9000,8999,0,'N',0,1000,0,1,'N',39996000,8000,39996000,1008000,2,1016998); +9000,8999,0,'N',0,1000,0,1,'N'); INSERT INTO 
INPUT_GRAPH VALUES('y500_4k','Y', 500, 4000, NULL, -4500,4499,0,'N',0,500,0,1,'N',9998000,4000,9998000,254000,2,258498); +4500,4499,0,'N',0,500,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('y500_8k','Y', 500, 8000, NULL, -8500,8499,0,'N',0,500,0,1,'N',35996000,8000,35996000,258000,2,266498); +8500,8499,0,'N',0,500,0,1,'N'); INSERT INTO INPUT_GRAPH VALUES('w1k_1k','W', 1000, 1000, NULL, -2000,1000000,0,'N',0,1000,0,1000,'N',1000000,1,1000000,NULL,NULL,NULL); +2000,1000000,0,'N',0,1000,0,1000,'N'); INSERT INTO INPUT_GRAPH VALUES('x10k','X', 10000, NULL, NULL, -20001,20000,0,'N',0,10000,0,10000,'N',100020000,2,100020000,NULL,NULL,NULL); +20001,20000,0,'N',0,10000,0,10000,'N'); -- INSERT INTO INPUT_GRAPH VALUES(FILE_ID, GTYPE, PARAM_1, PARAM_2, PARAM_3, -- NUM_NODES, NUM_EDGES, DUP_EDGES, LOOPS, MIN_IN_DEGREE, MAX_IN_DEGREE, --- MIN_OUT_DEGREE, MAX_OUT_DEGREE, CYCLES, TC_SIZE, TC_ITER, TC_COST, --- SG_SIZE, SG_ITER, SG_COST); +-- MIN_OUT_DEGREE, MAX_OUT_DEGREE, CYCLES); -- ============================================================================ -- Automatically Added Input Graph Data: diff --git a/db/v_doc.sql b/db/v_doc.sql index 74c65469c03c27f5285b48788d151a7fca473292..3b7936089622421b1f4658da36d248a7bac2f11c 100644 --- a/db/v_doc.sql +++ b/db/v_doc.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_doc.sql -- Purpose: Benchmark DB: Views to Produce Table Data in Markdown Format --- Last Change: 04.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -66,7 +66,7 @@ SELECT 0 AS SECTION, 0 AS SEQ_NO, 'DUP_EDGES|LOOPS|' || 'MIN_IN_DEGREE|MAX_IN_DEGREE|' || 'MIN_OUT_DEGREE|MAX_OUT_DEGREE|' || - 'CYCLES|TC_SIZE|TC_ITER|TC_COST|' AS MD + 'CYCLES|' AS MD UNION ALL SELECT 1 AS SECTION, 0 AS SEQ_NO, '|-------|' || @@ -75,7 +75,7 @@ SELECT 1 AS SECTION, 0 AS SEQ_NO, '--------:|:---:|' || '------------:|------------:|' || 
'-------------:|-------------:|' || - ':----:|------:|-------:|---:|' AS MD + ':----:|' AS MD UNION ALL SELECT 2 AS SECTION, F.SEQ_NO, '|' || G.FILE_ID || '|' || @@ -88,9 +88,31 @@ SELECT 2 AS SECTION, F.SEQ_NO, DUP_EDGES || '|' || LOOPS || '|' || MIN_IN_DEGREE || '|' || MAX_IN_DEGREE || '|' || MIN_OUT_DEGREE || '|' || MAX_OUT_DEGREE || '|' || - CYCLES || '|' || TC_SIZE || '|' || TC_ITER || '|' || TC_COST || '|' + CYCLES || '|' || '|' AS MD FROM INPUT_GRAPH G, INPUT_FILE F WHERE G.FILE_ID = F.FILE_ID ORDER BY SECTION, SEQ_NO; + +-- ============================================================================ +-- BENCH_COST_MD: +-- ============================================================================ + +CREATE VIEW BENCH_COST_MD AS +SELECT 0 AS SECTION, 0 AS SEQ_NO, + '|BENCH|FILE_ID|' || + 'SIZE|ITER|INST|' AS MD +UNION ALL +SELECT 1 AS SECTION, 0 AS SEQ_NO, + '|-------|--------|' || + '------------:|------:|------------:|' AS MD +UNION ALL +SELECT 2 AS SECTION, F.SEQ_NO, + '|' || B.BENCH || '|' || B.FILE_ID || '|' || + B.SIZE || '|' || B.ITER || '|' || B.INST || '|' + AS MD +FROM BENCH_COST B, INPUT_FILE F +WHERE B.FILE_ID = F.FILE_ID +ORDER BY SECTION, SEQ_NO; + diff --git a/db/v_estimate.sql b/db/v_estimate.sql index b26e0857019c50968620efda57398833d889f600..9c1a50d90f03721f9675a70215ae7a747a0b09e0 100644 --- a/db/v_estimate.sql +++ b/db/v_estimate.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_estimate.sql -- Purpose: Benchmark DB: Views to Support Research on Runtime Estimation --- Last Change: 16.04.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -75,12 +75,13 @@ SELECT R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, ROUND(CAST( E.STARTUP + (G.NUM_EDGES * (LOG(G.NUM_EDGES)/LOG(2)) * E.LOAD_IDX / 1000000000) + - (G.TC_COST * E.RULE_APP / 1000000000) + - (G.TC_SIZE * E.RESULT / 1000000000) AS NUMERIC), + 
(C.INST * E.RULE_APP / 1000000000) + + (C.SIZE * E.RESULT / 1000000000) AS NUMERIC), 3) AS ESTIMATE, R.REAL_T -FROM RESULT R, INPUT_GRAPH G, ESTIMATE_PAR E +FROM RESULT R, INPUT_GRAPH G, BENCH_COST C, ESTIMATE_PAR E WHERE R.SYS = E.SYS AND E.BENCH = 'tcff' AND R.BENCH = 'tcff' +AND C.BENCH = 'tcff' AND C.FILE_ID = G.FILE_ID AND R.FILE_ID = G.FILE_ID ORDER BY R.SYS, E.STARTUP, E.LOAD_IDX, E.RULE_APP, E.RESULT, ESTIMATE; @@ -117,12 +118,13 @@ SELECT R.SYS, R.BENCH, R.IMPL, 'data_c("' || G.FILE_ID || '", ' || G.NUM_NODES || ', ' || G.NUM_EDGES || ', ' || - 'INT64_C(' || G.TC_SIZE || '), ' || - 'INT64_C(' || G.TC_COST || '), ' || - G.TC_ITER || ', ' || + 'INT64_C(' || C.SIZE || '), ' || + 'INT64_C(' || C.INST || '), ' || + C.ITER || ', ' || ROUND(R.REAL_T*1000)|| '),' AS OBJ -FROM INPUT_GRAPH G, INPUT_FILE F, BEST_RESULT R +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R WHERE G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID +AND C.FILE_ID = F.FILE_ID AND C.BENCH = 'tcff' ORDER BY R.SYS, R.BENCH, F.SEQ_NO; -- ============================================================================ @@ -137,12 +139,13 @@ SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, G.FILE_ID || '","' || G.NUM_NODES || '","' || G.NUM_EDGES || '","' || - G.TC_SIZE || '","' || - G.TC_COST || '","' || - G.TC_ITER || '","' || + C.SIZE || '","' || + C.INST || '","' || + C.ITER || '","' || ROUND(R.REAL_T*1000) || '"' AS CSV -FROM INPUT_GRAPH G, INPUT_FILE F, BEST_RESULT R +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R WHERE R.BENCH = 'tcff' AND G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID +AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID UNION ALL SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, '"' || R.SYS || '","' || @@ -151,12 +154,14 @@ SELECT R.SYS, R.BENCH, R.IMPL, F.FILE_ID, F.SEQ_NO, G.FILE_ID || '","' || G.NUM_NODES || '","' || G.NUM_EDGES || '","' || - G.SG_SIZE || '","' || - G.SG_COST || '","' || - G.SG_ITER || '","' || + C.SIZE || '","' || + C.INST || 
'","' || + C.ITER || '","' || ROUND(R.REAL_T*1000) || '"' AS CSV -FROM INPUT_GRAPH G, INPUT_FILE F, BEST_RESULT R -WHERE R.BENCH = 'sgff' AND G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R +WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID UNION ALL SELECT '' AS SYS, '' AS BENCH, '' AS IMPL, '' AS FILE_ID, 0 AS SEQ_NO, '"' || 'SYS' || '","' || @@ -191,13 +196,14 @@ ORDER BY R.SYS, R.BENCH, F.SEQ_NO; CREATE VIEW PAR_RANGE AS SELECT R.SYS, G.FILE_ID, ROUND(R.REAL_T * 1000 * 1000 / G.NUM_EDGES, 3) AS PAR_LOAD, - ROUND(R.REAL_T * 1000 * 1000 / G.TC_COST, 3) AS PAR_RULE, - ROUND(R.REAL_T * 1000 * 1000 / G.TC_SIZE, 3) AS PAR_RESULT, + ROUND(R.REAL_T * 1000 * 1000 / C.INST, 3) AS PAR_RULE, + ROUND(R.REAL_T * 1000 * 1000 / C.SIZE, 3) AS PAR_RESULT, ROUND(R.REAL_T * 1000 * 1000 / G.NUM_NODES, 3) AS PAR_DATA, - ROUND(R.REAL_T * 1000 * 1000 / G.TC_ITER, 3) AS PAR_ITER -FROM INPUT_GRAPH G, INPUT_FILE F, BEST_RESULT R -WHERE G.FILE_ID = F.FILE_ID AND R.FILE_ID = F.FILE_ID -AND R.BENCH = 'tcff' + ROUND(R.REAL_T * 1000 * 1000 / C.ITER, 3) AS PAR_ITER +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C, BEST_RESULT R +WHERE G.FILE_ID = F.FILE_ID +AND R.FILE_ID = F.FILE_ID AND R.BENCH = 'tcff' +AND C.FILE_ID = F.FILE_ID AND C.BENCH = 'tcff' ORDER BY R.SYS, R.BENCH, F.SEQ_NO; diff --git a/db/v_graph.sql b/db/v_graph.sql index b816121ea25455df5c52aa27af7f5d2af7f5a60c..547e32c5116ad93482d087f7d8d2574490ba9d01 100644 --- a/db/v_graph.sql +++ b/db/v_graph.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_graph.sql -- Purpose: Benchmark DB: Views to Produce Graph Data in Various Formats --- Last Change: 04.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -38,12 +38,12 @@ SELECT G.FILE_ID, (CASE WHEN CYCLES='Y' 
THEN 'yes' WHEN CYCLES='N' THEN 'no' ELSE '?' END) || '&' || - G.TC_SIZE || '&' || - G.TC_ITER || '&' || - G.TC_COST || '\\' + C.SIZE || '&' || + C.ITER || '&' || + C.INST || '\\' AS LATEX -FROM INPUT_GRAPH G, INPUT_FILE F -WHERE G.FILE_ID = F.FILE_ID +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C +WHERE G.FILE_ID = F.FILE_ID AND C.FILE_ID = F.FILE_ID AND C.BENCH = 'tcff' ORDER BY F.SEQ_NO; -- ============================================================================ @@ -91,13 +91,13 @@ SELECT F.SEQ_NO, F.FILE_ID, (CASE WHEN CYCLES='Y' THEN 'yes' WHEN CYCLES='N' THEN 'no' ELSE '?' END) || '</td>' || ' - ' || '<td class="colTCSize" >' || TC_SIZE || '</td>' || ' - ' || '<td class="colIter" >' || TC_ITER || '</td>' || ' - ' || '<td class="colRuleInst">' || TC_COST || '</td>' || ' + ' || '<td class="colTCSize" >' || C.SIZE || '</td>' || ' + ' || '<td class="colIter" >' || C.ITER || '</td>' || ' + ' || '<td class="colRuleInst">' || C.INST || '</td>' || ' ' || '</tr>' AS HTML -FROM INPUT_GRAPH G, INPUT_FILE F -WHERE G.FILE_ID = F.FILE_ID +FROM INPUT_GRAPH G, INPUT_FILE F, BENCH_COST C +WHERE G.FILE_ID = F.FILE_ID AND C.FILE_ID = F.FILE_ID AND C.BENCH = 'tcff' UNION ALL SELECT 1000000 AS SEQ_NO, NULL AS FILE_ID, '</table>' AS HTML diff --git a/db/v_j1axy.sql b/db/v_j1axy.sql index b15253b6a58962af6c8978599e735a292d6930fe..06d1babee1c6450f14bd82bce4093c0de5812f2d 100644 --- a/db/v_j1axy.sql +++ b/db/v_j1axy.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_j1axy.sql -- Purpose: Benchmark DB: Views to analyze Join1 a(X,Y) Benchmark Results --- Last Change: 28.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -547,7 +547,7 @@ SELECT F.SEQ_NO, F.FILE_ID, ' || '<td class="colB1Size" >' || B1_SIZE || '</td>' || ' ' || '<td class="colB2Size" >' || B2_SIZE || '</td>' || ' ' || '<td class="colC1Size" >' || C1_SIZE || '</td>' || ' - ' 
|| '<td class="colRuleInst">' || A_COST+B1_COST+B2_COST+C1_COST + ' || '<td class="colRuleInst">' || A_INST+B1_INST+B2_INST+C1_INST || '</td>' || ' ' || '</tr>' AS HTML @@ -576,14 +576,14 @@ CREATE VIEW J1AXY_PLOT_REAL AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - (C.A_COST+C.B1_COST+C.B2_COST+C.C1_COST) AS COST, + (C.A_INST+C.B1_INST+C.B2_INST+C.C1_INST) AS COST, R.REAL_T, '"' || R.FILE_ID || '","' || - (C.A_COST+C.B1_COST+C.B2_COST+C.C1_COST) || '","' + (C.A_INST+C.B1_INST+C.B2_INST+C.C1_INST) || '","' || R.REAL_T || '"' AS CSV, R.FILE_ID || ' ' || - (C.A_COST+C.B1_COST+C.B2_COST+C.C1_COST) || ' ' || R.REAL_T + (C.A_INST+C.B1_INST+C.B2_INST+C.C1_INST) || ' ' || R.REAL_T AS TSV FROM RESULT R, INPUT_FILE F, INPUT_JOIN1 C, BENCH_IMPL I WHERE R.BENCH = 'j1axy' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = C.FILE_ID diff --git a/db/v_sgff.sql b/db/v_sgff.sql index 1b7f1b3838f351b232eb34178f09765d609e5d36..471f1bbe5a63036cfe2564edd94a4ee873e2ee80 100644 --- a/db/v_sgff.sql +++ b/db/v_sgff.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_sgff.sql -- Purpose: Benchmark DB: Views for Analysis of SGFF Benchmark Results --- Last Change: 18.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -30,15 +30,17 @@ CREATE VIEW SGFF_PLOT_REAL AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.SG_COST, + C.INST AS SG_COST, R.REAL_T, - '"' || R.FILE_ID || '","' || G.SG_COST || '","' || R.REAL_T || '"' + '"' || R.FILE_ID || '","' || C.INST || '","' || R.REAL_T || '"' AS CSV, - R.FILE_ID || ' ' || G.SG_COST || ' ' || R.REAL_T + R.FILE_ID || ' ' || C.INST || ' ' || R.REAL_T AS TSV -FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE 
R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID +AND I.BENCH = 'sgff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS SG_COST, 0 AS REAL_T, @@ -60,15 +62,17 @@ CREATE VIEW SGFF_PLOT_CPU AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.SG_COST, + C.INST AS SG_COST, R.CPU_T, - '"' || R.FILE_ID || '","' || G.SG_COST || '","' || R.CPU_T || '"' + '"' || R.FILE_ID || '","' || C.INST || '","' || R.CPU_T || '"' AS CSV, - R.FILE_ID || ' ' || G.SG_COST || ' ' || R.CPU_T + R.FILE_ID || ' ' || C.INST || ' ' || R.CPU_T AS TSV -FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'sgff' AND C.FILE_ID = F.FILE_ID +AND I.BENCH = 'sgff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS SG_COST, 0 AS CPU_T, @@ -90,15 +94,17 @@ CREATE VIEW SGFF_PLOT_MEM AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.NUM_EDGES + G.SG_SIZE AS FACTS, + G.NUM_EDGES + C.SIZE AS FACTS, R.MEM_MB, - '"' || R.FILE_ID || '","' || (G.NUM_EDGES+G.SG_SIZE) || '","' || + '"' || R.FILE_ID || '","' || (G.NUM_EDGES+C.SIZE) || '","' || R.MEM_MB || '"' AS CSV, - R.FILE_ID || ' ' || (G.NUM_EDGES+G.SG_SIZE) || ' ' || + R.FILE_ID || ' ' || (G.NUM_EDGES+C.SIZE) || ' ' || R.MEM_MB AS TSV -FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE R.BENCH = 'sgff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'sgff' AND 
C.FILE_ID = F.FILE_ID +AND I.BENCH = 'sgff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS FACTS, 0 AS CPU_T, diff --git a/db/v_tcff.sql b/db/v_tcff.sql index 71cc026c19a79695de533616ca6823b101dcdd19..51abb217f0347b9692a4b1990158c9e5f85b1ca3 100644 --- a/db/v_tcff.sql +++ b/db/v_tcff.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/v_tcff.sql -- Purpose: Benchmark DB: Views for Data Analysis of TCFF Benchmark Results --- Last Change: 18.03.2019 +-- Last Change: 04.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -30,15 +30,17 @@ CREATE VIEW TCFF_PLOT_REAL AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.TC_COST, + C.INST AS TC_COST, R.REAL_T, - '"' || R.FILE_ID || '","' || G.TC_COST || '","' || R.REAL_T || '"' + '"' || R.FILE_ID || '","' || C.INST || '","' || R.REAL_T || '"' AS CSV, - R.FILE_ID || ' ' || G.TC_COST || ' ' || R.REAL_T + R.FILE_ID || ' ' || C.INST || ' ' || R.REAL_T AS TSV -FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID +AND I.BENCH = 'tcff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS TC_COST, 0 AS REAL_T, @@ -60,15 +62,17 @@ CREATE VIEW TCFF_PLOT_CPU AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.TC_COST, + C.INST AS TC_COST, R.CPU_T, - '"' || R.FILE_ID || '","' || G.TC_COST || '","' || R.CPU_T || '"' + '"' || R.FILE_ID || '","' || C.INST || '","' || R.CPU_T || '"' AS CSV, - R.FILE_ID || ' ' || G.TC_COST || ' ' || R.CPU_T + R.FILE_ID || ' ' || C.INST || ' ' || R.CPU_T AS TSV -FROM RESULT R, 
INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID +AND I.BENCH = 'tcff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS TC_COST, 0 AS CPU_T, @@ -90,15 +94,17 @@ CREATE VIEW TCFF_PLOT_MEM AS SELECT R.SYS, R.FILE_ID, F.SEQ_NO AS NO, - G.NUM_EDGES + G.TC_SIZE AS FACTS, + G.NUM_EDGES + C.SIZE AS FACTS, R.MEM_MB, - '"' || R.FILE_ID || '","' || (G.NUM_EDGES+G.TC_SIZE) || '","' || + '"' || R.FILE_ID || '","' || (G.NUM_EDGES+C.SIZE) || '","' || R.MEM_MB || '"' AS CSV, - R.FILE_ID || ' ' || (G.NUM_EDGES+G.TC_SIZE) || ' ' || + R.FILE_ID || ' ' || (G.NUM_EDGES+C.SIZE) || ' ' || R.MEM_MB AS TSV -FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_IMPL I -WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID AND R.FILE_ID = G.FILE_ID -AND I.BENCH = R.BENCH AND I.IMPL = R.IMPL AND I.SYS = R.SYS +FROM RESULT R, INPUT_FILE F, INPUT_GRAPH G, BENCH_COST C, BENCH_IMPL I +WHERE R.BENCH = 'tcff' AND R.FILE_ID = F.FILE_ID +AND G.FILE_ID = F.FILE_ID +AND C.BENCH = 'tcff' AND C.FILE_ID = F.FILE_ID +AND I.BENCH = 'tcff' AND I.IMPL = R.IMPL AND I.SYS = R.SYS AND I.STATUS = 'B' UNION SELECT DISTINCT P.SYS, '', -1, 0 AS FACTS, 0 AS CPU_T, diff --git a/graph/gen_graphs b/graph/gen_graphs new file mode 100755 index 0000000000000000000000000000000000000000..57982db3b21e41cfd6604be15ca901f493c9fc85 --- /dev/null +++ b/graph/gen_graphs @@ -0,0 +1,69 @@ +#!/bin/bash +P=../../data_p +T=../../data_tsv +C=../../data_csv +S=../../data_sql +./graph a1k_50k $P/a1k_50k.P $T/a1k_50k.tsv $C/a1k_50k.csv $S/a1k_50k.sql +./graph a1k_125k $P/a1k_125k.P $T/a1k_125k.tsv $C/a1k_125k.csv $S/a1k_125k.sql +./graph a1k_250k $P/a1k_250k.P $T/a1k_250k.tsv 
$C/a1k_250k.csv $S/a1k_250k.sql +./graph a2k_200k $P/a2k_200k.P $T/a2k_200k.tsv $C/a2k_200k.csv $S/a2k_200k.sql +./graph a2k_500k $P/a2k_500k.P $T/a2k_500k.tsv $C/a2k_500k.csv $S/a2k_500k.sql +./graph a2k_1m $P/a2k_1m.P $T/a2k_1m.tsv $C/a2k_1m.csv $S/a2k_1m.sql +./graph b17 $P/b17.P $T/b17.tsv $C/b17.csv $S/b17.sql +./graph b18 $P/b18.P $T/b18.tsv $C/b18.csv $S/b18.sql +./graph b19 $P/b19.P $T/b19.tsv $C/b19.csv $S/b19.sql +./graph c1k $P/c1k.P $T/c1k.tsv $C/c1k.csv $S/c1k.sql +./graph c2k $P/c2k.P $T/c2k.tsv $C/c2k.csv $S/c2k.sql +./graph c3k $P/c3k.P $T/c3k.tsv $C/c3k.csv $S/c3k.sql +./graph c4k $P/c4k.P $T/c4k.tsv $C/c4k.csv $S/c4k.sql +./graph k100 $P/k100.P $T/k100.tsv $C/k100.csv $S/k100.sql +./graph k1k $P/k1k.P $T/k1k.tsv $C/k1k.csv $S/k1k.sql +./graph k1k5 $P/k1k5.P $T/k1k5.tsv $C/k1k5.csv $S/k1k5.sql +./graph k2k $P/k2k.P $T/k2k.tsv $C/k2k.csv $S/k2k.sql +./graph k50 $P/k50.P $T/k50.tsv $C/k50.csv $S/k50.sql +./graph k500 $P/k500.P $T/k500.tsv $C/k500.csv $S/k500.sql +./graph m4_2ki $P/m4_2ki.P $T/m4_2ki.tsv $C/m4_2ki.csv $S/m4_2ki.sql +./graph m16_512 $P/m16_512.P $T/m16_512.tsv $C/m16_512.csv $S/m16_512.sql +./graph m64_128 $P/m64_128.P $T/m64_128.tsv $C/m64_128.csv $S/m64_128.sql +./graph m256_32 $P/m256_32.P $T/m256_32.tsv $C/m256_32.csv $S/m256_32.sql +./graph m1ki_8 $P/m1ki_8.P $T/m1ki_8.tsv $C/m1ki_8.csv $S/m1ki_8.sql +./graph m4ki_2 $P/m4ki_2.P $T/m4ki_2.tsv $C/m4ki_2.csv $S/m4ki_2.sql +./graph p1k $P/p1k.P $T/p1k.tsv $C/p1k.csv $S/p1k.sql +./graph p2k $P/p2k.P $T/p2k.tsv $C/p2k.csv $S/p2k.sql +./graph p3k $P/p3k.P $T/p3k.tsv $C/p3k.csv $S/p3k.sql +./graph p4k $P/p4k.P $T/p4k.tsv $C/p4k.csv $S/p4k.sql +./graph s1k_1 $P/s1k_1.P $T/s1k_1.tsv $C/s1k_1.csv $S/s1k_1.sql +./graph s1k_2 $P/s1k_2.P $T/s1k_2.tsv $C/s1k_2.csv $S/s1k_2.sql +./graph s1k_3 $P/s1k_3.P $T/s1k_3.tsv $C/s1k_3.csv $S/s1k_3.sql +./graph s1k_4 $P/s1k_4.P $T/s1k_4.tsv $C/s1k_4.csv $S/s1k_4.sql +./graph s1k_5 $P/s1k_5.P $T/s1k_5.tsv $C/s1k_5.csv $S/s1k_5.sql +./graph s2k_1 $P/s2k_1.P 
$T/s2k_1.tsv $C/s2k_1.csv $S/s2k_1.sql +./graph s2k_2 $P/s2k_2.P $T/s2k_2.tsv $C/s2k_2.csv $S/s2k_2.sql +./graph s2k_3 $P/s2k_3.P $T/s2k_3.tsv $C/s2k_3.csv $S/s2k_3.sql +./graph s2k_4 $P/s2k_4.P $T/s2k_4.tsv $C/s2k_4.csv $S/s2k_4.sql +./graph s2k_5 $P/s2k_5.P $T/s2k_5.tsv $C/s2k_5.csv $S/s2k_5.sql +./graph t50 $P/t50.P $T/t50.tsv $C/t50.csv $S/t50.sql +./graph t100 $P/t100.P $T/t100.tsv $C/t100.csv $S/t100.sql +./graph t500 $P/t500.P $T/t500.tsv $C/t500.csv $S/t500.sql +./graph t1k $P/t1k.P $T/t1k.tsv $C/t1k.csv $S/t1k.sql +./graph t1k5 $P/t1k5.P $T/t1k5.tsv $C/t1k5.csv $S/t1k5.sql +./graph t2k $P/t2k.P $T/t2k.tsv $C/t2k.csv $S/t2k.sql +./graph u1k_50k $P/u1k_50k.P $T/u1k_50k.tsv $C/u1k_50k.csv $S/u1k_50k.sql +./graph u1k_125k $P/u1k_125k.P $T/u1k_125k.tsv $C/u1k_125k.csv $S/u1k_125k.sql +./graph u1k_250k $P/u1k_250k.P $T/u1k_250k.tsv $C/u1k_250k.csv $S/u1k_250k.sql +./graph u2k_200k $P/u2k_200k.P $T/u2k_200k.tsv $C/u2k_200k.csv $S/u2k_200k.sql +./graph u2k_500k $P/u2k_500k.P $T/u2k_500k.tsv $C/u2k_500k.csv $S/u2k_500k.sql +./graph u2k_1m $P/u2k_1m.P $T/u2k_1m.tsv $C/u2k_1m.csv $S/u2k_1m.sql +./graph v10 $P/v10.P $T/v10.tsv $C/v10.csv $S/v10.sql +./graph v11 $P/v11.P $T/v11.tsv $C/v11.csv $S/v11.sql +./graph v12 $P/v12.P $T/v12.tsv $C/v12.csv $S/v12.sql +./graph v17 $P/v17.P $T/v17.tsv $C/v17.csv $S/v17.sql +./graph v18 $P/v18.P $T/v18.tsv $C/v18.csv $S/v18.sql +./graph v19 $P/v19.P $T/v19.tsv $C/v19.csv $S/v19.sql +./graph w1k $P/w1k.P $T/w1k.tsv $C/w1k.csv $S/w1k.sql +#./graph wintest $P/wintest.P $T/wintest.tsv $C/wintest.csv $S/wintest.sql +./graph x10k $P/x10k.P $T/x10k.tsv $C/x10k.csv $S/x10k.sql +./graph y1k_4k $P/y1k_4k.P $T/y1k_4k.tsv $C/y1k_4k.csv $S/y1k_4k.sql +./graph y1k_8k $P/y1k_8k.P $T/y1k_8k.tsv $C/y1k_8k.csv $S/y1k_8k.sql +./graph y500_4k $P/y500_4k.P $T/y500_4k.tsv $C/y500_4k.csv $S/y500_4k.sql +./graph y500_8k $P/y500_8k.P $T/y500_8k.tsv $C/y500_8k.csv $S/y500_8k.sql diff --git a/graph/graph.cpp b/graph/graph.cpp index 
fe72c01bfb60128db9393ce6df703e9d35657485..efd6a0bc8e3bc4ba8ac4784331ff36eeb1849fd1 100644 --- a/graph/graph.cpp +++ b/graph/graph.cpp @@ -2,7 +2,7 @@ // Project: rbench - Logic Programming and Database Benchmarks // Filename: graph/graph.cpp // Purpose: Alternative Graph Generator -// Last Change: 03.10.2019 +// Last Change: 04.10.2019 // Language: C++ // EMail: brass@informatik.uni-halle.de // WWW: http://www.informatik.uni-halle.de/~brass/ @@ -102,6 +102,9 @@ // {(i, 1 + ((i + (n*j)/(k+1)) mod n) | i = 1,...,n; j=1,...,k } // Tn: Total order graph (maximum acyclic graph) +// n vertices, n * (n-1) / 2 edges +// V = {1, ..., n} +// E = {(i, j) | i = 1, ..., n-1; j = i+1, ..., n } // Un_k: Random Graph, Uniform Distribution of Node Degrees // n nodes, k edges @@ -114,14 +117,20 @@ // Wn_k: Single level of edges, mesh, 2n nodes, degree k): +// Wn: Abbreviation for Wn_n (n nodes connected to all n other nodes) +// 2*n nodes, n*k edges // V = {1, ..., 2n} // E = { (i, (i+j-1 mod n)+n+1) | 1 <= i <= n and 0 <= j < k } // Xn_k: X-graph (n edges to central node, k from there) +// Xn: Abbreviation for Xn_n +// n+k+1 nodes, n+k edges +// V = {1, ..., n+k+1} +// E = {(i, n+1) | i = 1, ..., n} u {(n+1, n+1+j) | j = 1, ..., k} // Yn_k: Y-graph (n nodes point to central node, then path of length k) -// n+k nodes: V= {1,...,n+k} -// n+k-1 edges: +// n+k nodes, n+k-1 edges: +// V = {1,...,n+k} // E = {(i, n+1)} | i = 1,...,n} u {(n+i, n+i+1) | i = 1,...,k-1} // The program code for graph g is in the procedure gen_graph_g. @@ -138,6 +147,19 @@ #define MAX_OUT_FILES 4 +//============================================================================= +// Name of the predicate or relation for the edges: +//============================================================================= + +#define PRED_NAME "par" + +//============================================================================= +// Should Test Output be Written? 
+//============================================================================= + +// 0 means no test output, 1 means test output: +#define TEST_OUTPUT 0 + //============================================================================= // Include Files: //============================================================================= @@ -307,7 +329,7 @@ long str_int(str_t digits) { int prev_result = result; result = result * 10 + digit_val(*p); if(result/10 != prev_result) { - std::cout << "Overflow!\n"; + std::cerr << "Overflow!\n"; exit(2); } } @@ -340,7 +362,7 @@ class Node { void store_connection(int to) { // Check nodes: if(to <= 0 || to > num_nodes_) { - std::cout << "Invalid to node: " << to << "\n"; + std::cerr << "Invalid to node: " << to << "\n"; exit(3); } @@ -357,7 +379,7 @@ class Node { bool connection_exists(int to) const { // Check node: if(to <= 0 || to > num_nodes_) { - std::cout << "Invalid to node: " << to << "\n"; + std::cerr << "Invalid to node: " << to << "\n"; exit(4); } @@ -373,7 +395,7 @@ class Node { // Copy other node into this node: void copy(Node *node) { if(node->num_nodes_ != num_nodes_) { - std::cout << "Can copy only node of same size!\n"; + std::cerr << "Can copy only node of same size!\n"; exit(5); } int bytes = (num_nodes_ + BYTE_SIZE - 1) / BYTE_SIZE; @@ -422,11 +444,11 @@ class Graph { void store_edge(int from, int to) { // Check nodes: if(from <= 0 || from > num_nodes_) { - std::cout << "Invalid from node: " << from << "\n"; + std::cerr << "Invalid from node: " << from << "\n"; exit(6); } if(to <= 0 || to > num_nodes_) { - std::cout << "Invalid to node: " << to << "\n"; + std::cerr << "Invalid to node: " << to << "\n"; exit(7); } @@ -439,11 +461,11 @@ class Graph { bool edge_exists(int from, int to) const { // Check nodes: if(from <= 0 || from > num_nodes_) { - std::cout << "Invalid from node: " << from << "\n"; + std::cerr << "Invalid from node: " << from << "\n"; exit(8); } if(to <= 0 || to > num_nodes_) { - std::cout << 
"Invalid to node: " << to << "\n"; + std::cerr << "Invalid to node: " << to << "\n"; exit(9); } @@ -456,7 +478,7 @@ class Graph { void copy(Graph *g) { if(g->num_nodes_ != num_nodes_) { - std::cout << "Can copy only graph of same size!\n"; + std::cerr << "Can copy only graph of same size!\n"; exit(10); } for(int i = 0; i < num_nodes_; i++) @@ -886,7 +908,7 @@ typedef TC *tc_t; // Data Type for File Formats: //============================================================================= -typedef enum { FORMAT_P, FORMAT_TSV } format_t; +typedef enum { FORMAT_P, FORMAT_TSV, FORMAT_CSV, FORMAT_SQL } format_t; str_t format_name(format_t f) { switch(f) { @@ -894,6 +916,10 @@ str_t format_name(format_t f) { return ".P"; case FORMAT_TSV: return ".tsv"; + case FORMAT_CSV: + return ".csv"; + case FORMAT_SQL: + return ".sql"; default: return "UNKNOWN FORMAT"; } @@ -924,7 +950,7 @@ class Output { void add_file(str_t filename) { // Check that there is still space in the array: if(num_files_ >= MAX_OUT_FILES) { - std::cout << "Too many output files!\n"; + std::cerr << "Too many output files!\n"; exit(11); } @@ -936,8 +962,16 @@ class Output { filename[n-3] == 't' && filename[n-2] == 's' && filename[n-1] == 'v') format_[num_files_] = FORMAT_TSV; + else if(n > 4 && filename[n-4] == '.' && + filename[n-3] == 'c' && filename[n-2] == 's' && + filename[n-1] == 'v') + format_[num_files_] = FORMAT_CSV; + else if(n > 4 && filename[n-4] == '.' 
&& + filename[n-3] == 's' && filename[n-2] == 'q' && + filename[n-1] == 'l') + format_[num_files_] = FORMAT_SQL; else { - std::cout << "Output file '" << filename << + std::cerr << "Output file '" << filename << "' has unknown extension.\n"; exit(12); } @@ -946,7 +980,7 @@ class Output { std::ofstream *output = files_ + num_files_; output->open(filename); if(output->fail()) { - std::cout << "Opening output file '" << filename << + std::cerr << "Opening output file '" << filename << "' failed.\n"; exit(13); } @@ -958,7 +992,7 @@ class Output { // The graph data structure depends on the number of nodes: if(num_nodes_ != 0) { - std::cout << "Number of nodes in output can be set " << + std::cerr << "Number of nodes in output can be set " << "only once!\n"; exit(14); } @@ -975,23 +1009,23 @@ class Output { // The number of nodes must be set: if(num_nodes_ <= 0) { - std::cout << "The number of nodes must be set first!\n"; + std::cerr << "The number of nodes must be set first!\n"; exit(15); } // Check validity of parameter values: if(from <= 0 || from > num_nodes_) { - std::cout << "from node: " << from << " invalid.\n"; + std::cerr << "from node: " << from << " invalid.\n"; exit(16); } if(to <= 0 || to > num_nodes_) { - std::cout << "to node: " << to << " invalid.\n"; + std::cerr << "to node: " << to << " invalid.\n"; exit(17); } // Check that there is at least one file open: if(num_files_ == 0) { - std::cout << "No file open.\n"; + std::cerr << "No file open.\n"; exit(18); } @@ -1000,12 +1034,21 @@ class Output { std::ofstream &file = files_[i]; switch(format_[i]) { case FORMAT_P: - file << "par(" << from << ", " << to << + file << PRED_NAME << "(" << + from << ", " << to << ")." 
<< "\n"; break; case FORMAT_TSV: file << from << "\t" << to << "\n"; break; + case FORMAT_CSV: + file << from << "," << to << "\n"; + break; + case FORMAT_SQL: + file << "INSERT INTO " << PRED_NAME << + " VALUES (" << + from << "," << to << ");\n"; + break; } } @@ -1021,7 +1064,7 @@ class Output { for(int i = 0; i < num_files_; i++) { std::ofstream &file = files_[i]; if(file.bad()) { - std::cout << "Problem with output file " << + std::cerr << "Problem with output file " << "(Format " << format_name(format_[i]) << ").\n"; exit(19); @@ -1058,7 +1101,7 @@ typedef Output *output_t; //----------------------------------------------------------------------------- void no_second_par(str_t graph_name) { - std::cout << "Graph '" << graph_name << "' has no second parameter.\n"; + std::cerr << "Graph '" << graph_name << "' has no second parameter.\n"; exit(20); } @@ -1067,7 +1110,7 @@ void no_second_par(str_t graph_name) { //----------------------------------------------------------------------------- void second_par_missing(str_t graph_name) { - std::cout << "Graph '" << graph_name << "' needs a second parameter.\n"; + std::cerr << "Graph '" << graph_name << "' needs a second parameter.\n"; exit(21); } @@ -1076,7 +1119,7 @@ void second_par_missing(str_t graph_name) { //----------------------------------------------------------------------------- void par_too_small(str_t graph_name, str_t par_name, int min) { - std::cout << "Value of parameter '" << par_name << "' " << + std::cerr << "Value of parameter '" << par_name << "' " << "for Graph '" << graph_name << "' too small: " << "Minimum " << min << ".\n"; exit(22); @@ -1123,7 +1166,7 @@ int next_prime(int p) { while(!is_prime(p)) { p++; if(p < 0) { - std::cout << + std::cerr << "Overflow in computation of prime number\n"; exit(23); } @@ -1196,8 +1239,10 @@ int rand_until(int n) { std::cerr << "Error in random number generator!\n"; exit(24); } +#if TEST_OUTPUT std::cout << "rand_until(" << n << ") = " << result << "\t[rand = " 
<< r << "]\n"; +#endif return result; } @@ -1213,11 +1258,13 @@ void rand_shuffle(int arr[], int len) { arr[j] = swap; } +#if TEST_OUTPUT std::cout << "\n"; std::cout << "Result of Shuffle (Array Length: " << len << ")\n"; for(int i = 0; i < len; i++) std::cout << "\t arr[" << i << "] = " << arr[i] << "\n"; std::cout << "\n"; +#endif } @@ -1417,13 +1464,13 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) { // Check divisibility condition: if(n % (k+1) != 0) { - std::cout << "S[n,k] requires that n is divisible by k+1.\n"; + std::cerr << "S[n,k] requires that n is divisible by k+1.\n"; exit(25); } // Check that k is not too large: if(n < (k+1)) { - std::cout << "S[n,k] requires that n > k.\n"; + std::cerr << "S[n,k] requires that n > k.\n"; exit(26); } @@ -1650,7 +1697,7 @@ void gen_graph_w(int n, int k, output_t out, gsize_t gsize) { // Default for second parameter: if(k < 1) - k = 1; + k = n; // Set number of nodes: out->set_nodes(2 * n); @@ -1746,7 +1793,7 @@ void gen_graph_u_a(int n, int k, output_t out, gsize_t gsize, bool acyclic) { // Ensure termination: if(i >= p1 * p2) { - std::cout << + std::cerr << "Number of edges too large in random graph.\n"; exit(27); } @@ -1825,7 +1872,7 @@ void gen_graph_u(int n, int k, output_t out, gsize_t gsize) { // Check relation between n and k: if(k > n * n) { - std::cout << + std::cerr << "Impossible number of edges (too large) in U-Graph"; exit(28); } @@ -1867,7 +1914,7 @@ void gen_graph_a(int n, int k, output_t out, gsize_t gsize) { // Check relation between n and k: if(k > n * (n-1) / 2) { - std::cout << + std::cerr << "Impossible number of edges (too large) in A-Graph"; exit(29); } @@ -1890,7 +1937,7 @@ int main(int argc, str_t argv[]) // The program should be called with the graph and the output files: if(argc < 3) { - std::cout << "Usage: ./graph GraphID OutputFile1 ...\n"; + std::cerr << "Usage: ./graph GraphID OutputFile1 ...\n"; exit(30); } @@ -1899,7 +1946,7 @@ int main(int argc, str_t argv[]) str_t p 
= graph_id; char graph_code = *p++; if(graph_code == '\0') { - std::cout << "Impossible empty Graph ID.\n"; + std::cerr << "Impossible empty Graph ID.\n"; exit(31); } @@ -1910,7 +1957,7 @@ int main(int argc, str_t argv[]) par_chars[i] = *p++; par_chars[i] = 0; if(i == 0) { - std::cout << "First parameter in graph ID missing.\n"; + std::cerr << "First parameter in graph ID missing.\n"; exit(32); } int par1 = str_int(par_chars); @@ -1945,7 +1992,7 @@ int main(int argc, str_t argv[]) par_chars[i] = *p++; par_chars[i] = 0; if(i == 0) { - std::cout << "Second parameter in graph ID missing.\n"; + std::cerr << "Second parameter in graph ID missing.\n"; exit(33); } par2 = str_int(par_chars); @@ -1975,7 +2022,7 @@ int main(int argc, str_t argv[]) // Check that we have successfully parsed the entire graph ID: if(*p != '\0') { - std::cout << "Unexpected characters at the end of graph ID.\n"; + std::cerr << "Unexpected characters at the end of graph ID.\n"; exit(34); } @@ -2060,7 +2107,7 @@ int main(int argc, str_t argv[]) gen_graph_a(par1, par2, &output, &formula); break; default: - std::cout << "Unknown graph type '" << graph_code << + std::cerr << "Unknown graph type '" << graph_code << "'.\n"; exit(35); } @@ -2078,7 +2125,9 @@ int main(int argc, str_t argv[]) std::cout << "\n"; // Test output: - //tc.print(); +#if TEST_OUTPUT + tc.print(); +#endif // Compute same generation data: std::cout << "Compute same generation data ...\n"; @@ -2087,7 +2136,9 @@ int main(int argc, str_t argv[]) std::cout << "\n"; // Test output: +#if TEST_OUTPUT sg.print(); +#endif // Create object for graph/benchmark size computed from graph: GSize gsize;