From 96ece41d4116ba88a75943f37e55eb4c3f0c8cdd Mon Sep 17 00:00:00 2001 From: Stefan Brass <stefan.brass@informatik.uni-halle.de> Date: Sat, 5 Oct 2019 19:05:41 +0200 Subject: [PATCH] bench_cost.sql was missing in load_data --- db/create_db.sql | 6 +- db/load_data | 3 + graph/bench_cost.sql | 45 +++++++ graph/graph.cpp | 281 +++++++++++++++++++++++++++++++++---------- 4 files changed, 268 insertions(+), 67 deletions(-) create mode 100644 graph/bench_cost.sql diff --git a/db/create_db.sql b/db/create_db.sql index 911e01a..38e5508 100644 --- a/db/create_db.sql +++ b/db/create_db.sql @@ -2,7 +2,7 @@ -- Project: rbench - Logic Programming and Database Benchmarks -- Filename: db/create_db.sql -- Purpose: Database for Benchmark Runs: Create Tables and Views --- Last Change: 19.03.2019 +-- Last Change: 05.10.2019 -- Language: SQL (Tested with PostgreSQL) -- Author: Stefan Brass -- EMail: brass@informatik.uni-halle.de @@ -533,9 +533,9 @@ CREATE TABLE INPUT_JOIN1 ( CREATE TABLE BENCH_COST ( BENCH VARCHAR(20) NOT NULL, FILE_ID VARCHAR(10) NOT NULL, - SIZE NUMERIC(12) NOT NULL, + SIZE NUMERIC(18) NOT NULL, ITER NUMERIC(12) NOT NULL, - INST NUMERIC(12) NOT NULL, + INST NUMERIC(18) NOT NULL, CONSTRAINT BENCH_COST_PK PRIMARY KEY(BENCH, FILE_ID), CONSTRAINT BENCH_COST_REF_BENCHMARK diff --git a/db/load_data b/db/load_data index 060d7e8..03c1a47 100755 --- a/db/load_data +++ b/db/load_data @@ -95,6 +95,9 @@ psql -f graph_types.sql | grep -v "INSERT 0 1" psql -f input_graphs.sql | grep -v "INSERT 0 1" psql -f input_join1.sql | grep -v "INSERT 0 1" +# Cost measures of a benchmark for an input file: +psql -f bench_cost.sql | grep -v "INSERT 0 1" + # Convert tsv-files with benchmark results to SQL: # (might be unnecessary, if the SQL files already exist, but is safer, # because SQL files might be missing or outdated) diff --git a/graph/bench_cost.sql b/graph/bench_cost.sql new file mode 100644 index 0000000..686d8e2 --- /dev/null +++ b/graph/bench_cost.sql @@ -0,0 +1,45 @@ +INSERT INTO 
BENCH_COST VALUES('tcff','b17',1966082,16,1966082); +INSERT INTO BENCH_COST VALUES('tcff','b18',4194306,17,4194306); +INSERT INTO BENCH_COST VALUES('tcff','c1k',1000000,1000,1001000); +INSERT INTO BENCH_COST VALUES('tcff','c2k',4000000,2000,4002000); +INSERT INTO BENCH_COST VALUES('tcff','c3k',9000000,3000,9003000); +INSERT INTO BENCH_COST VALUES('tcff','c4k',16000000,4000,16004000); +INSERT INTO BENCH_COST VALUES('tcff','k100',10000,1,1010000); +INSERT INTO BENCH_COST VALUES('tcff','k1k',1000000,1,1001000000); +INSERT INTO BENCH_COST VALUES('tcff','k1k5',2250000,1,3377250000); +INSERT INTO BENCH_COST VALUES('tcff','k2k',4000000,1,8004000000); +INSERT INTO BENCH_COST VALUES('tcff','k50',2500,1,127500); +INSERT INTO BENCH_COST VALUES('tcff','k500',250000,1,125250000); +INSERT INTO BENCH_COST VALUES('tcff','m4_2ki',12288,3,12288); +INSERT INTO BENCH_COST VALUES('tcff','m16_512',61440,15,61440); +INSERT INTO BENCH_COST VALUES('tcff','m64_128',258048,63,258048); +INSERT INTO BENCH_COST VALUES('tcff','m256_32',1044480,255,1044480); +INSERT INTO BENCH_COST VALUES('tcff','m1ki_8',4190208,1023,4190208); +INSERT INTO BENCH_COST VALUES('tcff','m4ki_2',16773120,4095,16773120); +INSERT INTO BENCH_COST VALUES('tcff','p1k',499500,999,499500); +INSERT INTO BENCH_COST VALUES('tcff','p2k',1999000,1999,1999000); +INSERT INTO BENCH_COST VALUES('tcff','p3k',4498500,2999,4498500); +INSERT INTO BENCH_COST VALUES('tcff','p4k',7998000,3999,7998000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_1',1000000,500,2002000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_3',1000000,250,4004000); +INSERT INTO BENCH_COST VALUES('tcff','s1k_4',1000000,200,5005000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_1',4000000,1000,8004000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_3',4000000,500,16008000); +INSERT INTO BENCH_COST VALUES('tcff','s2k_4',4000000,400,20010000); +INSERT INTO BENCH_COST VALUES('tcff','t50',1225,1,20825); +INSERT INTO BENCH_COST VALUES('tcff','t100',4950,1,166650); +INSERT INTO 
BENCH_COST VALUES('tcff','t500',124750,1,20833250); +INSERT INTO BENCH_COST VALUES('tcff','t1k',499500,1,166666500); +INSERT INTO BENCH_COST VALUES('tcff','t1k5',1124250,1,562499750); +INSERT INTO BENCH_COST VALUES('tcff','t2k',1999000,1,1333333000); +INSERT INTO BENCH_COST VALUES('tcff','v10',8194,9,8194); +INSERT INTO BENCH_COST VALUES('tcff','v11',18434,10,18434); +INSERT INTO BENCH_COST VALUES('tcff','v12',40962,11,40962); +INSERT INTO BENCH_COST VALUES('tcff','v17',1966082,16,1966082); +INSERT INTO BENCH_COST VALUES('tcff','v18',4194306,17,4194306); +INSERT INTO BENCH_COST VALUES('tcff','w1k',1000000,1,1000000); +INSERT INTO BENCH_COST VALUES('tcff','x10k',100020000,2,100020000); +INSERT INTO BENCH_COST VALUES('tcff','y1k_4k',11998000,4000,11998000); +INSERT INTO BENCH_COST VALUES('tcff','y1k_8k',39996000,8000,39996000); +INSERT INTO BENCH_COST VALUES('tcff','y500_4k',9998000,4000,9998000); +INSERT INTO BENCH_COST VALUES('tcff','y500_8k',35996000,8000,35996000); diff --git a/graph/graph.cpp b/graph/graph.cpp index efd6a0b..ebdfeb9 100644 --- a/graph/graph.cpp +++ b/graph/graph.cpp @@ -154,12 +154,32 @@ #define PRED_NAME "par" //============================================================================= -// Should Test Output be Written? +// Should Test/Debug Output be Written? //============================================================================= // 0 means no test output, 1 means test output: #define TEST_OUTPUT 0 +//============================================================================= +// Generate new version of S-Graph? 
+//============================================================================= + +// 0 means old version (Mario Wenzel), 1 means new version (Stefan Brass): +#define S_GRAPH_NEW_VER 0 + +//============================================================================= +// Name of file with INSERT statements for benchmark cost information: +//============================================================================= + +#define COST_FILE "bench_cost.sql" + +//============================================================================= +// Name of table with benchmark cost information: +//============================================================================= + +#define COST_TABLE "BENCH_COST" + + //============================================================================= // Include Files: //============================================================================= @@ -167,6 +187,7 @@ #include <iostream> #include <fstream> #include <cstdint> +#include <time.h> //============================================================================= // Type for Large Integers (64 Bit): @@ -543,21 +564,21 @@ class GSize { // Print defined values: void print() { if(num_nodes_ >= 0) - std::cout << "Nodes: " << num_nodes_ << "\n"; + std::cout << "\tNodes: " << num_nodes_ << "\n"; if(num_edges_ >= 0) - std::cout << "Edges: " << num_edges_ << "\n"; + std::cout << "\tEdges: " << num_edges_ << "\n"; if(tc_iter_ >= 0) - std::cout << "TC Iter: " << tc_iter_ << "\n"; + std::cout << "\tTC Iter: " << tc_iter_ << "\n"; if(tc_size_ >= 0) - std::cout << "TC Size: " << tc_size_ << "\n"; + std::cout << "\tTC Size: " << tc_size_ << "\n"; if(tc_inst_ >= 0) - std::cout << "TC Inst: " << tc_inst_ << "\n"; + std::cout << "\tTC Inst: " << tc_inst_ << "\n"; if(sg_iter_ >= 0) - std::cout << "SG Iter: " << sg_iter_ << "\n"; + std::cout << "\tSG Iter: " << sg_iter_ << "\n"; if(sg_size_ >= 0) - std::cout << "SG Size: " << sg_size_ << "\n"; + std::cout << "\tSG Size: " << sg_size_ << "\n"; 
if(sg_inst_ >= 0) - std::cout << "SG Inst: " << sg_inst_ << "\n"; + std::cout << "\tSG Inst: " << sg_inst_ << "\n"; } // Check consistency of two graph size objects: @@ -1487,8 +1508,11 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) { for(int j = 1; j <= k; j++) { int skip = (n*j)/(k+1); for(int i = 1; i <= n; i++) { - //out->write_edge(i, 1 + (i-1 + skip) % n); +#if S_GRAPH_NEW_VER out->write_edge(i, 1 + (i + skip) % n); +#else + out->write_edge(i, 1 + (i-1 + skip) % n); +#endif } } @@ -1503,14 +1527,7 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) { gsize->set_tc_inst(r); // SG Size: - //if(k == 1 && n % 4 == 2) { - // gsize->set_sg_size(m * m / 2); - // gsize->set_sg_inst(2 * (m * m + m)); - //} - //else { - // gsize->set_sg_size(m * m); - // gsize->set_sg_inst(m * (k + 1) + m * m * (k+1) * (k+1)); - //} +#if S_GRAPH_NEW_VER gsize->set_sg_size(m * (k+1)); //gsize->set_sg_inst(m*(k+1) + (k+1) * (m * (k+1)) * (k+1)); //The following is the same, slightly shorter: @@ -1519,6 +1536,16 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) { gsize->set_sg_iter(2); else gsize->set_sg_iter(1); +#else + if(k == 1 && n % 4 == 2) { + gsize->set_sg_size(m * m / 2); + gsize->set_sg_inst(2 * (m * m + m)); + } + else { + gsize->set_sg_size(m * m); + gsize->set_sg_inst(m * (k + 1) + m * m * (k+1) * (k+1)); + } +#endif } //----------------------------------------------------------------------------- @@ -1932,22 +1959,69 @@ void gen_graph_a(int n, int k, output_t out, gsize_t gsize) { int main(int argc, str_t argv[]) { // Print welcome: - std::cout << "Welcome to the Graph Data Generator.\n"; - std::cout << "\n"; + // std::cout << "Welcome to the Graph Data Generator.\n"; + // std::cout << "\n"; + + // Compute elapsed time in seconds: + time_t start_time; + time(&start_time); // The program should be called with the graph and the output files: if(argc < 3) { - std::cerr << "Usage: ./graph GraphID OutputFile1 ...\n"; + std::cerr << + 
"Usage: ./graph [-atsv] GraphID OutputFile1 ...\n"; exit(30); } + // Initialize options: + bool opt_tc = false; + bool opt_sg = false; + bool opt_verbose = false; + + // Parse options (one argument may bundle several flags, e.g. -tv): + int argno = 1; + while(argno < argc && argv[argno][0] == '-') { + str_t opts = argv[argno++]; + opts++; // Skip '-' + for(; *opts; opts++) { + switch(*opts) { + case 'a': + // Compute all benchmarks: + opt_tc = true; + opt_sg = true; + break; + case 's': + // Compute same generation: + opt_sg = true; + break; + case 't': + // Compute transitive closure: + opt_tc = true; + break; + case 'v': + // Verbose output (e.g. show benchmark result): + opt_verbose = true; + break; + default: + // Unknown option: + std::cerr << "Unknown option: " << + "'" << *opts << "'\n"; + exit(31); + } + } + } + // Get graph parameters, first code: - str_t graph_id = argv[1]; + if(argno >= argc) { + std::cerr << "Command line argument for Graph ID missing.\n"; + exit(32); + } + str_t graph_id = argv[argno++]; str_t p = graph_id; char graph_code = *p++; if(graph_code == '\0') { std::cerr << "Impossible empty Graph ID.\n"; - exit(31); + exit(33); } // First parameter: @@ -1958,7 +2032,7 @@ int main(int argc, str_t argv[]) par_chars[i] = 0; if(i == 0) { std::cerr << "First parameter in graph ID missing.\n"; - exit(32); + exit(34); } int par1 = str_int(par_chars); if(*p == 'k') { @@ -1993,7 +2067,7 @@ int main(int argc, str_t argv[]) par_chars[i] = 0; if(i == 0) { std::cerr << "Second parameter in graph ID missing.\n"; - exit(33); + exit(35); } par2 = str_int(par_chars); if(*p == 'k') { @@ -2023,13 +2097,16 @@ int main(int argc, str_t argv[]) // Check that we have successfully parsed the entire graph ID: if(*p != '\0') { std::cerr << "Unexpected characters at the end of graph ID.\n"; - exit(34); + exit(36); } + // Some feedback about graph to be generated: + std::cout << "Generating graph " << graph_id << " ...\n"; + // Open output file(s): Output output; - for(int arg_no = 2; arg_no < argc; arg_no++) - 
output.add_file(argv[arg_no]); + while(argno < argc) + output.add_file(argv[argno++]); // Create object for graph/benchmark size (computed with formula): GSize formula; @@ -2113,56 +2190,132 @@ int main(int argc, str_t argv[]) } // Output number of edges written: - std::cout << "Number of edges written: " << output.num_edges() << ".\n"; + std::cout << " Number of edges written: " << + output.num_edges() << ".\n"; // Close output file (in case we interrupt computation): output.close_files(); + // Create object for graph/benchmark size computed from graph: + GSize gsize; + // Compute transitive closure data: - std::cout << "Compute transitive closure data ...\n"; - TC tc(output.graph()); - std::cout << "done\n"; - std::cout << "\n"; + if(opt_tc) { + std::cout << "Compute transitive closure data ...\n"; + TC tc(output.graph()); + std::cout << "done\n"; + std::cout << "\n"; - // Test output: -#if TEST_OUTPUT - tc.print(); -#endif + // Show computation result: + if(opt_verbose) + tc.print(); + + // Store computed size data in graph size object: + tc.export_gsize(&gsize); + } // Compute same generation data: - std::cout << "Compute same generation data ...\n"; - SG sg(output.graph()); - std::cout << "done\n"; - std::cout << "\n"; + if(opt_sg) { + std::cout << "Compute same generation data ...\n"; + SG sg(output.graph()); + std::cout << "done\n"; + std::cout << "\n"; - // Test output: -#if TEST_OUTPUT - sg.print(); -#endif + // Show computation result: + if(opt_verbose) + sg.print(); - // Create object for graph/benchmark size computed from graph: - GSize gsize; - tc.export_gsize(&gsize); - sg.export_gsize(&gsize); - - // Print computed size data: - std::cout << "\n"; - std::cout << "Size Data Computed From Graph:\n"; - std::cout << "=============================\n"; - gsize.print(); - std::cout << "\n"; + // Store computed size data in graph size object: + sg.export_gsize(&gsize); + } // Print size data from formula: - std::cout << "Size Data Computed With Formulas:\n"; 
- std::cout << "================================\n"; + std::cout << " Size Data Computed With Formulas:\n"; + //std::cout << " ================================\n"; formula.print(); std::cout << "\n"; - // Check consistency: - if(gsize.consistent(&formula)) - std::cout << "Values are consistent.\n"; - else - std::cout << "*** VALUES DIFFER ***\n"; - std::cout << "\n"; + // If size data was computed from graph, print that and check result: + if(opt_tc || opt_sg) { + std::cout << "\n"; + std::cout << " Size Data Computed From Graph:\n"; + //std::cout << " =============================\n"; + gsize.print(); + std::cout << "\n"; + + // Check consistency (a mismatch gets its own exit code): + if(gsize.consistent(&formula)) + std::cout << " Values are consistent.\n"; + else { + std::cerr << " *** SIZE VALUES DIFFER ***\n"; + exit(39); + } + std::cout << "\n"; + } + + // Write cost data. First open output file in append mode: + std::ofstream cost_file; + cost_file.open(COST_FILE, std::ios::out|std::ios::app); + if(cost_file.fail() || !cost_file.is_open()) { + std::cerr << "Opening '" << COST_FILE << "' failed.\n"; + exit(37); + } + + // Get tc_size where defined (from formula or computed from graph): + bigint_t tc_size = formula.tc_size(); + if(tc_size < 0) + tc_size = gsize.tc_size(); + + // Get tc_iter where defined: + int tc_iter = formula.tc_iter(); + if(tc_iter < 0) + tc_iter = gsize.tc_iter(); + + // Get tc_inst where defined: + bigint_t tc_inst = formula.tc_inst(); + if(tc_inst < 0) + tc_inst = gsize.tc_inst(); + + // If all three are defined, write cost information: + if(tc_size >= 0 && tc_iter >= 0 && tc_inst >= 0) { + cost_file << "INSERT INTO " << COST_TABLE << " VALUES(" << + "'tcff'" << "," << "'" << graph_id << "'" << "," << + tc_size << "," << tc_iter << "," << tc_inst << ");\n"; + } + + // Get sg_size where defined (from formula or computed from graph): + bigint_t sg_size = formula.sg_size(); + if(sg_size < 0) + sg_size = gsize.sg_size(); + + // Get sg_iter where defined: + int sg_iter = 
formula.sg_iter(); + if(sg_iter < 0) + sg_iter = gsize.sg_iter(); + + // Get sg_inst where defined: + bigint_t sg_inst = formula.sg_inst(); + if(sg_inst < 0) + sg_inst = gsize.sg_inst(); + + // If all three are defined, write cost information: + if(sg_size >= 0 && sg_iter >= 0 && sg_inst >= 0) { + cost_file << "INSERT INTO " << COST_TABLE << " VALUES(" << + "'sgff'" << "," << "'" << graph_id << "'" << "," << + sg_size << "," << sg_iter << "," << sg_inst << ");\n"; + } + + // Close file: + cost_file.close(); + if(cost_file.fail()) { + std::cerr << "Closing '" << COST_FILE << "' failed.\n"; + exit(38); + } + + // Print elapsed time in seconds: + time_t end_time; + time(&end_time); + int seconds = (int) difftime(end_time, start_time); + std::cout << " Elapsed time: " << seconds << "s\n"; } -- GitLab