From 31685075b8fb0f41ad08da76a04bfa75106f7b8f Mon Sep 17 00:00:00 2001 From: Stefan Brass <stefan.brass@informatik.uni-halle.de> Date: Fri, 4 Oct 2019 10:58:00 +0200 Subject: [PATCH] Graph generator --- graph/graph.cpp | 168 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 139 insertions(+), 29 deletions(-) diff --git a/graph/graph.cpp b/graph/graph.cpp index a3ed67b..fe72c01 100644 --- a/graph/graph.cpp +++ b/graph/graph.cpp @@ -2,7 +2,7 @@ // Project: rbench - Logic Programming and Database Benchmarks // Filename: graph/graph.cpp // Purpose: Alternative Graph Generator -// Last Change: 10.05.2019 +// Last Change: 03.10.2019 // Language: C++ // EMail: brass@informatik.uni-halle.de // WWW: http://www.informatik.uni-halle.de/~brass/ @@ -1421,6 +1421,12 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) { exit(25); } + // Check that k is not too large: + if(n < (k+1)) { + std::cout << "S[n,k] requires that n > k.\n"; + exit(26); + } + // Set number of nodes: out->set_nodes(n); @@ -1705,25 +1711,13 @@ void gen_graph_d(int n, int k, output_t out, gsize_t gsize) { } //----------------------------------------------------------------------------- -// U-Graph (Random Graph with Uniform Distribution of Node Degrees): +// Joint Procedure for U-Graph and A-Graph (does the real work): //----------------------------------------------------------------------------- -// This Graph Generation Algorithm is more or less copied from OpenRuleBench. -// The first parameter n is the number of nodes. -// The second parameter k the number of edges. -// The two next prime numbers p1 and p2 greater than n are computed -// and two arrays a1 and a2 of size p1 and p2 are generated. -// These arrays are filled with the numbers 1..n and p1/p2 - n entries -// with the value 0. -// The two arrays are randomly shuffled and then a1[i mod p1] -// is connected with a2[i mod p2] and all connections with 0 are ignored. 
- -void gen_graph_u(int n, int k, output_t out, gsize_t gsize) { +// The graph generation algorithm is from OpenRuleBench with slight +// modifications. - // Check that second parameter is defined: - if(k < 0) { - second_par_missing("U"); - } +void gen_graph_u_a(int n, int k, output_t out, gsize_t gsize, bool acyclic) { // Set number of nodes: out->set_nodes(n); @@ -1749,12 +1743,54 @@ void gen_graph_u(int n, int k, output_t out, gsize_t gsize) { // Generate edges: int generated_edges = 0; for(int i = 0; generated_edges < k; i++) { + + // Ensure termination: + if(i >= p1 * p2) { + std::cout << + "Number of edges too large in random graph.\n"; + exit(27); + } + + // Draw the two nodes of a candidate edge: int node1 = a1[i % p1]; int node2 = a2[i % p2]; - if(node1 != 0 && node2 != 0) { - out->write_edge(node1, node2); - generated_edges++; + + // Skip edges with dummy nodes (added to make lists coprime): + if(node1 == 0) + continue; + if(node2 == 0) + continue; + + // For acyclic graphs, orient edges towards the greater node: + if(acyclic) { + // Skip loops: + if(node1 == node2) + continue; + // Make edge pointing towards the greater node: + if(node1 > node2) { + int swap = node1; + node1 = node2; + node2 = swap; + } + + // Because we changed edges, duplicates are possible: + bool duplicate = false; + for(int j = 0; j < i; j++) { + int n1 = a1[j % p1]; + int n2 = a2[j % p2]; + if((node1 == n1 && node2 == n2) || + (node1 == n2 && node2 == n1)) { + duplicate = true; + break; + } + } + if(duplicate) + continue; } + + // Ok, edge has passed all tests: + out->write_edge(node1, node2); + generated_edges++; } // Free allocated memory: @@ -1766,6 +1802,81 @@ void gen_graph_u(int n, int k, output_t out, gsize_t gsize) { gsize->set_num_edges(k); } +//----------------------------------------------------------------------------- +// U-Graph (Random Graph with Uniform Distribution of Node Degrees): +//----------------------------------------------------------------------------- + 
+// This Graph Generation Algorithm is more or less copied from OpenRuleBench. +// The first parameter n is the number of nodes. +// The second parameter k is the number of edges. +// The next two prime numbers p1 and p2 greater than n are computed +// and two arrays a1 and a2 of size p1 and p2 are generated. +// Each array is filled with the numbers 1..n, padded with p1 - n +// (respectively p2 - n) entries with the value 0. +// The two arrays are randomly shuffled and then a1[i mod p1] +// is connected with a2[i mod p2] and all connections with 0 are ignored. + +void gen_graph_u(int n, int k, output_t out, gsize_t gsize) { + + // Check that second parameter is defined: + if(k < 0) { + second_par_missing("U"); + } + + // Check relation between n and k: + if(k > n * n) { + std::cout << + "Impossible number of edges (too large) in U-Graph"; + exit(28); + } + + // Call joint procedure for graphs U and A: + gen_graph_u_a(n, k, out, gsize, false); +} + +//----------------------------------------------------------------------------- +// A-Graph (Acyclic Random Graph): +//----------------------------------------------------------------------------- + +// This is similar to the OpenRuleBench algorithm, but improved. +// OpenRuleBench made the Graph acyclic by orienting generated edges +// (as in the U-Graph, see above) towards the greater node. +// However, this generates some duplicate edges. +// Furthermore, the OpenRuleBench scripts allowed loops (edges from a node +// to itself). Therefore, the graphs were not acyclic, although claimed +// to be so. +// +// We improved this as follows: +// Loops are immediately skipped and a new edge is generated. +// We do orient the edges towards the greater node as in OpenRuleBench, +// but we use a duplicate check: +// Duplicate edges are skipped. +// +// Note that here the node degrees are not uniform: +// Because we orient all edges towards the greater node, +// node 1 has an in-degree of 0 and the last node (node n) an out-degree of 0. 
+// However, the total degree (sum of in and out degree) is quite uniformly +// distributed. + +void gen_graph_a(int n, int k, output_t out, gsize_t gsize) { + + // Check that second parameter is defined: + if(k < 0) { + second_par_missing("A"); + } + + // Check relation between n and k: + if(k > n * (n-1) / 2) { + std::cout << + "Impossible number of edges (too large) in A-Graph"; + exit(29); + } + + // Call joint procedure for graphs U and A: + gen_graph_u_a(n, k, out, gsize, true); +} + + //============================================================================= // The main Function: //============================================================================= @@ -1780,7 +1891,7 @@ int main(int argc, str_t argv[]) // The program should be called with the graph and the output files: if(argc < 3) { std::cout << "Usage: ./graph GraphID OutputFile1 ...\n"; - exit(26); + exit(30); } // Get graph parameters, first code: @@ -1789,7 +1900,7 @@ int main(int argc, str_t argv[]) char graph_code = *p++; if(graph_code == '\0') { std::cout << "Impossible empty Graph ID.\n"; - exit(27); + exit(31); } // First parameter: @@ -1800,7 +1911,7 @@ int main(int argc, str_t argv[]) par_chars[i] = 0; if(i == 0) { std::cout << "First parameter in graph ID missing.\n"; - exit(28); + exit(32); } int par1 = str_int(par_chars); if(*p == 'k') { @@ -1835,7 +1946,7 @@ int main(int argc, str_t argv[]) par_chars[i] = 0; if(i == 0) { std::cout << "Second parameter in graph ID missing.\n"; - exit(29); + exit(33); } par2 = str_int(par_chars); if(*p == 'k') { @@ -1865,7 +1976,7 @@ int main(int argc, str_t argv[]) // Check that we have successfully parsed the entire graph ID: if(*p != '\0') { std::cout << "Unexpected characters at the end of graph ID.\n"; - exit(30); + exit(34); } // Open output file(s): @@ -1946,13 +2057,12 @@ int main(int argc, str_t argv[]) case 'a': case 'A': // Acyclic random graph - std::cout << "Please use the other graph generator " << - "for this graph.\n"; - 
exit(31); + gen_graph_a(par1, par2, &output, &formula); + break; default: std::cout << "Unknown graph type '" << graph_code << "'.\n"; - exit(32); + exit(35); } // Output number of edges written: -- GitLab