From 31685075b8fb0f41ad08da76a04bfa75106f7b8f Mon Sep 17 00:00:00 2001
From: Stefan Brass <stefan.brass@informatik.uni-halle.de>
Date: Fri, 4 Oct 2019 10:58:00 +0200
Subject: [PATCH] Graph generator

---
 graph/graph.cpp | 168 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 139 insertions(+), 29 deletions(-)

diff --git a/graph/graph.cpp b/graph/graph.cpp
index a3ed67b..fe72c01 100644
--- a/graph/graph.cpp
+++ b/graph/graph.cpp
@@ -2,7 +2,7 @@
 // Project:	rbench - Logic Programming and Database Benchmarks
 // Filename:	graph/graph.cpp
 // Purpose:	Alternative Graph Generator
-// Last Change:	10.05.2019
+// Last Change:	03.10.2019
 // Language:	C++
 // EMail:	brass@informatik.uni-halle.de
 // WWW:		http://www.informatik.uni-halle.de/~brass/
@@ -1421,6 +1421,12 @@ void gen_graph_s(int n, int k, output_t out, gsize_t gsize) {
 		exit(25);
 	}
 
+	// Check that k is not too large:
+	if(n < (k+1)) {
+		std::cout << "S[n,k] requires that n > k.\n";
+		exit(26);
+	}
+
 	// Set number of nodes:
 	out->set_nodes(n);
 
@@ -1705,25 +1711,13 @@ void gen_graph_d(int n, int k, output_t out, gsize_t gsize) {
 }
 
 //-----------------------------------------------------------------------------
-// U-Graph (Random Graph with Uniform Distribution of Node Degrees):
+// Joint Procedure for U-Graph and A-Graph (does the real work):
 //-----------------------------------------------------------------------------
 
-// This Graph Generation Algorithm is more or less copied from OpenRuleBench.
-// The first parameter n is the number of nodes.
-// The second parameter k the number of edges.
-// The two next prime numbers p1 and p2 greater than n are computed
-// and two arrays a1 and a2 of size p1 and p2 are generated.
-// These arrays are filled with the numbers 1..n and p1/p2 - n entries
-// with the value 0.
-// The two arrays are randomly shuffled and then a1[i mod p1]
-// is connected with a2[i mod p2] and all connections with 0 are ignored.
-
-void gen_graph_u(int n, int k, output_t out, gsize_t gsize) {
+// The graph generation algorithm is from OpenRuleBench with slight
+// modifications.
 
-	// Check that second parameter is defined:
-	if(k < 0) {
-		second_par_missing("U");
-	}
+void gen_graph_u_a(int n, int k, output_t out, gsize_t gsize, bool acyclic) {
 
 	// Set number of nodes:
 	out->set_nodes(n);
@@ -1749,12 +1743,54 @@ void gen_graph_u(int n, int k, output_t out, gsize_t gsize) {
 	// Generate edges:
 	int generated_edges = 0;
 	for(int i = 0; generated_edges < k; i++) {
+
+		// Ensure termination:
+		if(i >= p1 * p2) {
+			std::cout <<
+				"Number of edges too large in random graph.\n";
+			exit(27);
+		}
+
+		// Draw the two nodes of a candidate edge:
 		int node1 = a1[i % p1];
 		int node2 = a2[i % p2];
-		if(node1 != 0 && node2 != 0) {
-			out->write_edge(node1, node2);
-			generated_edges++;
+
+		// Skip edges with dummy nodes (added to make lists coprime):
+		if(node1 == 0)
+			continue;
+		if(node2 == 0)
+			continue;
+
+		// For acyclic graphs, orient edges towards the greater node:
+		if(acyclic) {
+			// Skip loops:
+			if(node1 == node2)
+				continue;
+			// Make edge pointing towards the greater node:
+			if(node1 > node2) {
+				int swap = node1;
+				node1 = node2;
+				node2 = swap;
+			}
+
+			// Because we changed edges, duplicates are possible:
+			bool duplicate = false;
+			for(int j = 0; j < i; j++) {
+				int n1 = a1[j % p1];
+				int n2 = a2[j % p2];
+				if((node1 == n1 && node2 == n2) ||
+					(node1 == n2 && node2 == n1)) {
+					duplicate = true;
+					break;
+				}
+			}
+			if(duplicate)
+				continue;
 		}
+
+		// Ok, edge has passed all tests:
+		out->write_edge(node1, node2);
+		generated_edges++;
 	}
 
 	// Free allocated memory:
@@ -1766,6 +1802,81 @@ void gen_graph_u(int n, int k, output_t out, gsize_t gsize) {
 	gsize->set_num_edges(k);
 }
 
+//-----------------------------------------------------------------------------
+// U-Graph (Random Graph with Uniform Distribution of Node Degrees):
+//-----------------------------------------------------------------------------
+
+// This Graph Generation Algorithm is more or less copied from OpenRuleBench.
+// The first parameter n is the number of nodes.
+// The second parameter k is the number of edges.
+// The two next prime numbers p1 and p2 greater than n are computed
+// and two arrays a1 and a2 of size p1 and p2 are generated.
+// These arrays are filled with the numbers 1..n plus p1-n (resp. p2-n)
+// entries with the value 0.
+// The two arrays are randomly shuffled and then a1[i mod p1]
+// is connected with a2[i mod p2] and all connections with 0 are ignored.
+
+void gen_graph_u(int n, int k, output_t out, gsize_t gsize) {
+
+	// Check that second parameter (number of edges k) is defined:
+	if(k < 0) {
+		second_par_missing("U");
+	}
+
+	// At most n*n edges are possible (64-bit product, int would overflow):
+	if(k > static_cast<long long>(n) * n) {
+		std::cout <<
+			"Impossible number of edges (too large) in U-Graph.\n";
+		exit(28);
+	}
+
+	// Call joint procedure for graphs U and A (false: not acyclic):
+	gen_graph_u_a(n, k, out, gsize, false);
+}
+
+//-----------------------------------------------------------------------------
+// A-Graph (Acyclic Random Graph):
+//-----------------------------------------------------------------------------
+
+// This is similar to the OpenRuleBench algorithm, but improved.
+// OpenRuleBench made the Graph acyclic by ordering generated edges
+// (as in the U-Graph, see above) towards the greater node.
+// However, this generates some duplicate edges.
+// Furthermore, the OpenRuleBench scripts allowed loops (edges from a node
+// to itself). Therefore, the graphs were not acyclic, although claimed
+// to be so.
+//
+// We improved this as follows:
+// Loops are immediately skipped and a new edge is generated.
+// We do orient the edges towards the greater node as in OpenRuleBench,
+// but we use a duplicate check:
+// Duplicate edges are skipped.
+//
+// Note that here the node degrees are not uniform:
+// Because we orient all edges towards the greater node,
+// node 1 has an in-degree of 0 and the last node (node n) an out-degree of 0.
+// However, the total degree (sum of in and out degree) is quite uniformly
+// distributed.
+
+void gen_graph_a(int n, int k, output_t out, gsize_t gsize) {
+
+	// Check that second parameter (number of edges k) is defined:
+	if(k < 0) {
+		second_par_missing("A");
+	}
+
+	// At most n*(n-1)/2 edges without loops/duplicates (64-bit product):
+	if(k > static_cast<long long>(n) * (n-1) / 2) {
+		std::cout <<
+			"Impossible number of edges (too large) in A-Graph.\n";
+		exit(29);
+	}
+
+	// Call joint procedure for graphs U and A (true: acyclic):
+	gen_graph_u_a(n, k, out, gsize, true);
+}
+
+
 //=============================================================================
 // The main Function:
 //=============================================================================
@@ -1780,7 +1891,7 @@ int main(int argc, str_t argv[])
 	// The program should be called with the graph and the output files:
 	if(argc < 3) {
 		std::cout << "Usage: ./graph GraphID OutputFile1 ...\n";
-		exit(26);
+		exit(30);
 	}
 
 	// Get graph parameters, first code:
@@ -1789,7 +1900,7 @@ int main(int argc, str_t argv[])
 	char graph_code = *p++;
 	if(graph_code == '\0') {
 		std::cout << "Impossible empty Graph ID.\n";
-		exit(27);
+		exit(31);
 	}
 
 	// First parameter:
@@ -1800,7 +1911,7 @@ int main(int argc, str_t argv[])
 	par_chars[i] = 0;
 	if(i == 0) {
 		std::cout << "First parameter in graph ID missing.\n";
-		exit(28);
+		exit(32);
 	}
 	int par1 = str_int(par_chars);
 	if(*p == 'k') {
@@ -1835,7 +1946,7 @@ int main(int argc, str_t argv[])
 		par_chars[i] = 0;
 		if(i == 0) {
 			std::cout << "Second parameter in graph ID missing.\n";
-			exit(29);
+			exit(33);
 		}
 		par2 = str_int(par_chars);
 		if(*p == 'k') {
@@ -1865,7 +1976,7 @@ int main(int argc, str_t argv[])
 	// Check that we have successfully parsed the entire graph ID:
 	if(*p != '\0') {
 		std::cout << "Unexpected characters at the end of graph ID.\n";
-		exit(30);
+		exit(34);
 	}
 
 	// Open output file(s):
@@ -1946,13 +2057,12 @@ int main(int argc, str_t argv[])
 		case 'a':
 		case 'A':
 			// Acyclic random graph
-			std::cout << "Please use the other graph generator " <<
-				"for this graph.\n";
-			exit(31);
+			gen_graph_a(par1, par2, &output, &formula);
+			break;
 		default:
 			std::cout << "Unknown graph type '" << graph_code <<
 				"'.\n";
-			exit(32);
+			exit(35);
 	}
 
 	// Output number of edges written:
-- 
GitLab