Create algos_graphormer.pyx
Browse files- algos_graphormer.pyx +107 -0
algos_graphormer.pyx
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Microsoft Corporation and HuggingFace
|
| 2 |
+
# Licensed under the MIT License.
|
| 3 |
+
|
| 4 |
+
import cython
|
| 5 |
+
|
| 6 |
+
cimport numpy
|
| 7 |
+
from cython.parallel cimport parallel, prange
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Reduce this number if matrices are too big for large graphs
|
| 13 |
+
# Sentinel stored in both the distance matrix and the path matrix for node
# pairs with no connecting path; computed distances at or above this value
# are clamped to it.
UNREACHABLE_NODE_DISTANCE = 510
|
| 14 |
+
|
| 15 |
+
def floyd_warshall(adjacency_matrix):
    """
    Compute all-pairs shortest-path distances with the Floyd-Warshall algorithm.

    Args:
        adjacency_matrix: square 2-D NumPy array that can be cast to int32
            under NumPy's 'safe' casting rule. Off-diagonal zeros are treated
            as "no edge"; non-zero entries are used as edge costs. The
            diagonal is forced to 0. The input is copied, never modified.

    Returns:
        A tuple ``(M, path)`` of C-contiguous int32 arrays of shape (n, n):

        * ``M[i, j]`` is the shortest distance from i to j, capped at
          UNREACHABLE_NODE_DISTANCE.
        * ``path[i, j]`` is -1 when no intermediate node shortened i -> j,
          otherwise the index of the most recent intermediate node that
          did, or UNREACHABLE_NODE_DISTANCE when j is unreachable from i.

    Raises:
        AssertionError: if the matrix is not square.
        TypeError: from ``astype`` if the input cannot be safely cast to int32.
    """
    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols
    cdef unsigned int n = nrows

    adj_mat_copy = adjacency_matrix.astype(np.int32, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[numpy.int32_t, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[numpy.int32_t, ndim=2, mode='c'] path = np.full((n, n), -1, dtype=np.int32)

    cdef unsigned int i, j, k
    cdef numpy.int32_t M_ij, M_ik, cost_ikkj
    # Read the module-level sentinel once so the loops compare plain C ints.
    cdef numpy.int32_t unreachable = UNREACHABLE_NODE_DISTANCE
    cdef numpy.int32_t* M_ptr
    cdef numpy.int32_t* M_i_ptr
    cdef numpy.int32_t* M_k_ptr

    # An empty graph has no element whose address could be taken below.
    if n == 0:
        return M, path

    M_ptr = &M[0, 0]

    # Set the distance of unconnected node pairs to the unreachable sentinel.
    # Tuple indexing (M[i, j]) keeps the access on Cython's typed-buffer fast
    # path; chained indexing (M[i][j]) goes through Python objects.
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i, j] = 0
            elif M[i, j] == 0:
                M[i, j] = unreachable

    # Floyd-Warshall relaxation over raw row pointers.
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i, j] = k

    # Clamp unreachable pairs and mark them in the path matrix as well.
    for i in range(n):
        for j in range(n):
            if M[i, j] >= unreachable:
                path[i, j] = unreachable
                M[i, j] = unreachable

    return M, path
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_all_edges(path, i, j):
    """
    Recursively expand the intermediate nodes recorded between i and j.

    ``path[i][j]`` is read as an intermediate node index, with -1 meaning
    "no intermediate node". The result lists the intermediate nodes in
    order from i to j and excludes both endpoints.
    """
    cdef int mid = path[i][j]
    if mid != -1:
        # Expand the i -> mid half first, then the mid -> j half.
        before_mid = get_all_edges(path, i, mid)
        after_mid = get_all_edges(path, mid, j)
        return before_mid + [mid] + after_mid
    return []
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def gen_edge_input(max_dist, path, edge_feat):
    """
    Gather the edge features along the recorded path between every node pair.

    Args:
        max_dist: size of the third (path position) axis of the output.
        path: square (n, n) integer array of intermediate-node indices, in
            the format produced by ``floyd_warshall`` (-1 for "no
            intermediate node", UNREACHABLE_NODE_DISTANCE for unreachable).
        edge_feat: integer array indexed as ``edge_feat[src, dst, :]``;
            expected to have shape (n, n, num_edge_features).

    Returns:
        int32 array of shape (n, n, max_dist, edge_feat.shape[-1]), filled
        with -1. ``out[i, j, k, :]`` holds the features of the k-th edge on
        the path from i to j. Entries for i == j, for unreachable pairs and
        for positions beyond the path length stay -1.

    Raises:
        AssertionError: if ``path`` is not square.
        TypeError: from ``astype`` if an input cannot be safely cast to int64.
        IndexError: if a path has more edges than ``max_dist``.
    """
    (nrows, ncols) = path.shape
    assert nrows == ncols
    cdef unsigned int n = nrows
    cdef unsigned int max_dist_copy = max_dist

    # Use an explicit 64-bit dtype: the platform `long` is 32-bit on some
    # systems, where the 'safe' cast of int64 input would be rejected.
    path_copy = path.astype(np.int64, order='C', casting='safe', copy=True)
    edge_feat_copy = edge_feat.astype(np.int64, order='C', casting='safe', copy=True)
    assert path_copy.flags['C_CONTIGUOUS']
    assert edge_feat_copy.flags['C_CONTIGUOUS']

    cdef numpy.ndarray[numpy.int32_t, ndim=4, mode='c'] edge_fea_all = np.full(
        (n, n, max_dist_copy, edge_feat.shape[-1]), -1, dtype=np.int32
    )
    cdef unsigned int i, j, k, num_path

    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            # NOTE(review): an intermediate node whose index equals
            # UNREACHABLE_NODE_DISTANCE cannot be told apart from the
            # unreachable marker, so graphs with more nodes than that may
            # have reachable pairs skipped here - confirm against callers.
            if path_copy[i, j] == UNREACHABLE_NODE_DISTANCE:
                continue
            # Full node sequence i -> ... -> j; kept in its own name so the
            # `path` argument is not rebound.
            node_seq = [i] + get_all_edges(path_copy, i, j) + [j]
            num_path = len(node_seq) - 1
            for k in range(num_path):
                edge_fea_all[i, j, k, :] = edge_feat_copy[node_seq[k], node_seq[k + 1], :]

    return edge_fea_all
|