Create algos_graphormer.pyx
Browse files- algos_graphormer.pyx +107 -0
algos_graphormer.pyx
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Microsoft Corporation and HuggingFace
|
2 |
+
# Licensed under the MIT License.
|
3 |
+
|
4 |
+
import cython
|
5 |
+
|
6 |
+
cimport numpy
|
7 |
+
from cython.parallel cimport parallel, prange
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
|
12 |
+
# Reduce this number if matrices are too big for large graphs
|
13 |
+
# Sentinel used by floyd_warshall: off-diagonal zero entries of the adjacency
# matrix start at this distance, and every final distance >= this value is
# clamped to it (the matching `path` entry is set to the same value).
# gen_edge_input skips node pairs whose `path` entry equals this value.
UNREACHABLE_NODE_DISTANCE = 510
|
14 |
+
|
15 |
+
def floyd_warshall(adjacency_matrix):
    """
    Applies the Floyd-Warshall algorithm to the adjacency matrix, to compute the
    shortest-path distance between all pairs of nodes, capped at
    UNREACHABLE_NODE_DISTANCE.

    Args:
        adjacency_matrix: square 2-D numpy array whose dtype can be safely cast
            to int32. Off-diagonal zeros are read as "no edge"; non-zero entries
            are used as edge costs. The input is copied, never modified.

    Returns:
        A tuple ``(M, path)`` of int32 arrays of shape (n, n).
        ``M[i, j]`` is the shortest distance from i to j: 0 on the diagonal and
        UNREACHABLE_NODE_DISTANCE when no shorter path was found.
        ``path[i, j]`` is the last intermediate node k that improved the i -> j
        distance, -1 if no intermediate node improved it, or
        UNREACHABLE_NODE_DISTANCE for unreachable pairs.
    """
    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols, "adjacency_matrix must be square"
    cdef unsigned int n = nrows
    # Read the module-level constant once so the loops compare plain C integers
    # instead of looking up a Python global on every iteration.
    cdef numpy.int32_t unreachable = UNREACHABLE_NODE_DISTANCE

    adj_mat_copy = adjacency_matrix.astype(np.int32, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[numpy.int32_t, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[numpy.int32_t, ndim=2, mode='c'] path = np.full((n, n), -1, dtype=np.int32)

    cdef unsigned int i, j, k
    cdef numpy.int32_t M_ij, M_ik, cost_ikkj
    cdef numpy.int32_t* M_ptr
    cdef numpy.int32_t* M_i_ptr
    cdef numpy.int32_t* M_k_ptr

    # An empty graph has no M[0, 0] to take the address of, so hand back the
    # (empty) results directly.
    if n == 0:
        return M, path
    M_ptr = &M[0, 0]

    # Initialise distances: 0 on the diagonal, `unreachable` where there is no edge.
    # Tuple indexing (M[i, j]) keeps the access on Cython's typed-buffer fast path.
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i, j] = 0
            elif M[i, j] == 0:
                M[i, j] = unreachable

    # Floyd-Warshall relaxation: try every node k as an intermediate stop.
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i, j] = k

    # Clamp every distance at or above the sentinel and flag the pair in `path`.
    for i in range(n):
        for j in range(n):
            if M[i, j] >= unreachable:
                path[i, j] = unreachable
                M[i, j] = unreachable

    return M, path
|
64 |
+
|
65 |
+
|
66 |
+
def get_all_edges(path, i, j):
    """
    Recursively expand the intermediate nodes recorded in `path` between nodes i and j.

    `path[i][j]` holds either -1 (nothing lies between i and j) or an
    intermediate node k; in the latter case the result is the expansion of
    (i, k), followed by k, followed by the expansion of (k, j).

    Returns the list of intermediate node indices in order, excluding i and j.
    """
    cdef int mid = path[i][j]
    # Base case: no intermediate node was recorded for this pair.
    if mid == -1:
        return []
    before_mid = get_all_edges(path, i, mid)
    after_mid = get_all_edges(path, mid, j)
    return before_mid + [mid] + after_mid
|
75 |
+
|
76 |
+
|
77 |
+
def gen_edge_input(max_dist, path, edge_feat):
    """
    Collects, for every ordered pair of distinct nodes, the features of the edges
    along the path recorded in `path`.

    Args:
        max_dist: size of the third (hop) axis of the result. A path with more
            edges than this indexes past that axis.
        path: square 2-D integer array of intermediate nodes, in the format
            get_all_edges expands (-1 means no intermediate node). Pairs whose
            entry equals UNREACHABLE_NODE_DISTANCE are skipped.
        edge_feat: integer array indexed as ``edge_feat[src, dst, :]``; the size
            of its last axis is the number of edge features.

    Returns:
        int32 array of shape (num_nodes, num_nodes, max_dist, num_edge_features).
        Dim 1 is the input node, dim 2 the output node, dim 3 the position of
        the edge along the path, dim 4 the feature. Entries that are never
        written (diagonal, skipped pairs, hops beyond the path) stay at -1.
    """
    (nrows, ncols) = path.shape
    assert nrows == ncols
    cdef unsigned int n = nrows
    cdef unsigned int max_dist_copy = max_dist

    # Use an explicit 64-bit dtype so the 'safe' cast does not depend on the
    # platform's C long width.
    path_copy = path.astype(np.int64, order='C', casting='safe', copy=True)
    edge_feat_copy = edge_feat.astype(np.int64, order='C', casting='safe', copy=True)
    assert path_copy.flags['C_CONTIGUOUS']
    assert edge_feat_copy.flags['C_CONTIGUOUS']

    cdef numpy.ndarray[numpy.int32_t, ndim=4, mode='c'] edge_fea_all = np.full((n, n, max_dist_copy, edge_feat.shape[-1]), -1, dtype=np.int32)
    cdef unsigned int i, j, k, num_path

    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            if path_copy[i, j] == UNREACHABLE_NODE_DISTANCE:
                continue
            # Full node sequence i -> ... -> j; kept in its own local so the
            # `path` argument is not rebound.
            nodes = [i] + get_all_edges(path_copy, i, j) + [j]
            num_path = len(nodes) - 1
            # Hop k is the edge nodes[k] -> nodes[k+1].
            for k in range(num_path):
                edge_fea_all[i, j, k, :] = edge_feat_copy[nodes[k], nodes[k+1], :]

    return edge_fea_all
|