def aggregate(self, samples, input_features, dims, num_samples, support_sizes,
              batch_size=None, aggregators=None, name=None, concat=False,
              model_size="small"):
    """ At each layer, aggregate hidden representations of neighbors to compute
        the hidden representations at the next layer.

    Args:
        samples: a list of samples of variable hops away for convolving at each
            layer of the network. Length is the number of layers + 1. Each is a
            vector of node indices.
        input_features: the input features for each sample of various hops away.
        dims: a list of dimensions of the hidden representations from the input
            layer to the final layer. Length is the number of layers + 1.
        num_samples: list of number of samples for each layer.
        support_sizes: the number of nodes to gather information from for each layer.
        batch_size: the number of inputs (different for batch inputs and
            negative samples).

    Returns:
        The hidden representation at the final layer for all nodes in the batch.
    """
    if batch_size is None:
        batch_size = self.batch_size

    # length: number of layers + 1
    hidden = [tf.nn.embedding_lookup(input_features, node_samples)
              for node_samples in samples]
    new_agg = aggregators is None
    if new_agg:
        aggregators = []
    for layer in range(len(num_samples)):
        if new_agg:
            dim_mult = 2 if concat and (layer != 0) else 1
            # aggregator at current layer; the last layer uses an identity
            # activation instead of the default nonlinearity
            if layer == len(num_samples) - 1:
                aggregator = self.aggregator_cls(dim_mult * dims[layer], dims[layer + 1],
                        act=lambda x: x, dropout=self.placeholders['dropout'],
                        name=name, concat=concat, model_size=model_size)
            else:
                aggregator = self.aggregator_cls(dim_mult * dims[layer], dims[layer + 1],
                        dropout=self.placeholders['dropout'],
                        name=name, concat=concat, model_size=model_size)
            aggregators.append(aggregator)
        else:
            aggregator = aggregators[layer]
        # hidden representation at current layer for all support nodes
        # that are various hops away
        next_hidden = []
        # as layer increases, the number of support nodes needed decreases
        for hop in range(len(num_samples) - layer):
            dim_mult = 2 if concat and (layer != 0) else 1
            neigh_dims = [batch_size * support_sizes[hop],
                          num_samples[len(num_samples) - hop - 1],
                          dim_mult * dims[layer]]
            h = aggregator((hidden[hop],
                            tf.reshape(hidden[hop + 1], neigh_dims)))
            next_hidden.append(h)
        hidden = next_hidden
    return hidden[0], aggregators
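To see how the reshape bookkeeping in the inner loop works out, here is an illustrative shape walk-through. The batch size and hidden dimensions below are made-up values (only the fan-outs 25 and 10 match the paper's defaults); this is a sketch of the index arithmetic, not code from the reference implementation:

    # Toy shape walk-through of the neigh_dims computation above.
    batch_size = 512
    num_samples = [25, 10]        # fan-out of layer 1 and layer 2
    support_sizes = [1, 10, 250]  # as produced by sample(): 1, 10, 10 * 25
    dims = [128, 128, 128]        # input and hidden dimensions, concat=False

    for layer in range(len(num_samples)):
        for hop in range(len(num_samples) - layer):
            neigh_dims = [batch_size * support_sizes[hop],
                          num_samples[len(num_samples) - hop - 1],
                          dims[layer]]
            print("layer", layer, "hop", hop, "->", neigh_dims)
    # layer 0 hop 0 -> [512, 10, 128]
    # layer 0 hop 1 -> [5120, 25, 128]
    # layer 1 hop 0 -> [512, 10, 128]

Each pass over the hops shrinks the hidden list by one entry, so after the last layer only hidden[0], the representation of the original batch nodes, remains.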
GraphSAGE (Graph SAmple and aggreGatE) is a graph neural network model for learning node embeddings, used in applications such as recommender systems. Rather than computing a representation from a node's full neighborhood, it samples a fixed number of neighbors and aggregates their features; this aggregation is iterated through multiple layers of aggregators, with each layer playing a role analogous to a layer in a convolutional neural network.
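To make the sample-and-aggregate idea concrete before looking at the source, here is a minimal, self-contained NumPy sketch of one round of mean aggregation. The graph, features, and fan-out are made-up toy values, not part of the reference implementation:

    import numpy as np

    # Toy graph: 5 nodes, 4-dim features, fixed adjacency lists (all hypothetical).
    features = np.random.rand(5, 4)
    neighbors = {0: [1, 2], 1: [0, 3], 2: [0, 4], 3: [1], 4: [2]}

    def mean_aggregate(node, fanout=2):
        # Sample a fixed number of neighbors with replacement, as GraphSAGE does
        # when a node has fewer neighbors than the fan-out.
        sampled = np.random.choice(neighbors[node], size=fanout, replace=True)
        neigh_mean = features[sampled].mean(axis=0)
        # Concatenate self features with aggregated neighbor features
        # (the concat=True variant of the model).
        return np.concatenate([features[node], neigh_mean])

    h = mean_aggregate(0)   # 8-dim: 4 (self) + 4 (neighbor mean)

Stacking K such rounds lets a node's representation draw on information up to K hops away, which is what the aggregate() method above and the sample() method below implement in TensorFlow.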
2. Code Walkthrough and Hands-On Practice
Sampling code:
def sample(self, inputs, layer_infos, batch_size=None):
    """ Sample neighbors to be the supportive fields for multi-layer convolutions.

    Args:
        inputs: batch inputs
        layer_infos: list of SAGEInfo namedtuples describing each layer's
            sampler and fan-out.
        batch_size: the number of inputs (different for batch inputs and
            negative samples).
    """
    if batch_size is None:
        batch_size = self.batch_size
    samples = [inputs]
    # size of convolution support at each layer per node
    support_size = 1
    support_sizes = [support_size]
    for k in range(len(layer_infos)):
        t = len(layer_infos) - k - 1
        support_size *= layer_infos[t].num_samples
        sampler = layer_infos[t].neigh_sampler
        node = sampler((samples[k], layer_infos[t].num_samples))
        samples.append(tf.reshape(node, [support_size * batch_size, ]))
        support_sizes.append(support_size)
    return samples, support_sizes


SAGEInfo = namedtuple("SAGEInfo",
    ['layer_name',     # name of the layer (to get feature embedding etc.)
     'neigh_sampler',  # callable neigh_sampler constructor
     'num_samples',    # number of neighbors sampled at this layer
     'output_dim',     # the output (i.e., hidden) dimension
    ])
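The caller builds one SAGEInfo per layer and passes the list to sample(). The following self-contained NumPy rewrite of the loop above shows how samples and support_sizes grow; the neighbor table and fan-outs are made-up toy values, and uniform_sample stands in for the repo's UniformNeighborSampler:

    import numpy as np

    rng = np.random.default_rng(0)
    adj = rng.integers(0, 100, size=(100, 20))  # hypothetical [num_nodes, max_degree] table

    def uniform_sample(nodes, num_neighbors):
        # For each input node, pick num_neighbors random columns of its adjacency row.
        nodes = np.asarray(nodes)
        cols = rng.integers(0, adj.shape[1], size=(len(nodes), num_neighbors))
        return adj[nodes[:, None], cols].reshape(-1)

    fanouts = [25, 10]            # layer 1 and layer 2 fan-outs (illustrative)
    inputs = np.array([3, 7])     # a batch of two nodes
    samples = [inputs]
    support_size, support_sizes = 1, [1]
    for k in range(len(fanouts)):
        t = len(fanouts) - k - 1  # walk the layer infos from the last entry inward
        support_size *= fanouts[t]
        samples.append(uniform_sample(samples[k], fanouts[t]))
        support_sizes.append(support_size)

    print(support_sizes)               # [1, 10, 250]
    print([s.shape for s in samples])  # [(2,), (20,), (500,)]

The flattened node-id vectors in samples and the support_sizes list are exactly the first and fifth arguments that the aggregate() method shown earlier consumes.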