图特征工程_Python实现

前置知识: 特征工程_图

依赖环境:networkx

数据和环境准备:

import networkx as nx

# Build the karate club graph through networkx; all snippets below operate on G.
G = nx.karate_club_graph()
# Social network of the 34 members of a karate club

图的平均度

def average_degree(num_edges, num_nodes):
    """Return the average node degree of an undirected graph as an int.

    Each edge adds one to the degree of both of its endpoints, so the mean
    degree is ``2 * num_edges / num_nodes``.

    Args:
        num_edges: Number of edges in the graph.
        num_nodes: Number of nodes in the graph.

    Returns:
        The average degree rounded with Python's built-in ``round`` (exact
        halves go to the even neighbour) and converted to ``int``. A graph
        without nodes yields 0 instead of raising ``ZeroDivisionError``.
    """
    if num_nodes == 0:
        # No nodes means there is nothing to average; avoid dividing by zero.
        return 0
    return int(round(2 * num_edges / num_nodes))

# Read the edge and node counts from G, then print the rounded average degree.
num_edges = G.number_of_edges()
num_nodes = G.number_of_nodes()
avg_degree = average_degree(num_edges, num_nodes)
print("Average degree of karate club network is {}".format(avg_degree))

集群系数

def average_clustering_coefficient(G):
    """Return the average clustering coefficient of ``G``, rounded to 2 decimals.

    The computation itself is delegated to ``nx.average_clustering``.

    Args:
        G: The graph handed to ``nx.average_clustering``.

    Returns:
        The value reported by networkx, rounded to two decimal places.
    """
    return round(nx.average_clustering(G), 2)

# Compute the (two-decimal) average clustering coefficient of G and print it.
avg_cluster_coef = average_clustering_coefficient(G)
print("Average clustering coefficient of karate club network is {}".format(avg_cluster_coef))

PageRank 值

前置知识:PageRank 排序算法

# 方式1:迭代
def one_iter_pagerank(G, beta, r0, node_id):
    """Return the PageRank score of ``node_id`` after one update step.

    Every neighbour of ``node_id`` is treated as holding the same score
    ``r0`` and passes on ``beta * r0 / degree(neighbour)``. The random-jump
    term ``(1 - beta) * (1 / N)`` is then added, where ``N`` is the number of
    nodes in ``G``.

    Args:
        G: Graph whose neighbours and degrees are read.
        beta: Damping factor applied to the neighbour contributions.
        r0: Score assumed for every neighbour before the update.
        node_id: Node whose score is updated.

    Returns:
        The updated score, rounded to two decimal places.
    """
    # Both quantities are independent of the neighbour being visited.
    random_jump = (1 - beta) * (1 / G.number_of_nodes())
    damped_score = beta * r0

    score = 0
    for neighbour in nx.neighbors(G, node_id):
        # A neighbour splits its damped score evenly across its own edges.
        score = score + damped_score / G.degree[neighbour]
    score = score + random_jump
    return round(score, 2)

beta = 0.8 # damping factor
r0 = 1 / G.number_of_nodes() # initial PageRank value: the same 1/N for every node
node = 0
# Run a single PageRank update for node 0 and print the result.
r1 = one_iter_pagerank(G, beta, r0, node)
print("The PageRank value for node 0 after one iteration is {}".format(r1))

# 方式2:直接调用 nx.pagerank(迭代至收敛,返回"节点 -> PR 值"的字典),例如 nx.pagerank(G, alpha=beta)[node]

紧密中心性

def closeness_centrality(G, node=5):
    """Return the raw (unnormalized) closeness centrality of ``node``.

    The value is the reciprocal of the summed shortest-path lengths, counted
    in hops, from ``node`` to every node that
    ``nx.single_source_shortest_path`` reports as reachable.

    Args:
        G: Graph in which shortest paths are searched.
        node: Source node whose centrality is computed.

    Returns:
        ``1 / total_hops`` rounded to two decimal places, or ``0.0`` when no
        other node is reachable (which previously raised
        ``ZeroDivisionError``).
    """
    paths = nx.single_source_shortest_path(G, node)
    # A path lists its nodes, so its hop count is len(path) - 1. The source's
    # own path has zero hops and can stay in the sum, so nothing depends on
    # the order of the returned dict.
    total_hops = sum(len(path) - 1 for path in paths.values())
    if total_hops == 0:
        # Isolated node: there is no distance to invert.
        return 0.0
    return round(1 / total_hops, 2)

# Compute the raw closeness centrality of node 5 in G and print it with two decimals.
node = 5
closeness = closeness_centrality(G, node=node)
print("The karate club network has closeness centrality (raw) {:.2f}".format(closeness))

往年同期文章