前置知识:特征工程_图
依赖环境:networkx
数据和环境准备:
import networkx as nx
# Social network of the 34 members of a karate club.
G = nx.karate_club_graph()
图的平均度
def average_degree(num_edges, num_nodes):
    """Return the average node degree, rounded to the nearest integer.

    Each edge is counted twice (once per endpoint), so the mean degree is
    ``2 * num_edges / num_nodes``.

    Args:
        num_edges: Number of edges in the graph.
        num_nodes: Number of nodes in the graph.

    Returns:
        The mean degree as an ``int``, rounded with Python's ``round`` (exact
        halves go to the even neighbour). ``0`` when the graph has no nodes.
    """
    if num_nodes == 0:
        # An empty graph has no degrees to average; avoid ZeroDivisionError.
        return 0
    return int(round(2 * num_edges / num_nodes))
# Measure the size of G, then report its rounded average degree.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
avg_degree = average_degree(num_edges=num_edges, num_nodes=num_nodes)
print("Average degree of karate club network is {}".format(avg_degree))
集群系数
def average_clustering_coefficient(G):
    """Return the average clustering coefficient of ``G`` to two decimals.

    Args:
        G: Graph handed straight to ``nx.average_clustering``.

    Returns:
        The result of ``nx.average_clustering(G)`` rounded to two decimal
        places.
    """
    return round(nx.average_clustering(G), 2)
# Compute the rounded average clustering coefficient of G and print it.
avg_cluster_coef = average_clustering_coefficient(G)
print("Average clustering coefficient of karate club network is {}".format(avg_cluster_coef))
PageRank 值
前置知识:PageRank 排序算法
# 方式1:迭代
def one_iter_pagerank(G, beta, r0, node_id):
    """Return the PageRank score of ``node_id`` after one update step.

    Every neighbour is treated as holding the same current score ``r0`` and
    as splitting it evenly across its own edges.

    Args:
        G: Graph exposing ``degree`` and ``number_of_nodes()``.
        beta: Damping factor weighting the share received from neighbours.
        r0: Current score assigned to each neighbour.
        node_id: Node whose score is being updated.

    Returns:
        The updated score rounded to two decimal places.
    """
    damped_share = beta * r0
    score = 0
    # Update 1: accumulate the damped share passed along by each neighbour,
    # divided by that neighbour's degree.
    for neighbour in nx.neighbors(G, node_id):
        score += damped_share / G.degree[neighbour]
    # Update 2: add the random-jump term, spread uniformly over all nodes.
    score += (1 - beta) * (1 / G.number_of_nodes())
    return round(score, 2)
# One PageRank update for node 0, starting from a uniform score of 1/N.
node = 0
beta = 0.8  # damping factor
r0 = 1 / G.number_of_nodes()  # initial PageRank value
r1 = one_iter_pagerank(G, beta=beta, r0=r0, node_id=node)
print("The PageRank value for node 0 after one iteration is {}".format(r1))
# 方式2:直接调用 nx.pagerank,例如 nx.pagerank(G, alpha=beta)[node](返回的是迭代收敛后的 PR 值,而非单次迭代的结果)
紧密中心性
def closeness_centrality(G, node=5):
    """Return the raw (unnormalised) closeness centrality of ``node``.

    Raw closeness is the reciprocal of the summed shortest-path distances
    from ``node`` to every node reachable from it.

    Args:
        G: Graph to measure.
        node: Source node; defaults to 5.

    Returns:
        ``1 / total_distance`` rounded to two decimal places, or ``0.0`` when
        no other node is reachable from ``node``.
    """
    # Only hop counts are needed, so request lengths rather than full paths.
    # The source itself maps to distance 0 and does not affect the sum.
    distances = dict(nx.single_source_shortest_path_length(G, node))
    total_distance = sum(distances.values())
    if total_distance == 0:
        # Isolated node: nothing is reachable, so avoid dividing by zero.
        return 0.0
    return round(1 / total_distance, 2)
# Evaluate the raw closeness of node 5 and print it.
node = 5
closeness = closeness_centrality(G, node=node)
# The value is formatted with two decimal places when printed.
print("The karate club network has closeness centrality (raw) {:.2f}".format(closeness))