sna
In[1]:
import networkx as nx
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from networkx.readwrite import json_graph
import json
%matplotlib inline
In[2]:
# Load the graph and print its summary.
# NOTE(review): read_gpickle unpickles the file, and unpickling can execute
# arbitrary code -- only load graph files that come from a trusted source.
h = nx.read_gpickle("graph.bin")
print(nx.info(h))
Name: ()
Type: DiGraph
Number of nodes: 4120
Number of edges: 4678
Average in degree: 1.1354
Average out degree: 1.1354
In[3]:
from operator import itemgetter
def getTopCentrality(centralityFun, h, n):
    """Print and return the ``n`` highest-scoring nodes of a centrality measure.

    Parameters
    ----------
    centralityFun : callable
        Called as ``centralityFun(h)``; must return a mapping of node -> score.
    h : graph
        Passed to ``centralityFun`` unchanged.
    n : int
        Number of leading entries to keep. Fewer are returned when the
        mapping has fewer than ``n`` items.

    Returns
    -------
    list of (node, score) tuples
        Sorted by descending score. The sort is stable, so nodes with equal
        scores keep the mapping's iteration order.

    Side effects
    ------------
    Prints a separator row followed by one ``|rank| node| score |`` row per
    returned entry; the caller is expected to print the header row first.
    """
    ranked = sorted(centralityFun(h).items(), key=itemgetter(1), reverse=True)
    top = ranked[:n]
    print('| | | |')
    for rank, (node, score) in enumerate(top, start=1):
        print('|%2i| %12s| %f |' % (rank, node, score))
    return top
# Centrality measures to rank by, and the label printed in each table header.
# The two lists are parallel: names[k] labels centraFun[k].
centraFun = [
    nx.degree_centrality,
    nx.in_degree_centrality,
    nx.out_degree_centrality,
    nx.betweenness_centrality,
    nx.closeness_centrality,
    nx.eigenvector_centrality,
    nx.pagerank,
]
names = [
    'Degree Centrality',
    'In-degree Centrality',
    'Out-degree Centrality',
    'Betweenness Centrality',
    'Closeness Centrality',
    'Eigenvector Centrality',
    'Pagerank',
]

# tops[k] is the top-10 (node, score) list for names[k]; later cells index
# into it by position, so the order above must not change.
tops = []
for name, fun in zip(names, centraFun):
    print('| Rank | User name | %18s|' % name)
    top = getTopCentrality(fun, h, 10)
    print('\n')
    tops.append(top)
| Rank | User name |Degree Centrality|
| | | |
| 1| angusshire| 0.267298 |
| 2|batermj| 0.051712 |
| 3| fly51fly| 0.043943 |
| 4| nelsonic| 0.042000 |
| 5|donnemartin| 0.027919 |
| 6| daimajia| 0.025492 |
| 7| trietptm| 0.021850 |
| 8|galaris| 0.020636 |
| 9| gauravssnl| 0.018208 |
|10| fperez| 0.017237 |
| Rank | User name | In-degree Centrality|
| | | |
| 1|donnemartin| 0.027919 |
| 2| daimajia| 0.025492 |
| 3| angusshire| 0.022578 |
| 4| fperez| 0.017237 |
| 5| amueller| 0.014324 |
| 6| mrocklin| 0.013596 |
| 7|Zulko| 0.013110 |
| 8| ppwwyyxx| 0.011411 |
| 9| pudo| 0.010439 |
|10|mahmoud| 0.009954 |
| Rank | User name | Out-degree Centrality|
| | | |
| 1| angusshire| 0.244720 |
| 2|batermj| 0.050012 |
| 3| fly51fly| 0.040787 |
| 4| nelsonic| 0.035931 |
| 5| trietptm| 0.018208 |
| 6|galaris| 0.017723 |
| 7| gauravssnl| 0.017480 |
| 8| radovankavicky| 0.013838 |
| 9| indrajithbandara| 0.008497 |
|10| vishalbelsare| 0.008012 |
| Rank | User name | Betweenness Centrality|
| | | |
| 1| angusshire| 0.025285 |
| 2| nelsonic| 0.008361 |
| 3| paulhendricks| 0.005906 |
| 4| pranitbauva1997| 0.005058 |
| 5| baya| 0.004657 |
| 6| hooopo| 0.004492 |
| 7| tonyseek| 0.003691 |
| 8| pirate| 0.002931 |
| 9|batermj| 0.002700 |
|10| OrkoHunter| 0.002674 |
| Rank | User name | Closeness Centrality|
| | | |
| 1| angusshire| 0.259906 |
| 2|batermj| 0.183126 |
| 3| indrajithbandara| 0.168880 |
| 4| paulhendricks| 0.167636 |
| 5| radovankavicky| 0.167528 |
| 6| fly51fly| 0.166674 |
| 7|galaris| 0.161429 |
| 8| trietptm| 0.158976 |
| 9| cprogrammer1994| 0.158735 |
|10|mcanthony| 0.158064 |
| Rank | User name | Eigenvector Centrality|
| | | |
| 1|donnemartin| 0.219502 |
| 2| angusshire| 0.213775 |
| 3| daimajia| 0.190127 |
| 4| fperez| 0.133724 |
| 5| pudo| 0.110548 |
| 6|byt3bl33d3r| 0.108596 |
| 7|Zulko| 0.104271 |
| 8| mrocklin| 0.101942 |
| 9| Miserlou| 0.101771 |
|10| amueller| 0.099386 |
| Rank | User name | Pagerank|
| | | |
| 1| daimajia| 0.010603 |
| 2| mrocklin| 0.009026 |
| 3|donnemartin| 0.008744 |
| 4| fperez| 0.008304 |
| 5|moskytw| 0.007320 |
| 6| angusshire| 0.007087 |
| 7| spitfire-sidra| 0.006225 |
| 8|Zulko| 0.005374 |
| 9|avikj| 0.004606 |
|10| amueller| 0.004512 |
In[4]:
# Union of every user that appears in at least one of the top-10 lists.
commons = set()
for top in tops:
    commons.update(entry[0] for entry in top)
print(len(commons))
print(commons)

# Subgraph induced by those users; built once and reused for every step below.
important = h.subgraph(commons)
print(nx.eigenvector_centrality(important))
print('| Rank | User name | In-degree Centrality|')
print(getTopCentrality(nx.in_degree_centrality, important, 10))

# save to json which can be visualized by d3.js
# The context manager closes the file so the JSON is fully flushed to disk.
with open('importantnodes.json', 'w') as out_file:
    json.dump(json_graph.node_link_data(important), out_file)
33
set([ubyt3bl33d3r, ufperez, uOrkoHunter, udonnemartin, umoskytw, udaimajia, ubatermj, ucprogrammer1994, ufly51fly, umahmoud, uradovankavicky, uindrajithbandara, upaulhendricks, uppwwyyxx, uZulko, uhooopo, ubaya, uMiserlou, uangusshire, utonyseek, upranitbauva1997, upirate, uvishalbelsare, umrocklin, uavikj, uamueller, uspitfire-sidra, utrietptm, umcanthony, unelsonic, upudo, ugalaris, ugauravssnl])
{ubyt3bl33d3r: 0.19719231271273835, uhooopo: 0.09450000023689209, udonnemartin: 0.3539009811713699, udaimajia: 0.16267415047168873, ugalaris: 0.16910787668452737, ucprogrammer1994: 0.12915992525257552, ufly51fly: 0.17757265456849733, umahmoud: 0.08382607808344876, uradovankavicky: 0.2234214325447363, uindrajithbandara: 0.0929665995746708, upaulhendricks: 0.2903715258243272, utonyseek: 0.07663183566240735, umrocklin: 0.11912972170797892, uZulko: 0.14594240881909407, ufperez: 0.2841149892734136, ubaya: 0.11662124560695658, uMiserlou: 0.1739575998910423, uangusshire: 0.32901399486558913, uppwwyyxx: 0.12230052966009973, upranitbauva1997: 0.16834587516223534, utrietptm: 0.17757971840802272, upirate: 0.1657162657604965, uvishalbelsare: 0.13208976866920571, umoskytw: 0.07268429855953133, ubatermj: 0.16932098147709398, uamueller: 0.10857192315214592, uspitfire-sidra: 0.07268429855953133, uavikj: 0.0, umcanthony: 0.22312593285868107, unelsonic: 0.22100600407913354, upudo: 0.13059632639502589, uOrkoHunter: 0.08999353921630769, ugauravssnl: 0.07082891508865617}
| Rank | User name | In-degree Centrality|
| | | |
| 1|donnemartin| 0.312500 |
| 2| angusshire| 0.312500 |
| 3| paulhendricks| 0.250000 |
| 4| fperez| 0.250000 |
| 5| radovankavicky| 0.187500 |
| 6| nelsonic| 0.187500 |
| 7|byt3bl33d3r| 0.187500 |
| 8|galaris| 0.156250 |
| 9| mrocklin| 0.156250 |
|10|mcanthony| 0.156250 |
[(udonnemartin, 0.3125), (uangusshire, 0.3125), (upaulhendricks, 0.25), (ufperez, 0.25), (uradovankavicky, 0.1875), (unelsonic, 0.1875), (ubyt3bl33d3r, 0.1875), (ugalaris, 0.15625), (umrocklin, 0.15625), (umcanthony, 0.15625)]
In[5]:
# Users present in both tops[5] and tops[6] (the Eigenvector Centrality and
# Pagerank entries of the names list), starting from the full node set.
commons = set(h.nodes())
for i in (5, 6):
    commons &= {entry[0] for entry in tops[i]}
commons
Out[5]:
{uZulko,
uamueller,
uangusshire,
udaimajia,
udonnemartin,
ufperez,
umrocklin}
In[6]:
# Users present in both tops[0] and tops[2] (the Degree Centrality and
# Out-degree Centrality entries of the names list), starting from the full
# node set.
commons = set(h.nodes())
for i in (0, 2):
    commons &= {entry[0] for entry in tops[i]}
commons
Out[6]:
{uangusshire,
ubatermj,
ufly51fly,
ugalaris,
ugauravssnl,
unelsonic,
utrietptm}
In[7]:
# In-degree and out-degree of user "donnemartin".
print(h.in_degree(u'donnemartin'))
print(h.out_degree(u'donnemartin'))
115
0
In[8]:
# In-degree and out-degree of user "daimajia".
print(h.in_degree(u'daimajia'))
print(h.out_degree(u'daimajia'))
105
0
In[9]:
# In-degree and out-degree of user "angusshire".
print(h.in_degree(u'angusshire'))
print(h.out_degree(u'angusshire'))
93
1008
In[10]:
# In-degree and out-degree of user "batermj".
print(h.in_degree(u'batermj'))
print(h.out_degree(u'batermj'))
7
206
In[11]:
# Number of weakly connected components in h.
nx.number_weakly_connected_components(h)
Out[11]:
2127
In[12]:
# Number of strongly connected components in h.
nx.number_strongly_connected_components(h)
Out[12]:
3764
In[13]:
# Weakly connected components of h, ordered from largest to smallest.
wc = list(nx.weakly_connected_components(h))
wc.sort(key=len, reverse=True)
In[14]:
# Summary of the subgraph induced by wc[0], the first component in the list.
print(nx.info(h.subgraph(wc[0])))
Name: ()
Type: DiGraph
Number of nodes: 1954
Number of edges: 4627
Average in degree: 2.3680
Average out degree: 2.3680
In[15]:
# Strongly connected components of h, largest first, followed by a summary of
# the subgraph induced by the biggest one.
wcs = list(nx.strongly_connected_components(h))
wcs.sort(key=len, reverse=True)
print(nx.info(h.subgraph(wcs[0])))
Name: ()
Type: DiGraph
Number of nodes: 263
Number of edges: 919
Average in degree: 3.4943
Average out degree: 3.4943
In[16]:
# Size profile of the components in wc: how many there are, how many consist
# of a single node, and the sizes of the first ten.
print(len(wc))
lens = list(map(len, wc))
print(lens.count(1))
print(lens[:10])
2127
2092
[1954, 4, 4, 3, 3, 2, 2, 2, 2, 2]
In[17]:
# Size profile of the components in wcs: how many there are, how many consist
# of a single node, and the sizes of the first ten.
print(len(wcs))
lens = list(map(len, wcs))
print(lens.count(1))
print(lens[:10])
3764
3702
[263, 7, 6, 6, 5, 5, 5, 4, 4, 3]
In[18]:
# save to json which can be visualized by d3.js
# The context manager closes the file so the JSON is fully flushed to disk.
with open('largestStrongComponent.json', 'w') as out_file:
    json.dump(json_graph.node_link_data(h.subgraph(wcs[0])), out_file)
In[19]:
# Degree of every node, highest first. dict() accepts both the plain dict
# returned by networkx 1.x and the degree view returned by networkx 2.x.
degrees = sorted(dict(nx.degree(h)).values(), reverse=True)
In[20]:
import matplotlib.pyplot as plt

# Degree of every node, materialised once. dict() accepts both the networkx
# 1.x dict and the 2.x degree view; list() gives matplotlib a real sequence.
degree_values = list(dict(nx.degree(h)).values())

# Two histograms on the same axes: the tail in steps of ten (10..100) and the
# head in unit steps (0..10).
plt.hist(degree_values, bins=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.hist(degree_values, bins=list(range(11)))
plt.title('Degree distribution')
plt.xlabel('Degree')
plt.ylabel('Number of nodes')
Out[20]:
In[21]:
import numpy as np

# Degree of every node as an array so the comparisons below are vectorised.
# dict()/list() accept both the networkx 1.x dict and the 2.x degree view.
degrees = np.array(list(dict(nx.degree(h)).values()))

# One table row per degree 0..10: "| degree | number of nodes |".
for i in range(11):
    num = np.sum(degrees == i)
    print('| %d | %d |' % (i, num))

# Number of nodes whose degree is above 10.
print(np.sum(degrees > 10))
| 0 | 2092 |
| 1 | 847 |
| 2 | 422 |
| 3 | 219 |
| 4 | 135 |
| 5 | 95 |
| 6 | 63 |
| 7 | 37 |
| 8 | 34 |
| 9 | 32 |
| 10 | 21 |
123
In[22]:
# Collect every unordered pair of users connected by edges in both directions.
# Scanning the edge list (instead of testing all node pairs) keeps the work
# proportional to the number of edges rather than to nodes squared.
nodes = list(h.nodes())  # list() because h.nodes() is not indexable on networkx >= 2
order = {node: idx for idx, node in enumerate(nodes)}

# Keep only the orientation whose source comes first in node order, so each
# pair is reported once and self-loops are skipped; then sort by node order
# so the result matches a nested "for i / for j > i" scan over `nodes`.
pairs = sorted(
    ((u, v) for u, v in h.edges() if order[u] < order[v] and h.has_edge(v, u)),
    key=lambda pair: (order[pair[0]], order[pair[1]]),
)
print(len(pairs))
print(pairs[:10])
335
[(ufffaraz, u1995parham), (ufffaraz, uTabrizian), (ugeekplux, ugaocegege), (updelong42, unielssorensen), (uboliza, ugeometrybase), (usorra, uAndriyLin), (uZeroCrystal, uriomus), (uashubly25, unelsonic), (uMarlysson, upirate), (uMarlysson, ualephmelo)]
In[23]:
# Histogram of in-degrees between 10 and 100 in steps of ten. dict()/list()
# accept both the networkx 1.x dict and the 2.x in-degree view.
plt.hist(list(dict(h.in_degree()).values()), bins=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
Out[23]:
(array([ 44.,13., 1., 5., 3., 0., 1., 0., 1.]),
array([ 10,20,30,40,50,60,70,80,90, 100]),
)
In[24]:
# Histogram of out-degrees between 10 and 100 in steps of ten. dict()/list()
# accept both the networkx 1.x dict and the 2.x out-degree view.
plt.hist(list(dict(h.out_degree()).values()), bins=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
Out[24]:
(array([ 31., 3., 5., 0., 1., 0., 3., 0., 0.]),
array([ 10,20,30,40,50,60,70,80,90, 100]),
)
In[25]:
# Box plot of the PageRank scores of all nodes. list() turns the dict values
# into a real sequence, which matplotlib needs on Python 3.
plt.boxplot(list(nx.pagerank(h).values()))
Out[25]:
{boxes: [
caps: [
fliers: [
means: [],
medians: [
whiskers: [
In[]:
Reviews
There are no reviews yet.