Network Clustering¶

In [1]:
# Load the libraries
import pandas as pd
import networkx
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Load the data of the network of the first book of A Song of Ice and Fire
asoiaf_df = pd.read_csv('Data/asoiaf-book1-edges.csv')
asoiaf_df
Out[2]:
Source Target Type weight book
0 Addam-Marbrand Jaime-Lannister Undirected 3 1
1 Addam-Marbrand Tywin-Lannister Undirected 6 1
2 Aegon-I-Targaryen Daenerys-Targaryen Undirected 5 1
3 Aegon-I-Targaryen Eddard-Stark Undirected 4 1
4 Aemon-Targaryen-(Maester-Aemon) Alliser-Thorne Undirected 4 1
... ... ... ... ... ...
679 Tyrion-Lannister Willis-Wode Undirected 4 1
680 Tyrion-Lannister Yoren Undirected 10 1
681 Tywin-Lannister Varys Undirected 4 1
682 Tywin-Lannister Walder-Frey Undirected 8 1
683 Waymar-Royce Will-(prologue) Undirected 18 1

684 rows × 5 columns

In [3]:
# Create a Network from a Pandas dataframe
G = networkx.from_pandas_edgelist(asoiaf_df, 'Source', 'Target', 'weight')
G['Jon-Snow']
Out[3]:
AtlasView({'Aemon-Targaryen-(Maester-Aemon)': {'weight': 34}, 'Albett': {'weight': 5}, 'Alliser-Thorne': {'weight': 32}, 'Arya-Stark': {'weight': 37}, 'Benjen-Stark': {'weight': 41}, 'Bowen-Marsh': {'weight': 10}, 'Bran-Stark': {'weight': 56}, 'Catelyn-Stark': {'weight': 14}, 'Cersei-Lannister': {'weight': 4}, 'Chett': {'weight': 4}, 'Dareon': {'weight': 6}, 'Donal-Noye': {'weight': 7}, 'Dywen': {'weight': 4}, 'Eddard-Stark': {'weight': 38}, 'Grenn': {'weight': 31}, 'Halder': {'weight': 23}, 'Hobb': {'weight': 5}, 'Jaremy-Rykker': {'weight': 7}, 'Jeor-Mormont': {'weight': 81}, 'Joffrey-Baratheon': {'weight': 10}, 'Jory-Cassel': {'weight': 3}, 'Luwin': {'weight': 10}, 'Matthar': {'weight': 3}, 'Mordane': {'weight': 4}, 'Othor': {'weight': 5}, 'Pypar': {'weight': 45}, 'Rast': {'weight': 8}, 'Rickon-Stark': {'weight': 11}, 'Robb-Stark': {'weight': 53}, 'Robert-Baratheon': {'weight': 18}, 'Rodrik-Cassel': {'weight': 3}, 'Samwell-Tarly': {'weight': 81}, 'Sansa-Stark': {'weight': 8}, 'Theon-Greyjoy': {'weight': 8}, 'Todder': {'weight': 13}, 'Tyrion-Lannister': {'weight': 56}, 'Yoren': {'weight': 6}})
In [4]:
# Calculate the number of triangles each node is in
triangle_dict = networkx.triangles(G)
triangle_dict
Out[4]:
{'Addam-Marbrand': 1,
 'Jaime-Lannister': 144,
 'Tywin-Lannister': 77,
 'Aegon-I-Targaryen': 1,
 'Daenerys-Targaryen': 55,
 'Eddard-Stark': 285,
 'Aemon-Targaryen-(Maester-Aemon)': 12,
 'Alliser-Thorne': 26,
 'Bowen-Marsh': 10,
 'Chett': 3,
 'Clydas': 0,
 'Jeor-Mormont': 30,
 'Jon-Snow': 140,
 'Samwell-Tarly': 27,
 'Aerys-II-Targaryen': 10,
 'Brandon-Stark': 7,
 'Gerold-Hightower': 3,
 'Jon-Arryn': 61,
 'Robert-Baratheon': 260,
 'Aggo': 11,
 'Drogo': 48,
 'Jhogo': 12,
 'Jorah-Mormont': 37,
 'Quaro': 8,
 'Rakharo': 8,
 'Albett': 3,
 'Halder': 19,
 'Rast': 12,
 'Grenn': 16,
 'Pypar': 17,
 'Tyrion-Lannister': 187,
 'Alyn': 7,
 'Harwin': 4,
 'Jory-Cassel': 82,
 'Tomard': 13,
 'Arthur-Dayne': 0,
 'Arya-Stark': 126,
 'Benjen-Stark': 40,
 'Bran-Stark': 149,
 'Catelyn-Stark': 175,
 'Cersei-Lannister': 179,
 'Desmond': 1,
 'Ilyn-Payne': 28,
 'Jeyne-Poole': 12,
 'Joffrey-Baratheon': 183,
 'Meryn-Trant': 25,
 'Mordane': 42,
 'Mycah': 10,
 'Myrcella-Baratheon': 19,
 'Petyr-Baelish': 155,
 'Rickon-Stark': 33,
 'Robb-Stark': 154,
 'Rodrik-Cassel': 77,
 'Sandor-Clegane': 75,
 'Sansa-Stark': 199,
 'Syrio-Forel': 2,
 'Tommen-Baratheon': 20,
 'Vayon-Poole': 10,
 'Yoren': 18,
 'Arys-Oakheart': 0,
 'Balon-Greyjoy': 1,
 'Balon-Swann': 1,
 'Renly-Baratheon': 91,
 'Barristan-Selmy': 68,
 'Boros-Blount': 10,
 'Pycelle': 67,
 'Varys': 93,
 'Jaremy-Rykker': 3,
 'Luwin': 60,
 'Mance-Rayder': 0,
 'Theon-Greyjoy': 57,
 'Waymar-Royce': 1,
 'Beric-Dondarrion': 10,
 'Gregor-Clegane': 42,
 'Loras-Tyrell': 49,
 'Thoros-of-Myr': 0,
 'Hali': 0,
 'Hallis-Mollen': 10,
 'Hodor': 6,
 'Hullen': 10,
 'Joseth': 1,
 'Nan': 6,
 'Osha': 3,
 'Rickard-Karstark': 23,
 'Rickard-Stark': 0,
 'Stiv': 1,
 'Lyanna-Stark': 5,
 'Bronn': 18,
 'Chiggen': 3,
 'Marillion': 10,
 'Shae': 1,
 'Shagga': 3,
 'Vardis-Egen': 8,
 'Willis-Wode': 12,
 'Brynden-Tully': 29,
 'Edmure-Tully': 23,
 'Hoster-Tully': 8,
 'Lysa-Arryn': 42,
 'Nestor-Royce': 1,
 'Walder-Frey': 11,
 'Colemon': 1,
 'Donnel-Waynwood': 0,
 'Eon-Hunter': 3,
 'Jon-Umber-(Greatjon)': 15,
 'Masha-Heddle': 0,
 'Moreo-Tumitis': 0,
 'Mya-Stone': 0,
 'Mychel-Redfort': 0,
 'Robert-Arryn': 3,
 'Stevron-Frey': 1,
 'Tytos-Blackwood': 1,
 'Wendel-Manderly': 1,
 'Cayn': 2,
 'Janos-Slynt': 21,
 'Stannis-Baratheon': 46,
 'Chella': 0,
 'Clement-Piper': 0,
 'Karyl-Vance': 1,
 'Cohollo': 6,
 'Haggo': 11,
 'Qotho': 14,
 'Conn': 1,
 'Coratt': 0,
 'Doreah': 14,
 'Eroeh': 0,
 'Illyrio-Mopatis': 14,
 'Irri': 21,
 'Jhiqui': 10,
 'Mirri-Maz-Duur': 12,
 'Rhaegar-Targaryen': 12,
 'Viserys-Targaryen': 21,
 'Danwell-Frey': 1,
 'Hosteen-Frey': 1,
 'Jared-Frey': 1,
 'Dareon': 0,
 'Daryn-Hornwood': 1,
 'Torrhen-Karstark': 2,
 'Dolf': 0,
 'Donal-Noye': 0,
 'Jommo': 1,
 'Ogo': 1,
 'Rhaego': 0,
 'Dywen': 0,
 'Galbart-Glover': 11,
 'Gendry': 0,
 'High-Septon-(fat_one)': 1,
 'Howland-Reed': 0,
 'Jacks': 0,
 'Joss': 0,
 'Marq-Piper': 5,
 'Porther': 0,
 'Raymun-Darry': 1,
 'Tobho-Mott': 0,
 'Tregar': 0,
 'Varly': 0,
 'Wyl-(guard)': 4,
 'Wylla': 0,
 'Fogo': 0,
 'Roose-Bolton': 8,
 'Gared': 1,
 'Will-(prologue)': 1,
 'Oswell-Whent': 0,
 'Todder': 6,
 'Gunthor-son-of-Gurn': 0,
 'Harys-Swyft': 0,
 'Heward': 1,
 'Hobb': 0,
 'Hugh': 0,
 'Jafer-Flowers': 0,
 'Kevan-Lannister': 3,
 'Matthar': 0,
 'Othor': 0,
 'Maege-Mormont': 1,
 'Jonos-Bracken': 0,
 'Jyck': 1,
 'Morrec': 1,
 'Kurleket': 0,
 'Lancel-Lannister': 0,
 'Leo-Lefford': 0,
 'Mace-Tyrell': 0,
 'Lyn-Corbray': 0,
 'Paxter-Redwyne': 0,
 'Maegor-I-Targaryen': 0,
 'Mord': 0,
 'Randyll-Tarly': 0,
 'Timett': 1,
 'Ulf-son-of-Umar': 0}
In [5]:
# The clustering of a node is the fraction of possible triangles through that node that exist
clustering_dict = networkx.clustering(G)
clustering_dict
Out[5]:
{'Addam-Marbrand': 1.0,
 'Jaime-Lannister': 0.35467980295566504,
 'Tywin-Lannister': 0.3333333333333333,
 'Aegon-I-Targaryen': 1.0,
 'Daenerys-Targaryen': 0.2619047619047619,
 'Eddard-Stark': 0.13286713286713286,
 'Aemon-Targaryen-(Maester-Aemon)': 0.5714285714285714,
 'Alliser-Thorne': 0.5777777777777777,
 'Bowen-Marsh': 1.0,
 'Chett': 1.0,
 'Clydas': 0,
 'Jeor-Mormont': 0.38461538461538464,
 'Jon-Snow': 0.21021021021021022,
 'Samwell-Tarly': 0.4090909090909091,
 'Aerys-II-Targaryen': 0.6666666666666666,
 'Brandon-Stark': 0.4666666666666667,
 'Gerold-Hightower': 0.3,
 'Jon-Arryn': 0.580952380952381,
 'Robert-Baratheon': 0.21224489795918366,
 'Aggo': 0.7333333333333333,
 'Drogo': 0.2807017543859649,
 'Jhogo': 0.8,
 'Jorah-Mormont': 0.47435897435897434,
 'Quaro': 0.8,
 'Rakharo': 0.8,
 'Albett': 1.0,
 'Halder': 0.6785714285714286,
 'Rast': 0.8,
 'Grenn': 0.7619047619047619,
 'Pypar': 0.8095238095238095,
 'Tyrion-Lannister': 0.18067632850241547,
 'Alyn': 0.7,
 'Harwin': 0.6666666666666666,
 'Jory-Cassel': 0.3904761904761905,
 'Tomard': 0.6190476190476191,
 'Arthur-Dayne': 0,
 'Arya-Stark': 0.358974358974359,
 'Benjen-Stark': 0.43956043956043955,
 'Bran-Stark': 0.3004032258064516,
 'Catelyn-Stark': 0.1937984496124031,
 'Cersei-Lannister': 0.4114942528735632,
 'Desmond': 1.0,
 'Ilyn-Payne': 0.7777777777777778,
 'Jeyne-Poole': 0.8,
 'Joffrey-Baratheon': 0.4206896551724138,
 'Meryn-Trant': 0.6944444444444444,
 'Mordane': 0.7636363636363637,
 'Mycah': 1.0,
 'Myrcella-Baratheon': 0.9047619047619048,
 'Petyr-Baelish': 0.47692307692307695,
 'Rickon-Stark': 0.9166666666666666,
 'Robb-Stark': 0.25882352941176473,
 'Rodrik-Cassel': 0.5032679738562091,
 'Sandor-Clegane': 0.625,
 'Sansa-Stark': 0.334453781512605,
 'Syrio-Forel': 0.6666666666666666,
 'Tommen-Baratheon': 0.7142857142857143,
 'Vayon-Poole': 1.0,
 'Yoren': 0.8571428571428571,
 'Arys-Oakheart': 0,
 'Balon-Greyjoy': 1.0,
 'Balon-Swann': 1.0,
 'Renly-Baratheon': 0.5947712418300654,
 'Barristan-Selmy': 0.7472527472527473,
 'Boros-Blount': 1.0,
 'Pycelle': 0.7362637362637363,
 'Varys': 0.6838235294117647,
 'Jaremy-Rykker': 0.5,
 'Luwin': 0.39215686274509803,
 'Mance-Rayder': 0,
 'Theon-Greyjoy': 0.5428571428571428,
 'Waymar-Royce': 0.3333333333333333,
 'Beric-Dondarrion': 0.6666666666666666,
 'Gregor-Clegane': 0.6363636363636364,
 'Loras-Tyrell': 0.5384615384615384,
 'Thoros-of-Myr': 0,
 'Hali': 0,
 'Hallis-Mollen': 1.0,
 'Hodor': 1.0,
 'Hullen': 0.6666666666666666,
 'Joseth': 1.0,
 'Nan': 1.0,
 'Osha': 1.0,
 'Rickard-Karstark': 0.5111111111111111,
 'Rickard-Stark': 0,
 'Stiv': 1.0,
 'Lyanna-Stark': 0.8333333333333334,
 'Bronn': 0.4,
 'Chiggen': 1.0,
 'Marillion': 1.0,
 'Shae': 1.0,
 'Shagga': 0.2,
 'Vardis-Egen': 0.5333333333333333,
 'Willis-Wode': 0.8,
 'Brynden-Tully': 0.5272727272727272,
 'Edmure-Tully': 0.41818181818181815,
 'Hoster-Tully': 0.8,
 'Lysa-Arryn': 0.4,
 'Nestor-Royce': 1.0,
 'Walder-Frey': 0.5238095238095238,
 'Colemon': 1.0,
 'Donnel-Waynwood': 0,
 'Eon-Hunter': 1.0,
 'Jon-Umber-(Greatjon)': 0.5357142857142857,
 'Masha-Heddle': 0,
 'Moreo-Tumitis': 0,
 'Mya-Stone': 0,
 'Mychel-Redfort': 0,
 'Robert-Arryn': 1.0,
 'Stevron-Frey': 1.0,
 'Tytos-Blackwood': 0.3333333333333333,
 'Wendel-Manderly': 1.0,
 'Cayn': 0.6666666666666666,
 'Janos-Slynt': 1.0,
 'Stannis-Baratheon': 0.696969696969697,
 'Chella': 0,
 'Clement-Piper': 0,
 'Karyl-Vance': 0.3333333333333333,
 'Cohollo': 1.0,
 'Haggo': 0.7333333333333333,
 'Qotho': 0.6666666666666666,
 'Conn': 0.3333333333333333,
 'Coratt': 0,
 'Doreah': 0.9333333333333333,
 'Eroeh': 0,
 'Illyrio-Mopatis': 0.9333333333333333,
 'Irri': 0.75,
 'Jhiqui': 1.0,
 'Mirri-Maz-Duur': 0.8,
 'Rhaegar-Targaryen': 0.5714285714285714,
 'Viserys-Targaryen': 0.75,
 'Danwell-Frey': 0.3333333333333333,
 'Hosteen-Frey': 1.0,
 'Jared-Frey': 1.0,
 'Dareon': 0,
 'Daryn-Hornwood': 1.0,
 'Torrhen-Karstark': 0.6666666666666666,
 'Dolf': 0,
 'Donal-Noye': 0,
 'Jommo': 1.0,
 'Ogo': 0.3333333333333333,
 'Rhaego': 0,
 'Dywen': 0,
 'Galbart-Glover': 0.7333333333333333,
 'Gendry': 0,
 'High-Septon-(fat_one)': 1.0,
 'Howland-Reed': 0,
 'Jacks': 0,
 'Joss': 0,
 'Marq-Piper': 0.5,
 'Porther': 0,
 'Raymun-Darry': 1.0,
 'Tobho-Mott': 0,
 'Tregar': 0,
 'Varly': 0,
 'Wyl-(guard)': 0.6666666666666666,
 'Wylla': 0,
 'Fogo': 0,
 'Roose-Bolton': 0.8,
 'Gared': 0.3333333333333333,
 'Will-(prologue)': 1.0,
 'Oswell-Whent': 0,
 'Todder': 1.0,
 'Gunthor-son-of-Gurn': 0,
 'Harys-Swyft': 0,
 'Heward': 1.0,
 'Hobb': 0,
 'Hugh': 0,
 'Jafer-Flowers': 0,
 'Kevan-Lannister': 1.0,
 'Matthar': 0,
 'Othor': 0,
 'Maege-Mormont': 1.0,
 'Jonos-Bracken': 0,
 'Jyck': 1.0,
 'Morrec': 1.0,
 'Kurleket': 0,
 'Lancel-Lannister': 0,
 'Leo-Lefford': 0,
 'Mace-Tyrell': 0,
 'Lyn-Corbray': 0,
 'Paxter-Redwyne': 0,
 'Maegor-I-Targaryen': 0,
 'Mord': 0,
 'Randyll-Tarly': 0,
 'Timett': 1.0,
 'Ulf-son-of-Umar': 0}
In [6]:
# Create a dataframe from the triangle dictionary
triangle_df = pd.DataFrame({'Name': triangle_dict.keys(), 'Triangles': triangle_dict.values()})
triangle_df = triangle_df.sort_values(by='Triangles', ascending=False)
triangle_df
Out[6]:
Name Triangles
5 Eddard-Stark 285
18 Robert-Baratheon 260
54 Sansa-Stark 199
30 Tyrion-Lannister 187
44 Joffrey-Baratheon 183
... ... ...
152 Tobho-Mott 0
153 Tregar 0
76 Hali 0
156 Wylla 0
186 Ulf-son-of-Umar 0

187 rows × 2 columns

In [7]:
# Create a dataframe from the clustering dictionary
clustering_df = pd.DataFrame({'Name': clustering_dict.keys(), 'Clustering': clustering_dict.values()})
clustering_df
Out[7]:
Name Clustering
0 Addam-Marbrand 1.000000
1 Jaime-Lannister 0.354680
2 Tywin-Lannister 0.333333
3 Aegon-I-Targaryen 1.000000
4 Daenerys-Targaryen 0.261905
... ... ...
182 Maegor-I-Targaryen 0.000000
183 Mord 0.000000
184 Randyll-Tarly 0.000000
185 Timett 1.000000
186 Ulf-son-of-Umar 0.000000

187 rows × 2 columns

In [8]:
# Merge the two dataframes
triangle_and_clustering_df = pd.merge(triangle_df, clustering_df, on='Name')
triangle_and_clustering_df
Out[8]:
Name Triangles Clustering
0 Eddard-Stark 285 0.132867
1 Robert-Baratheon 260 0.212245
2 Sansa-Stark 199 0.334454
3 Tyrion-Lannister 187 0.180676
4 Joffrey-Baratheon 183 0.420690
... ... ... ...
182 Tobho-Mott 0 0.000000
183 Tregar 0 0.000000
184 Hali 0 0.000000
185 Wylla 0 0.000000
186 Ulf-son-of-Umar 0 0.000000

187 rows × 3 columns

Task 1¶

In [9]:
# Duplicate the process above to create a dataframe with triangles and clustering for each node in A Song of Ice and Fire Book 3.
# As is the case above, the results should be sorted by number of triangles, in descending order.

asoiaf_b3_df = pd.read_csv('Data/asoiaf-book3-edges.csv')
Gb3 = networkx.from_pandas_edgelist(asoiaf_b3_df, 'Source', 'Target', 'weight')
triangle_dict2 = networkx.triangles(Gb3)
clustering_dict2 = networkx.clustering(Gb3)

triangle_df2 = pd.DataFrame({'Name': triangle_dict2.keys(), 'Triangles': triangle_dict2.values()})
triangle_df2 = triangle_df2.sort_values(by='Triangles', ascending=False)

clustering_df2 = pd.DataFrame({'Name': clustering_dict2.keys(), 'Clustering': clustering_dict2.values()})

triangle_and_clustering_df2 = pd.merge(triangle_df2, clustering_df2, on='Name')

triangle_and_clustering_df2
Out[9]:
Name Triangles Clustering
0 Tyrion-Lannister 243 0.142022
1 Joffrey-Baratheon 236 0.192653
2 Cersei-Lannister 180 0.256046
3 Sansa-Stark 177 0.156915
4 Jaime-Lannister 171 0.172727
... ... ... ...
298 Arson 0 0.000000
299 Galyeon-of-Cuy 0 0.000000
300 Galbart-Glover 0 0.000000
301 Mad-Huntsman 0 0.000000
302 Tion-Frey 0 0.000000

303 rows × 3 columns

In [10]:
# Graph transitivity, the fraction of all possible triangles present in G.
# Possible triangles are identified by the number of "triads" (two edges with a shared node).
networkx.transitivity(G)
Out[10]:
0.3302342878393455

Task 2¶

Calculate the graph transitivity for the network created based on the data from A Song of Ice and Fire Book 3. Is the graph transitivity higher or lower than the transitivity of the network constructed from A Song of Ice and Fire Book 1? Analyze the significance of this comparison, considering what it may reveal about the narrative development within A Song of Ice and Fire. If you're not familiar with the work, feel free to speculate on potential reasons. To provide context, please refer to the definitions of graph transitivity (included above) to clarify their implications. Include the calculation in the following cell (code) and the discussion in the next cell (markdown).

In [11]:
asoiaf_b3_df = pd.read_csv('Data/asoiaf-book3-edges.csv')
Gb3 = networkx.from_pandas_edgelist(asoiaf_b3_df, 'Source', 'Target', 'weight')
networkx.transitivity(Gb3)
Out[11]:
0.27702231520223153

The graph transitivity is lower in 'A Song of Ice and Fire Book 3' compared to Book 1. As someone who has only heard of game of thrones this may mean that as the books continue many supporting characters are introduced that don't form triangle connections or 'triads'. Or possibly the characters that were connected in triads and triangle connections in past books do not appear in future books for narrative reasons leaving the character roster with a smaller fraction of triangle connections. Since graph transitivity refers to the fraction of real triangle connections to possible triangle connections this means that the characters in Book 3 have less real triangular connections while having more 'possible' triangle connections compared to Book 1 characters.

Basic Network Visualization with Bokeh¶

In [12]:
# Changing bokeh version because the latest version will result in bugs in network visualization
!pip install bokeh==2.4.3
Requirement already satisfied: bokeh==2.4.3 in c:\users\colto\anaconda3\lib\site-packages (2.4.3)
Requirement already satisfied: Jinja2>=2.9 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (2.11.3)
Requirement already satisfied: typing-extensions>=3.10.0 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (4.1.1)
Requirement already satisfied: pillow>=7.1.0 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (9.0.1)
Requirement already satisfied: numpy>=1.11.3 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (1.21.5)
Requirement already satisfied: packaging>=16.8 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (21.3)
Requirement already satisfied: tornado>=5.1 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (6.1)
Requirement already satisfied: PyYAML>=3.10 in c:\users\colto\anaconda3\lib\site-packages (from bokeh==2.4.3) (6.0)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\colto\anaconda3\lib\site-packages (from Jinja2>=2.9->bokeh==2.4.3) (2.0.1)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\users\colto\anaconda3\lib\site-packages (from packaging>=16.8->bokeh==2.4.3) (3.0.4)
In [13]:
# Check the bokeh version. It should be 2.4.3.
!pip show bokeh
Name: bokeh
Version: 2.4.3
Summary: Interactive plots and applications in the browser from Python
Home-page: https://github.com/bokeh/bokeh
Author: Bokeh Team
Author-email: info@bokeh.org
License: BSD-3-Clause
Location: c:\users\colto\anaconda3\lib\site-packages
Requires: numpy, Jinja2, tornado, packaging, typing-extensions, PyYAML, pillow
Required-by: panel, hvplot
In [14]:
# Set up Bokeh to work in Jupyter notebook
from bokeh.io import output_notebook
output_notebook()
Loading BokehJS ...
In [15]:
# Import necessary Bokeh modules
from bokeh.io import show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
In [16]:
# Basic visualization

#Choose a title
title = 'A Song of Ice and Fire (Book 1) Network'

#Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [("Character", "@index")] # Index is the name of the character

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

#Create a network graph object with spring layout
#https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

#Set node size and color
network_graph.node_renderer.glyph = Circle(size=15, fill_color='skyblue')

#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

#Add network graph to the plot
plot.renderers.append(network_graph)

show(plot)

Network with Nodes Sized and Colored By Attribute (Degree)¶

In [17]:
# Include Bokeh color palettes
from bokeh.palettes import Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8
from bokeh.transform import linear_cmap
In [18]:
# Calculate degree for each node and add as node attribute
degrees = dict(networkx.degree(G))
networkx.set_node_attributes(G, name='degree', values=degrees)
In [19]:
#Choose attributes from G network to size and color by degree (NEW)
size_by_this_attribute = 'degree'
color_by_this_attribute = 'degree'

#Pick a color palette — Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8 (NEW)
color_palette = Blues8

#Choose a title
title = 'A Song of Ice and Fire (Book 1) Network'

#Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [
       ("Character", "@index"),
        ("Degree", "@degree")
]

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

#Create a network graph object
network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

#Set node sizes and colors according to node degree (color as spectrum of color palette) (NEW)
minimum_value_color = min(network_graph.node_renderer.data_source.data[color_by_this_attribute])
maximum_value_color = max(network_graph.node_renderer.data_source.data[color_by_this_attribute])
network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=linear_cmap(color_by_this_attribute, color_palette, minimum_value_color, maximum_value_color))

#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

plot.renderers.append(network_graph)

show(plot)

Network with Nodes Colored By Attribute (Community)¶¶

In [20]:
# Import community detection module and get the communities
from networkx.algorithms import community
communities = community.greedy_modularity_communities(G)
In [21]:
# Make a dictionary by looping through the communities and, for each member of the community, adding their community number
# Make another dictionary to store colors for modularity
modularity_class = {}
modularity_color = {}
count = 0
#Loop through each community in the network
for commun in communities:
    #For each member of the community, add their community number
    for name in commun:
        modularity_class[name] = count
        modularity_color[name] = Blues8[count] # Set a distinct color
    count += 1
modularity_color
Out[21]:
{'Tywin-Lannister': '#084594',
 'Rickard-Karstark': '#084594',
 'Marillion': '#084594',
 'Rickon-Stark': '#084594',
 'Hali': '#084594',
 'Vardis-Egen': '#084594',
 'Kurleket': '#084594',
 'Hoster-Tully': '#084594',
 'Willis-Wode': '#084594',
 'Masha-Heddle': '#084594',
 'Tytos-Blackwood': '#084594',
 'Robert-Arryn': '#084594',
 'Lyn-Corbray': '#084594',
 'Theon-Greyjoy': '#084594',
 'Addam-Marbrand': '#084594',
 'Heward': '#084594',
 'Hallis-Mollen': '#084594',
 'Morrec': '#084594',
 'Leo-Lefford': '#084594',
 'Bran-Stark': '#084594',
 'Osha': '#084594',
 'Eon-Hunter': '#084594',
 'Jaime-Lannister': '#084594',
 'Stevron-Frey': '#084594',
 'Galbart-Glover': '#084594',
 'Wendel-Manderly': '#084594',
 'Tyrion-Lannister': '#084594',
 'Robb-Stark': '#084594',
 'Lysa-Arryn': '#084594',
 'Jory-Cassel': '#084594',
 'Joseth': '#084594',
 'Rodrik-Cassel': '#084594',
 'Maege-Mormont': '#084594',
 'Mord': '#084594',
 'Catelyn-Stark': '#084594',
 'Walder-Frey': '#084594',
 'Moreo-Tumitis': '#084594',
 'Roose-Bolton': '#084594',
 'Edmure-Tully': '#084594',
 'Bronn': '#084594',
 'Donnel-Waynwood': '#084594',
 'Stiv': '#084594',
 'Gunthor-son-of-Gurn': '#084594',
 'Nan': '#084594',
 'Chella': '#084594',
 'Hodor': '#084594',
 'Colemon': '#084594',
 'Jonos-Bracken': '#084594',
 'Jon-Umber-(Greatjon)': '#084594',
 'Hullen': '#084594',
 'Luwin': '#084594',
 'Wyl-(guard)': '#084594',
 'Mychel-Redfort': '#084594',
 'Nestor-Royce': '#084594',
 'Harys-Swyft': '#084594',
 'Brynden-Tully': '#084594',
 'Kevan-Lannister': '#084594',
 'Shae': '#084594',
 'Jyck': '#084594',
 'Chiggen': '#084594',
 'Mya-Stone': '#084594',
 'Barristan-Selmy': '#2171b5',
 'Mace-Tyrell': '#2171b5',
 'Gregor-Clegane': '#2171b5',
 'Varys': '#2171b5',
 'Lancel-Lannister': '#2171b5',
 'Mordane': '#2171b5',
 'Meryn-Trant': '#2171b5',
 'Thoros-of-Myr': '#2171b5',
 'Robert-Baratheon': '#2171b5',
 'Beric-Dondarrion': '#2171b5',
 'Syrio-Forel': '#2171b5',
 'Maegor-I-Targaryen': '#2171b5',
 'Hugh': '#2171b5',
 'Mycah': '#2171b5',
 'Boros-Blount': '#2171b5',
 'Janos-Slynt': '#2171b5',
 'Balon-Swann': '#2171b5',
 'Renly-Baratheon': '#2171b5',
 'Tommen-Baratheon': '#2171b5',
 'Cersei-Lannister': '#2171b5',
 'Arys-Oakheart': '#2171b5',
 'Sansa-Stark': '#2171b5',
 'Ilyn-Payne': '#2171b5',
 'Joffrey-Baratheon': '#2171b5',
 'Myrcella-Baratheon': '#2171b5',
 'Stannis-Baratheon': '#2171b5',
 'Paxter-Redwyne': '#2171b5',
 'Sandor-Clegane': '#2171b5',
 'Jeyne-Poole': '#2171b5',
 'Pycelle': '#2171b5',
 'Vayon-Poole': '#2171b5',
 'Jon-Arryn': '#2171b5',
 'Petyr-Baelish': '#2171b5',
 'Loras-Tyrell': '#2171b5',
 'Arya-Stark': '#2171b5',
 'Gerold-Hightower': '#4292c6',
 'Arthur-Dayne': '#4292c6',
 'Lyanna-Stark': '#4292c6',
 'Marq-Piper': '#4292c6',
 'Joss': '#4292c6',
 'Cayn': '#4292c6',
 'Daryn-Hornwood': '#4292c6',
 'Brandon-Stark': '#4292c6',
 'Gendry': '#4292c6',
 'Desmond': '#4292c6',
 'Harwin': '#4292c6',
 'Tomard': '#4292c6',
 'Karyl-Vance': '#4292c6',
 'Alyn': '#4292c6',
 'High-Septon-(fat_one)': '#4292c6',
 'Tobho-Mott': '#4292c6',
 'Wylla': '#4292c6',
 'Oswell-Whent': '#4292c6',
 'Clement-Piper': '#4292c6',
 'Porther': '#4292c6',
 'Jacks': '#4292c6',
 'Varly': '#4292c6',
 'Rickard-Stark': '#4292c6',
 'Aegon-I-Targaryen': '#4292c6',
 'Aerys-II-Targaryen': '#4292c6',
 'Raymun-Darry': '#4292c6',
 'Eddard-Stark': '#4292c6',
 'Tregar': '#4292c6',
 'Torrhen-Karstark': '#4292c6',
 'Balon-Greyjoy': '#4292c6',
 'Howland-Reed': '#4292c6',
 'Will-(prologue)': '#6baed6',
 'Othor': '#6baed6',
 'Aemon-Targaryen-(Maester-Aemon)': '#6baed6',
 'Todder': '#6baed6',
 'Hobb': '#6baed6',
 'Halder': '#6baed6',
 'Dywen': '#6baed6',
 'Jeor-Mormont': '#6baed6',
 'Clydas': '#6baed6',
 'Grenn': '#6baed6',
 'Yoren': '#6baed6',
 'Pypar': '#6baed6',
 'Bowen-Marsh': '#6baed6',
 'Gared': '#6baed6',
 'Rast': '#6baed6',
 'Alliser-Thorne': '#6baed6',
 'Dareon': '#6baed6',
 'Randyll-Tarly': '#6baed6',
 'Albett': '#6baed6',
 'Matthar': '#6baed6',
 'Samwell-Tarly': '#6baed6',
 'Jaremy-Rykker': '#6baed6',
 'Jon-Snow': '#6baed6',
 'Chett': '#6baed6',
 'Waymar-Royce': '#6baed6',
 'Mance-Rayder': '#6baed6',
 'Jafer-Flowers': '#6baed6',
 'Donal-Noye': '#6baed6',
 'Benjen-Stark': '#6baed6',
 'Doreah': '#9ecae1',
 'Irri': '#9ecae1',
 'Jhogo': '#9ecae1',
 'Ogo': '#9ecae1',
 'Viserys-Targaryen': '#9ecae1',
 'Haggo': '#9ecae1',
 'Rakharo': '#9ecae1',
 'Rhaegar-Targaryen': '#9ecae1',
 'Jommo': '#9ecae1',
 'Rhaego': '#9ecae1',
 'Aggo': '#9ecae1',
 'Jorah-Mormont': '#9ecae1',
 'Quaro': '#9ecae1',
 'Daenerys-Targaryen': '#9ecae1',
 'Drogo': '#9ecae1',
 'Qotho': '#9ecae1',
 'Cohollo': '#9ecae1',
 'Eroeh': '#9ecae1',
 'Illyrio-Mopatis': '#9ecae1',
 'Fogo': '#9ecae1',
 'Mirri-Maz-Duur': '#9ecae1',
 'Jhiqui': '#9ecae1',
 'Dolf': '#c6dbef',
 'Conn': '#c6dbef',
 'Timett': '#c6dbef',
 'Ulf-son-of-Umar': '#c6dbef',
 'Shagga': '#c6dbef',
 'Coratt': '#c6dbef',
 'Jared-Frey': '#deebf7',
 'Danwell-Frey': '#deebf7',
 'Hosteen-Frey': '#deebf7'}
In [22]:
# Add class and color as node attributes
networkx.set_node_attributes(G, modularity_class, 'modularity_class')
networkx.set_node_attributes(G, modularity_color, 'modularity_color')
In [23]:
#Choose attributes from G network to size and color 
size_by_this_attribute = 'degree'
color_by_this_attribute = 'modularity_color' # (NEW)

#Choose a title
title = 'A Song of Ice and Fire (Book 1) Network'

#Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [
       ("Character", "@index"),
       ("Degree", "@degree"),
       ("Modularity Class", "@modularity_class")
]

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset, tap", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

#Create a network graph object
network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

#Set node sizes according to node degree and colors according to modularity class (color as category from attribute) (NEW)
network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=color_by_this_attribute)

#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

plot.renderers.append(network_graph)

show(plot)

Task 3¶

In [24]:
# Create a visualization, where node size is based on eigenvector centrality, and color is based on community.
# Copy the code in the last cell, and update the "size_by_this_attribute" based on eigenvector centrality, and update colors.
# The eigenvector_centrality attribute should be mutiplied by 100 to make the nods visible. This can be realized by 
# writing a loop, and multiply the value of each node by 100, and add the result as a node attribute.
# The colors should be Spectral8 instead of Blues8. This can be realized by rerun the loop of setting colors for each community,
# and change the colors to Spectral8[count], and add the color as a node attribute again.

G = networkx.from_pandas_edgelist(asoiaf_df, 'Source', 'Target', 'weight')

eigenvector_centrality = networkx.eigenvector_centrality(G)
for node in eigenvector_centrality:
    eigenvector_centrality[node] *= 100

networkx.set_node_attributes(G, name='eigenvector', values=eigenvector_centrality)

communities = community.greedy_modularity_communities(G)

modularity_class = {}
modularity_color = {}
count = 0

for commun in communities:
    for name in commun:
        modularity_class[name] = count
        modularity_color[name] = Spectral8[count]
    count += 1

networkx.set_node_attributes(G, modularity_class, 'modularity_class')
networkx.set_node_attributes(G, modularity_color, 'modularity_color')

size_by_this_attribute = 'eigenvector'
color_by_this_attribute = 'modularity_color'

title = 'A Song of Ice and Fire (Book 1) Network'
HOVER_TOOLTIPS = [
    ("Character", "@index"),
    ("Eigenvector", "@eigenvector"),
    ("Modularity Class", "@modularity_class")
]

plot = figure(tooltips=HOVER_TOOLTIPS, tools="pan,wheel_zoom,save,reset, tap", active_scroll='wheel_zoom',
              x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=color_by_this_attribute)

network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

plot.renderers.append(network_graph)

show(plot)

Network with Responsive Highlighting¶

In [25]:
# Include EdgesAndLinkedNodes, NodesAndLinkedEdges
from bokeh.models import EdgesAndLinkedNodes, NodesAndLinkedEdges
In [26]:
#Choose colors for node and edge highlighting (NEW)
node_highlight_color = 'white'
edge_highlight_color = 'black'

#Choose attributes from G network to size and color
size_by_this_attribute = 'degree'
color_by_this_attribute = 'modularity_color'

#Choose a title
title = 'A Song of Ice and Fire (Book 1) Network'

#Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [
       ("Character", "@index"),
       ("Degree", "@degree"),
       ("Modularity Class", "@modularity_class")
]

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset, tap", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

#Create a network graph object
network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

#Set node sizes according to node degree and colors according to modularity class (color as category from attribute)
network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=color_by_this_attribute)

#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

#Set node highlight colors (NEW)
network_graph.node_renderer.hover_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)
network_graph.node_renderer.selection_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)

#Set edge highlight colors (NEW)
network_graph.edge_renderer.selection_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)
network_graph.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)

#Highlight nodes and edges (NEW)
network_graph.selection_policy = NodesAndLinkedEdges()
network_graph.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(network_graph)

show(plot)
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "size" value "degree" [renderer: GlyphRenderer(id='1929', ...)]

Network with Labels¶

In [27]:
# Include LabelSet
from bokeh.models import LabelSet
In [28]:
#Choose colors for node and edge highlighting
node_highlight_color = 'white'
edge_highlight_color = 'black'

#Choose attributes from G network to size and color
size_by_this_attribute = 'degree'
color_by_this_attribute = 'modularity_color'

#Choose a title
title = 'A Song of Ice and Fire (Book 1) Network'

#Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [
       ("Character", "@index"),
       ("Degree", "@degree"),
       ("Modularity Class", "@modularity_class")
]

#Create a plot — set dimensions, toolbar, and title
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset, tap", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

#Create a network graph object
network_graph = from_networkx(G, networkx.spring_layout, scale=10, center=(0, 0))

#Set node sizes according to node degree and colors according to modularity class (color as category from attribute)
network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=color_by_this_attribute)

#Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

#Set node highlight colors
network_graph.node_renderer.hover_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)
network_graph.node_renderer.selection_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)

#Set edge highlight colors
network_graph.edge_renderer.selection_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)
network_graph.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)

#Highlight nodes and edges
network_graph.selection_policy = NodesAndLinkedEdges()
network_graph.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(network_graph)

#Add Labels (NEW)
x, y = zip(*network_graph.layout_provider.graph_layout.values())
node_labels = list(G.nodes())
source = ColumnDataSource({'x': x, 'y': y, 'name': [node_labels[i] for i in range(len(x))]})
labels = LabelSet(x='x', y='y', text='name', source=source, background_fill_color='white', text_font_size='10px', background_fill_alpha=.7)
plot.renderers.append(labels)

show(plot)
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "size" value "degree" [renderer: GlyphRenderer(id='2215', ...)]

Task 4¶

In [29]:
# Duplicate the process above to create a visualization based on data of A Song of Ice and Fire Book 3.
# The node size should be based on degree centrality, and color should be based on community, and responsive highlighting and
# labels should also be included.
# You have to create a new network based on asoiaf_b3_df, recalculate degree of the new network and add it as node attribute.
# Then you should regenerate communities of the new network, rerun the loop to generate community number and colors, and add
# both modularity class and color as node attributes.
# Finally, copy the code in the last cell, and change the title, and update network names when creating graph and adding labels.

asoiaf_b3_df = pd.read_csv('Data/asoiaf-book3-edges.csv')
Gb3 = networkx.from_pandas_edgelist(asoiaf_b3_df, 'Source', 'Target', 'weight')

degree = dict(networkx.degree(Gb3))
node_highlight_color = 'white'
edge_highlight_color = 'black'
networkx.set_node_attributes(Gb3, modularity_class, 'modularity_class')
networkx.set_node_attributes(Gb3, modularity_color, 'modularity_color')

size_by_this_attribute = 'degree_centrality'
color_by_this_attribute = 'modularity_class'

title = 'A Song of Ice and Fire (Book 3) Network'

HOVER_TOOLTIPS = [
       ("Character", "@index"),
       ("Degree", "@degree_centrality"),
       ("Modularity Class", "@modularity_class")
]
communities = list(networkx.community.greedy_modularity_communities(Gb3))

modularity_class = {}
modularity_color = {}
count = 0
for commun in communities:
    for name in commun:
        modularity_class[name]= count
        modularity_color[name]= Spectral8[count]
    count += 1

plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset, tap", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

network_graph = from_networkx(Gb3, networkx.spring_layout, scale=10, center=(0, 0))

network_graph.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=color_by_this_attribute)

network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

network_graph.node_renderer.hover_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)
network_graph.node_renderer.selection_glyph = Circle(size=size_by_this_attribute, fill_color=node_highlight_color, line_width=2)

network_graph.edge_renderer.selection_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)
network_graph.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)

network_graph.selection_policy = NodesAndLinkedEdges()
network_graph.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(network_graph)

x, y = zip(*network_graph.layout_provider.graph_layout.values())
node_labels = list(Gb3.nodes())
source = ColumnDataSource({'x': x, 'y': y, 'name': [node_labels[i] for i in range(len(x))]})
labels = LabelSet(x='x', y='y', text='name', source=source, background_fill_color='white', text_font_size='10px', background_fill_alpha=.7)
plot.renderers.append(labels)

show(plot)
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "size" value "degree_centrality" [renderer: GlyphRenderer(id='2535', ...)]

Network Simulation¶

In [37]:
# Generate the list of people in the network

p1 = list(asoiaf_df['Source']) # convert "Source" in the dataframe to a list
p2 = list(asoiaf_df['Target']) # convert "Target" in the dataframe to a list
people = p1 + p2 # concatenate the two lists
print(len(people)) # let's see how many people we have

people = list(set(people)) # remove the duplicates
node_num = len(people) # number of nodes (people) in the network: the length of the people list
print(node_num) # let's see how many people we have now
print(people) # what the list looks like
1368
187
['Lyanna-Stark', 'Ogo', 'Dywen', 'Jeor-Mormont', 'Hali', 'Vardis-Egen', 'Kurleket', 'Cayn', 'Daryn-Hornwood', 'Willis-Wode', 'Joss', 'Gregor-Clegane', 'Hallis-Mollen', 'Cohollo', 'Wylla', 'Leo-Lefford', 'Matthar', 'Porther', 'Eon-Hunter', 'Syrio-Forel', 'Raymun-Darry', 'Waymar-Royce', 'Mirri-Maz-Duur', 'Torrhen-Karstark', 'Boros-Blount', 'Tommen-Baratheon', 'Doreah', 'Yoren', 'Pypar', 'Roose-Bolton', 'Arys-Oakheart', 'Bowen-Marsh', 'Edmure-Tully', 'Rhaegar-Targaryen', 'Sansa-Stark', 'Desmond', 'Gendry', 'Joffrey-Baratheon', 'Shagga', 'Dareon', 'Sandor-Clegane', 'Pycelle', 'Jon-Arryn', 'Aegon-I-Targaryen', 'Jared-Frey', 'Danwell-Frey', 'Shae', 'Chiggen', 'Irri', 'Clydas', 'Brandon-Stark', 'Lancel-Lannister', 'Alyn', 'Lyn-Corbray', 'Quaro', 'Qotho', 'Meryn-Trant', 'Thoros-of-Myr', 'Oswell-Whent', 'Jaremy-Rykker', 'Hugh', 'Aerys-II-Targaryen', 'Wendel-Manderly', 'Joseth', 'Maege-Mormont', 'Rodrik-Cassel', 'Janos-Slynt', 'Renly-Baratheon', 'Mord', 'Cersei-Lannister', 'Aemon-Targaryen-(Maester-Aemon)', 'Todder', 'Hobb', 'Moreo-Tumitis', 'Timett', 'Nan', 'Tomard', 'Aggo', 'Chella', 'Jon-Umber-(Greatjon)', 'Eroeh', 'Clement-Piper', 'Vayon-Poole', 'Chett', 'Illyrio-Mopatis', 'Nestor-Royce', 'Jaime-Lannister', 'Benjen-Stark', 'Petyr-Baelish', 'Jyck', 'Ulf-son-of-Umar', 'Arya-Stark', 'Tywin-Lannister', 'Rickard-Karstark', 'Marillion', 'Rickon-Stark', 'Grenn', 'Marq-Piper', 'Haggo', 'Masha-Heddle', 'Gared', 'Harwin', 'Varys', 'Mordane', 'Karyl-Vance', 'Tytos-Blackwood', 'Jorah-Mormont', 'Addam-Marbrand', 'Theon-Greyjoy', 'Bran-Stark', 'Osha', 'Eddard-Stark', 'Jafer-Flowers', 'Tyrion-Lannister', 'Jory-Cassel', 'Lysa-Arryn', 'Jhiqui', 'Howland-Reed', 'Catelyn-Stark', 'Jhogo', 'Othor', 'Halder', 'Walder-Frey', 'Jommo', 'Conn', 'Ilyn-Payne', 'Stiv', 'Rast', 'Myrcella-Baratheon', 'Daenerys-Targaryen', 'Hodor', 'Alliser-Thorne', 'Colemon', 'Samwell-Tarly', 'Tobho-Mott', 'Coratt', 'Mychel-Redfort', 'Brynden-Tully', 'Harys-Swyft', 'Hosteen-Frey', 'Barristan-Selmy', 'Mace-Tyrell', 'Will-(prologue)', 'Gerold-Hightower', 'Hoster-Tully', 'Rakharo', 'Robert-Arryn', 'Rhaego', 'Dolf', 'Heward', 'High-Septon-(fat_one)', 'Morrec', 'Robert-Baratheon', 'Beric-Dondarrion', 'Maegor-I-Targaryen', 'Stevron-Frey', 'Mycah', 'Galbart-Glover', 'Mance-Rayder', 'Tregar', 'Robb-Stark', 'Donal-Noye', 'Balon-Greyjoy', 'Balon-Swann', 'Viserys-Targaryen', 'Arthur-Dayne', 'Bronn', 'Donnel-Waynwood', 'Gunthor-son-of-Gurn', 'Drogo', 'Stannis-Baratheon', 'Randyll-Tarly', 'Paxter-Redwyne', 'Albett', 'Jeyne-Poole', 'Jonos-Bracken', 'Hullen', 'Luwin', 'Jon-Snow', 'Jacks', 'Wyl-(guard)', 'Varly', 'Rickard-Stark', 'Fogo', 'Kevan-Lannister', 'Loras-Tyrell', 'Mya-Stone']
In [38]:
# Generate the matrix of people's relationships

relation_matrix = np.zeros((len(people), len(people)))  # create a 187*187 empty matrix

for i in range(len(asoiaf_df)): # loop through each line (relationship) in the data
    p1_index = people.index(asoiaf_df.loc[i, 'Source']) # get the index of "Source" in the list of people
    p2_index = people.index(asoiaf_df.loc[i, 'Target']) # get the index of "Target" in the list of people
    relation_matrix[p1_index, p2_index] = 1 # assign value, which means "Source has relationship with Target"
    relation_matrix[p2_index, p1_index] = 1 # correspondingly, assign value, which means "Target has relationship with Source"
    # Here, for the pairs of people with multiple relationships, we assign value 1 for multiple times, and the value is still 1
print(relation_matrix.shape) # let's see how large the matrix is
print(relation_matrix) # what the matrix looks like

edge_num = int(sum(sum(relation_matrix))/2) # sum all relationships and divide 2 (as each relationship is calculated twice)
print("The number of total edges in the network is {}".format(edge_num)) # let's see the number of total edges in the network
(187, 187)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
The number of total edges in the network is 684
In [39]:
# Generate the degree distribution

deg_num_dict = {} # an empty dictionary to record the number of nodes of each degree, key: degree; value: number of nodes with that degree

for i in range(len(relation_matrix)): # loop through the relation matrix
    deg_i  = sum(relation_matrix[i,]) # for each node (person), its degree is the summation of the line (number of its relationships)
    key = int(deg_i)
    if key in deg_num_dict.keys(): # add one to the number of nodes of that degree, if there are already some nodes
        deg_num_dict[key] += 1
    else:
        deg_num_dict[key] = 1 # assign value one as the number of nodes of that degree, if there is no node

deg_num_dict # what the dictionary looks like
Out[39]:
{4: 8,
 3: 17,
 1: 48,
 13: 2,
 6: 16,
 2: 26,
 12: 3,
 5: 14,
 8: 5,
 7: 10,
 11: 3,
 35: 2,
 30: 2,
 16: 1,
 14: 4,
 15: 3,
 9: 3,
 18: 3,
 29: 1,
 26: 1,
 27: 1,
 22: 1,
 10: 3,
 17: 1,
 32: 1,
 66: 1,
 46: 1,
 21: 2,
 43: 1,
 50: 1,
 19: 1,
 37: 1}
In [40]:
# Sort the degree distribution

sort_deg_num_list = sorted(deg_num_dict.items(), key=lambda d:-d[0]) # sort the dictionary and reorder it by degree number 
sort_deg_num_list # what the dictionary looks like after sorting
Out[40]:
[(66, 1),
 (50, 1),
 (46, 1),
 (43, 1),
 (37, 1),
 (35, 2),
 (32, 1),
 (30, 2),
 (29, 1),
 (27, 1),
 (26, 1),
 (22, 1),
 (21, 2),
 (19, 1),
 (18, 3),
 (17, 1),
 (16, 1),
 (15, 3),
 (14, 4),
 (13, 2),
 (12, 3),
 (11, 3),
 (10, 3),
 (9, 3),
 (8, 5),
 (7, 10),
 (6, 16),
 (5, 14),
 (4, 8),
 (3, 17),
 (2, 26),
 (1, 48)]
In [41]:
# Generate the nodes and the number of nodes of each degree

deg_num_list = [[i for i, j in sort_deg_num_list], 
               [j for i, j in sort_deg_num_list]]  # unzip the tuples to two lists
degs = deg_num_list[0] # the first list is degree
nums = deg_num_list[1] # the second list is number of nodes of each degree
print(degs) # what the list of degrees looks like
print(nums) # what the number of nodes of each degree looks like
[66, 50, 46, 43, 37, 35, 32, 30, 29, 27, 26, 22, 21, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
[1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 3, 1, 1, 3, 4, 2, 3, 3, 3, 3, 5, 10, 16, 14, 8, 17, 26, 48]
In [42]:
# Visualization

plt.style.use('ggplot') # use the "ggplot" style
fig1 = plt.scatter(nums, degs, color = 'blue') # plot a scatterplot of degrees in the x axis and the number of nodes in the y axis
plt.xlabel('Number of Nodes') # set the label of the x axis
plt.ylabel('Degree') # set the label of the y axis
Out[42]:
Text(0, 0.5, 'Degree')
In [43]:
# Preferential attachment model

import math, random # library "random", for generating the random number

def preferential_attachment(k): # the preferential attachment model
    dNetwork = {} # dictionary of lists
    iNodes = node_num # total number of nodes in the network
    iLinks = 0 # set the initial number of links (edges/relationships) as 0

    for i in range(iNodes): # loop through all the nodes
        dNetwork[i] = [] # initialize node i with a empty list, which will record its link with other nodes
        for node in dNetwork.values(): 
            fThresh = 1.0 / (iLinks + i + 1) * k *(len(node) + 1) 
            # create a "threshold" value, which is proportional to its current degree + 1,
            # and inversly proportional to the total number of links in the network
            if(random.random() <= fThresh): # if the random value (a random number between 0 and 1) is smaller than the threshold
                node.append(i) # add a link between this node and the node "i"
                iLinks += 1 # add one to the total number of links in the network
    lDegrees = [len(node) for node in dNetwork.values()] # calculate the degree distribution
    return lDegrees, iLinks
In [44]:
# Run the preferential attachment model
def run_pa(k): 
    # k is a parameter which influences the fThresh (so that influence the probability of generating links)
    # I set k=3.7 below, but you can try to change the number and see what will happen!

    lDegrees,iLinks = preferential_attachment(k) # get the degrees and number of links generated in the preferential attachment model
    # below we duplicated what we have done earlier
    deg_num_sim_dict = {}
    for i in lDegrees :
        if i in deg_num_sim_dict.keys():
            deg_num_sim_dict[i] += 1
        else:
            deg_num_sim_dict[i] = 1

    sort_deg_num_sim_list = sorted(deg_num_sim_dict.items(), key=lambda d:-d[0])

    deg_num_sim_list = [[i for i, j in sort_deg_num_sim_list], 
                       [j for i, j in sort_deg_num_sim_list]]
    degs_sim = deg_num_sim_list[0]
    nums_sim = deg_num_sim_list[1]
    
    return degs_sim, nums_sim, iLinks
In [45]:
iLinks = 0 # initialize iLinks = 0

# We will simulate the network so that it will have exactly the same number of nodes (already guaranteed) and edges as the observed network
while iLinks != edge_num: # so, we will loop until the requirement of the edge number is satisfied
    degs_sim, nums_sim, iLinks = run_pa(3.7) # run the preferential attachment model
    print(iLinks) # print the number of total links in the network
653
663
713
656
691
671
669
633
739
686
708
672
665
678
638
639
702
686
688
682
698
645
695
672
661
705
675
696
670
671
650
671
691
688
691
685
660
647
739
667
676
709
680
643
657
674
690
685
693
698
702
709
708
676
678
657
668
668
657
664
669
687
661
704
714
689
648
702
667
658
672
711
658
698
646
694
694
656
631
658
717
668
717
694
670
676
713
715
662
649
650
647
671
691
668
672
689
675
709
666
658
662
653
695
672
677
681
653
685
694
680
707
662
657
681
701
674
710
690
702
704
698
702
701
627
667
676
681
669
684
In [46]:
# Visualization of both the simulated network and the observed network

plt.scatter(nums_sim, degs_sim, color = 'red', label = 'Simulated')  # plot a scatterplot of the simulated network
plt.scatter(nums, degs, color='blue', label='Observed') # plot a scatterplot of the observed network
plt.xlabel('Number of Nodes') # set the label of the x axis
plt.ylabel('Degree') # set the label of the y axis
plt.legend() # display the legend
Out[46]:
<matplotlib.legend.Legend at 0x1f7c12dec70>

Task 5¶

The network observed in A Song of Ice and Fire Book 1 exhibits a significant degree distribution overlap with the simulated network using the Preferential Attachment model. In a Preferential Attachment model, the probability of any given node acquiring a new edge is proportional to the number of edges it already possesses. What does this correspondence suggest about character relationships in A Song of Ice and Fire? Once again, If you're not familiar with the work, feel free to speculate on potential reasons. Please include the discussion in the next cell (markdown).