From cbe22da02b05863f9956a935ee24d3d9a4fd0dbd Mon Sep 17 00:00:00 2001 From: Holtz Yan Date: Fri, 27 Sep 2024 08:49:10 +0200 Subject: [PATCH] fix Guy --- Example_dataset/Readme.md | 76 +- graph/chord.Rmd | 4 +- graph/chord.html | 1000 ++-- story/AdjacencyMatrix.Rmd | 4 +- story/AdjacencyMatrix.html | 9837 ++++++++++++++++++++++++++++++++++-- 5 files changed, 9975 insertions(+), 946 deletions(-) diff --git a/Example_dataset/Readme.md b/Example_dataset/Readme.md index e66c75a..808b689 100644 --- a/Example_dataset/Readme.md +++ b/Example_dataset/Readme.md @@ -1,56 +1,24 @@ This folder lists the datasets used in the website [data-to-viz.com](https://www.data-to-viz.com). -| id | Name | Source | Description | Analysis | -|----|------------- |---------------| ------------| ---------| -|1 | OneNum | [AirBnb](https://s3.amazonaws.com/tomslee-airbnb-data-2/alpes_maritime.zip) | Night price for 10000 appartments on the french riviera | [link](https://www.data-to-viz.com/story/OneNum.html) | -|2 | TwoNum | [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data) | Apartment price and ground living area | [link](https://www.data-to-viz.com/story/TwoNum.html) | -|3 | TwoNumOrdered | [CoinMarketCap](https://github.com/JesseVent/crypto) | Daily bitcoin price between 2014 and 2018 | [link](https://www.data-to-viz.com/story/TwoNum.html) | -|4 | ThreeNum | [Gapminder](https://github.com/jennybc/gapminder) | Gapminder: life Expectancy, pop and per-capita GDP for several countries | [link](https://www.data-to-viz.com/story/ThreeNum.html) | -|4 | ThreeNum | [R & Ross Ihaka](https://vincentarelbundock.github.io/Rdatasets/doc/datasets/volcano.html) | Topographic information for Maunga Whau volcano | [link](https://www.data-to-viz.com/story/OneCatSevOrderedNum.html) | -|5 | OneCatSevNumOrdered | [Babynames R library](https://github.com/hadley/babynames) | Evolution of first name popularity in the US | [link](https://www.data-to-viz.com/story/OneCatSevOrderedNum.html) | -|6 | SeveralNum | [Cars](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html) | Features of 32 cars | [link](https://www.data-to-viz.com/story/SeveralNum.html) | -|7 | OneNumOneCat | [Peace Research Inst.](http://armstrade.sipri.org/armstrade/page/toplist.php) | Quantity of weapons exported by the top 50 largest exporters | [link](https://www.data-to-viz.com/story/OneNumOneCat.html) | -|8 | OneNumOneCatSevObs | [Reddit](https://github.com/zonination/perceptions) | Perception of probability | [link]() | -|9 | OneNumSevCatSubgroupOneObs | [OECD](https://stats.oecd.org/index.aspx?queryid=54751) | The Gender Wage Gap | [link](https://www.data-to-viz.com/story/OneNumSevCatSubgroupOneObsPerGroup.html) | -|10 | OneNumSevCatSubgroupSevObsPerGroup | [Seaborn Python library](https://www.data-to-viz.com/story/OneNumSevCatSubgroupSevObsPerGroup.html) | How much do people tip? | [link]() | -|11 | SevCatOneNumNestedOneObsPerGroup | [Wikipedia](https://github.com/mledoze/countries) | World population for 250 countries | [link](https://www.data-to-viz.com/story/SevCatOneNumNestedOneObsPerGroup.html) | -|13 | AdjacencyMatrix | [Goggle Scholar](https://github.com/holtzy/Google-Scholar-Network) | Co-autorship network of a researcher | [link](https://www.data-to-viz.com/story/AdjacencyMatrix.html) | -|13 | AdjacencyMatrix | [Gui J. Abel Publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) | Migration of people at world scale | [link](https://www.data-to-viz.com/story/AdjacencyMatrix.html) | -|14 | SeveralIndepLists | [Paroles.net](https://www.paroles.net/) | Lyrics of french rapers | [link](https://www.data-to-viz.com/story/SeveralIndepLists.html) | -|15 | NestedLinksValue | [-](https://www.paroles.net/) | - | [link]() | -|16 | NestedAndConnection | [Flare](https://www.paroles.net/) | Connection between informatic components | [link]() | -|17 | ListGPSCoordinates | [Surfer project](https://github.com/holtzy/About-Surfers-On-Twitter) | Where do people twitting #surf live? | [link]() | -|18 | ListGPSCoordinatesWithValue | [Maps R library](https://github.com/adeckmyn/maps) | Population of 925 cities in the UK | [link]() | -|19 | MapConnection | [Surfer Project](https://github.com/holtzy/About-Surfers-On-Twitter) | Where do surfers travel | [link]() | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +| id | Name | Source | Description | Analysis | +| --- | ---------------------------------- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------- | +| 1 | OneNum | [AirBnb](https://s3.amazonaws.com/tomslee-airbnb-data-2/alpes_maritime.zip) | Night price for 10000 appartments on the french riviera | [link](https://www.data-to-viz.com/story/OneNum.html) | +| 2 | TwoNum | [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data) | Apartment price and ground living area | [link](https://www.data-to-viz.com/story/TwoNum.html) | +| 3 | TwoNumOrdered | [CoinMarketCap](https://github.com/JesseVent/crypto) | Daily bitcoin price between 2014 and 2018 | [link](https://www.data-to-viz.com/story/TwoNum.html) | +| 4 | ThreeNum | [Gapminder](https://github.com/jennybc/gapminder) | Gapminder: life Expectancy, pop and per-capita GDP for several countries | [link](https://www.data-to-viz.com/story/ThreeNum.html) | +| 4 | ThreeNum | [R & Ross Ihaka](https://vincentarelbundock.github.io/Rdatasets/doc/datasets/volcano.html) | Topographic information for Maunga Whau volcano | [link](https://www.data-to-viz.com/story/OneCatSevOrderedNum.html) | +| 5 | OneCatSevNumOrdered | [Babynames R library](https://github.com/hadley/babynames) | Evolution of first name popularity in the US | [link](https://www.data-to-viz.com/story/OneCatSevOrderedNum.html) | +| 6 | SeveralNum | [Cars](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html) | Features of 32 cars | [link](https://www.data-to-viz.com/story/SeveralNum.html) | +| 7 | OneNumOneCat | [Peace Research Inst.](http://armstrade.sipri.org/armstrade/page/toplist.php) | Quantity of weapons exported by the top 50 largest exporters | [link](https://www.data-to-viz.com/story/OneNumOneCat.html) | +| 8 | OneNumOneCatSevObs | [Reddit](https://github.com/zonination/perceptions) | Perception of probability | [link]() | +| 9 | OneNumSevCatSubgroupOneObs | [OECD](https://stats.oecd.org/index.aspx?queryid=54751) | The Gender Wage Gap | [link](https://www.data-to-viz.com/story/OneNumSevCatSubgroupOneObsPerGroup.html) | +| 10 | OneNumSevCatSubgroupSevObsPerGroup | [Seaborn Python library](https://www.data-to-viz.com/story/OneNumSevCatSubgroupSevObsPerGroup.html) | How much do people tip? | [link]() | +| 11 | SevCatOneNumNestedOneObsPerGroup | [Wikipedia](https://github.com/mledoze/countries) | World population for 250 countries | [link](https://www.data-to-viz.com/story/SevCatOneNumNestedOneObsPerGroup.html) | +| 13 | AdjacencyMatrix | [Goggle Scholar](https://github.com/holtzy/Google-Scholar-Network) | Co-autorship network of a researcher | [link](https://www.data-to-viz.com/story/AdjacencyMatrix.html) | +| 13 | AdjacencyMatrix | [Guy J. Abel Publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) | Migration of people at world scale | [link](https://www.data-to-viz.com/story/AdjacencyMatrix.html) | +| 14 | SeveralIndepLists | [Paroles.net](https://www.paroles.net/) | Lyrics of french rapers | [link](https://www.data-to-viz.com/story/SeveralIndepLists.html) | +| 15 | NestedLinksValue | [-](https://www.paroles.net/) | - | [link]() | +| 16 | NestedAndConnection | [Flare](https://www.paroles.net/) | Connection between informatic components | [link]() | +| 17 | ListGPSCoordinates | [Surfer project](https://github.com/holtzy/About-Surfers-On-Twitter) | Where do people twitting #surf live? | [link]() | +| 18 | ListGPSCoordinatesWithValue | [Maps R library](https://github.com/adeckmyn/maps) | Population of 925 cities in the UK | [link]() | +| 19 | MapConnection | [Surfer Project](https://github.com/holtzy/About-Surfers-On-Twitter) | Where do surfers travel | [link]() | diff --git a/graph/chord.Rmd b/graph/chord.Rmd index 9053bea..478550d 100644 --- a/graph/chord.Rmd +++ b/graph/chord.Rmd @@ -32,7 +32,7 @@ A `chord diagram` represents flows or connections between several entities (call -Here is an example displaying the number of people migrating from one country to another. Data used comes from this [scientific publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) from [Gui J. Abel](http://guyabel.com). +Here is an example displaying the number of people migrating from one country to another. Data used comes from this [scientific publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) from [Guy J. Abel](http://guyabel.com). ```{r, warning=FALSE, message=FALSE, fig.align="center", fig.height=7, fig.width=7} # Libraries @@ -108,7 +108,7 @@ circos.trackPlotRegion( ``` -*Note*: this plot is made using the circlize library, and very strongly inspired from the [Migest package](https://github.com/cran/migest) from [Gui J. Abel](http://guyabel.com). Read more about this story [here](https://www.data-to-viz.com/story/AdjacencyMatrix.html). +*Note*: this plot is made using the circlize library, and very strongly inspired from the [Migest package](https://github.com/cran/migest) from [Guy J. Abel](http://guyabel.com). Read more about this story [here](https://www.data-to-viz.com/story/AdjacencyMatrix.html). diff --git a/graph/chord.html b/graph/chord.html index 0e90792..60928a4 100644 --- a/graph/chord.html +++ b/graph/chord.html @@ -1,8 +1,6 @@ - + Chord diagram – from Data to Viz @@ -36,10 +34,7 @@ content="An extensive description of Chord diagram. Definition, examples, input data, common caveats, tool to build it and potential alternatives." /> - + - - - - - - - - - - + + + + + + + + + + + - + - - - + + + - -
- - - + -
-
-

Definition

-
-

A chord diagram represents flows or connections between -several entities (called nodes). Each entity is represented -by a fragment on the outer part of the circular layout. -Then, arcs are drawn between each entities. The size of the -arc is proportional to the importance of the flow.

-

Here is an example displaying the number of people migrating from one -country to another. Data used comes from this scientific -publication from Gui J. Abel.

-
# Libraries
+      
+
+

Definition

+
+

+ A chord diagram represents flows or connections between + several entities (called nodes). Each entity is + represented by a fragment on the outer part of the + circular layout. Then, arcs are drawn + between each entities. The size of the arc is proportional to the + importance of the flow. +

+

+ Here is an example displaying the number of people migrating from + one country to another. Data used comes from this + scientific publication + from Guy J. Abel. +

+
+
# Libraries
 library(tidyverse)
 library(viridis)
 library(patchwork)
@@ -371,44 +505,74 @@ 

Definition

major.tick.percentage = 0.5, labels.niceFacing = FALSE) } -)
-

-

Note: this plot is made using the circlize library, and very -strongly inspired from the Migest package from Gui J. Abel. Read more about this story here.

-
-
-

What for

-
-

Chord diagrams are eye catching and quite popular in data -visualization. They allow to visualize weigthed -relationships between several entities. They are adapted for several -specific situations that slightly modify the output and the way to read -them:

-
    -
  • Flow. This is the example decribed in the chord diagram above. -But two ways to represent it:

    -
      -
    • One asymetric arc per pair
    • -
    • Two arcs per pair
    • -
  • -
  • Bipartite: nodes are grouped in a few categories. Connections go -between categories but not within categories. In my -opinion sankey -diagrams are more adapted in this situation.

  • -
-

Note: this section is under construction.

-
-
-

Variation

-
-

Interactivity is a real plus to make the chord diagram -understandable. In the example below, you can hover a specific group to -highlight all its connections.

-
-
library(chorddiag)
+)
+
+

+ +

+

+ Note: this plot is made using the circlize library, and + very strongly inspired from the + Migest package from + Guy J. Abel. Read more about this + story + here. +

+
+
+

What for

+
+

+ Chord diagrams are eye catching and quite popular in data + visualization. They allow to visualize + weigthed relationships between several entities. They + are adapted for several specific situations that slightly modify the + output and the way to read them: +

+
    +
  • +

    + Flow. This is the example decribed in the chord diagram above. + But two ways to represent it: +

    +
      +
    • One asymetric arc per pair
    • +
    • Two arcs per pair
    • +
    +
  • +
  • +

    + Bipartite: nodes are grouped in a few categories. Connections go + between categories but not within categories. + In my opinion + sankey diagrams + are more adapted in this situation. +

    +
  • +
+

Note: this section is under construction.

+
+
+

Variation

+
+

+ Interactivity is a real plus to make the chord diagram + understandable. In the example below, you can hover a specific group + to highlight all its connections. +

+
+
+
library(chorddiag)
 m <- matrix(c(11975,  5871, 8916, 2868,
               1951, 10048, 2060, 6171,
               8010, 16145, 8090, 8045,
@@ -420,207 +584,315 @@ 

Variation

prefer = haircolors) groupColors <- c("#000000", "#FFDD89", "#957244", "#F26223") -chorddiag(m, groupColors = groupColors, groupnamePadding = 20)
-
- -
-

Note: this example comes from the chorddiag package -documentation.

-
-
-

Common mistakes

-
-
    -
  • The group order around the circle is important. Try to minimize the -number of arc crossing.
  • -
  • Mind over-cluttering that makes the figure unreadable. It is advised -to dismiss weak connections.
  • -
  • Chord diagrams are not straightforward to understand at all. Give -plenty of explanation to your audience when you present it. It is -advised to break down the graphic, presenting its components -progressively. See this awesome example of storytelling from Nadieh -Bremer.
  • -
-
- -
-

Build your own

-
-

The R, -Python, React and -D3 graph galleries -are 4 websites providing hundreds of chart example, always providing the -reproducible code. Click the button below to see how to build the chart -you need with your favorite programing language.

-

-R -graph gallery -Python -gallery -React -gallery -D3 -gallery -

-
-
- -
-
-
chorddiag(m, groupColors = groupColors, groupnamePadding = 20)
+
+
+ + +

+ Note: this example comes from the + chorddiag + package documentation. +

+
+
+

Common mistakes

+
+
    +
  • + The group order around the circle is important. Try to minimize + the number of arc crossing. +
  • +
  • + Mind over-cluttering that makes the figure unreadable. It is + advised to dismiss weak connections. +
  • +
  • + Chord diagrams are not straightforward to understand at all. Give + plenty of explanation to your audience when you present it. It is + advised to break down the graphic, presenting its components + progressively. See this awesome example of storytelling from + Nadieh Bremer. +
  • +
+
+ +
+
+

Build your own

+
+

+ The R, + Python, + React + and D3 graph + galleries are 4 websites providing hundreds of chart example, always + providing the reproducible code. Click the button below to see how + to build the chart you need with your favorite programing language. +

+

+ R graph gallery + Python gallery + React gallery + D3 gallery +

+
+
+ +
+
+
+

+ Dataviz decision tree +

+

+ Data To Viz is a + comprehensive classification of chart types organized by + data input format. Get a high-resolution version of our decision + tree delivered to your inbox now! +

+
+ +
+
+
+ High Resolution Poster +
+
+
+
-   -
-

A work by Yan Holtz for data-to-viz.com

+   +
+

+ A work by Yan Holtz for + data-to-viz.com +

- - + + - -

- - - -

+ +

+ + + +

- - - + - + gtag("config", "UA-79254642-3"); + + -  +   + - + + diff --git a/story/AdjacencyMatrix.Rmd b/story/AdjacencyMatrix.Rmd index db858a2..98336fd 100644 --- a/story/AdjacencyMatrix.Rmd +++ b/story/AdjacencyMatrix.Rmd @@ -33,7 +33,7 @@ output: Adjacency and incidence matrices provide relationship between several nodes. The information they contain can have different nature, thus this document will consider several examples: -- Relationships can be `directed` and `weighted`. Like the number of people migrating from one country to another. Data used comes from this [scientific publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) from [Gui J. Abel](http://guyabel.com). +- Relationships can be `directed` and `weighted`. Like the number of people migrating from one country to another. Data used comes from this [scientific publication](https://onlinelibrary.wiley.com/doi/abs/10.1111/imre.12327) from [Guy J. Abel](http://guyabel.com). ```{r, warning=FALSE, message=FALSE} # Libraries library(tidyverse) @@ -80,7 +80,7 @@ dataUU %>% head(3) %>% select(1:4) %>% kable() %>% A chord diagram is a good way to represent migration flows. It works well if your data are directed and weighted like for migration flows between country. -Disclaimer: this plot is made using the circlize library, and very strongly inspired from the [Migest package](https://github.com/cran/migest) from [Gui J. Abel](http://guyabel.com), who is also the author of the migration [dataset](https://www.oeaw.ac.at/fileadmin/subsites/Institute/VID/PDF/Publications/Working_Papers/WP2016_02.pdf) used here. +Disclaimer: this plot is made using the circlize library, and very strongly inspired from the [Migest package](https://github.com/cran/migest) from [Guy J. Abel](http://guyabel.com), who is also the author of the migration [dataset](https://www.oeaw.ac.at/fileadmin/subsites/Institute/VID/PDF/Publications/Working_Papers/WP2016_02.pdf) used here. Since this kind of graphic is used to display flows, it can be applied only on networks in which connections are `weighted`. It does not work for the other example on authors connections. diff --git a/story/AdjacencyMatrix.html b/story/AdjacencyMatrix.html index 08bb265..98b7ee5 100644 --- a/story/AdjacencyMatrix.html +++ b/story/AdjacencyMatrix.html @@ -1,8 +1,6 @@ - + @@ -54,142 +52,280 @@

- + - + - + - + - + - + - AdjacencyMatrix.knit - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - + - - - + + + - -
- - - -

-
-

Adjacency and incidence matrices provide relationship between several -nodes. The information they contain can have different nature, thus this -document will consider several examples:

-
    -
  • Relationships can be directed and -weighted. Like the number of people migrating from one -country to another. Data used comes from this scientific -publication from Gui J. Abel.
  • -
-
# Libraries
+      
+      

+
+

+ Adjacency and incidence matrices provide relationship between several + nodes. The information they contain can have different nature, thus + this document will consider several examples: +

+
    +
  • + Relationships can be directed and + weighted. Like the number of people migrating from one + country to another. Data used comes from this + scientific publication + from Guy J. Abel. +
  • +
+
+
# Libraries
 library(tidyverse)
 library(hrbrthemes)
 library(circlize)
@@ -313,171 +451,142 @@
 
 # show data
 data %>% head(3) %>% select(1:3) %>% kable() %>%
-  kable_styling(bootstrap_options = "striped", full_width = F)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -Africa - -East.Asia - -Europe -
-Africa - -3.142471 - -0.000000 - -2.107883 -
-East Asia - -0.000000 - -1.630997 - -0.601265 -
-Europe - -0.000000 - -0.000000 - -2.401476 -
-
    -
  • Relationships can be undirected and -unweighted. I will consider all the co-authors of a -researcher and study who is connected through a common publication. Data -have been retrieved using the scholar package, the -pipeline is describe in this github -repository. The result is an adjacency matrix with about 100 -researchers, filled with 1 if they have published a paper together, 0 -otherwise.
  • -
-
# Load data
+  kable_styling(bootstrap_options = "striped", full_width = F)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AfricaEast.AsiaEurope
Africa3.1424710.0000002.107883
East Asia0.0000001.6309970.601265
Europe0.0000000.0000002.401476
+
    +
  • + Relationships can be undirected and + unweighted. I will consider all the co-authors of a + researcher and study who is connected through a common publication. + Data have been retrieved using the + scholar package, + the pipeline is describe in this + github repository. The result is an adjacency matrix with about 100 researchers, + filled with 1 if they have published a paper together, 0 otherwise. +
  • +
+
+
# Load data
 dataUU <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/13_AdjacencyUndirectedUnweighted.csv", header=TRUE)
 
 # show data
 dataUU %>% head(3) %>% select(1:4) %>% kable() %>%
-  kable_styling(bootstrap_options = "striped", full_width = F)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-from - -A.Bateman - -A.Besnard - -A.Breil -
-A Armero - -NA - -NA - -1 -
-A Bateman - -NA - -NA - -NA -
-A Besnard - -NA - -NA - -NA -
-
    -
  • Relationships can also be undirected and -weighted

  • -
  • Relationships can also be directed and -unweighted

  • -
-
-

Chord diagram

-
-

A chord diagram is a good way to represent migration flows. It works -well if your data are directed and weighted like for migration flows -between country.

-

Disclaimer: this plot is made using the circlize library, and very -strongly inspired from the Migest package from Gui J. Abel, who is also the author of the -migration dataset -used here.

-

Since this kind of graphic is used to display flows, it can be -applied only on networks in which connections are weighted. -It does not work for the other example on authors connections.

-
# short names
+  kable_styling(bootstrap_options = "striped", full_width = F)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
fromA.BatemanA.BesnardA.Breil
A ArmeroNANA1
A BatemanNANANA
A BesnardNANANA
+
    +
  • +

    + Relationships can also be undirected and + weighted +

    +
  • +
  • +

    + Relationships can also be directed and + unweighted +

    +
  • +
+
+

Chord diagram

+
+

+ A chord diagram is a good way to represent migration flows. It works + well if your data are directed and weighted like for migration flows + between country. +

+

+ Disclaimer: this plot is made using the circlize library, and very + strongly inspired from the + Migest package from + Guy J. Abel, who is also the author + of the migration + dataset + used here. +

+

+ Since this kind of graphic is used to display flows, it can be + applied only on networks in which connections are + weighted. It does not work for the other example on + authors connections. +

+
+
# short names
 colnames(data) <- c("Africa", "East Asia", "Europe", "Latin Ame.",   "North Ame.",   "Oceania", "South Asia", "South East Asia", "Soviet Union", "West.Asia")
 rownames(data) <- colnames(data)
 
@@ -535,8 +644,9 @@ 

Chord diagram

major.tick.percentage = 0.5, labels.niceFacing = FALSE) } -)
-
## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
+)
+
+
## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
 ## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
 ## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
 ## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
@@ -546,27 +656,44 @@ 

Chord diagram

## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`. ## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`. ## `major.tick.percentage` is not used any more, please directly use argument `major.tick.length`.
-

-In my opinion this is a powerful way to display information. Major flows -are easy to detect, like the migration from South Asia towards Westa -Asia, or Africa to Europe. Moreover, for each continent it is quite easy -to quantify the proportion of people leaving and arriving.

-

However chord diagram is not an usual way of displaying information. -Thus, it is advised to give a good amount of explanation to educate your -audience. A good way to do so is to draw just a few connections in a -first step, before displaying the whole graphic. See this blog post by -Nadieh -Bremer for more ideas on this topic.

-
-
-

Sankey diagram

-
-

Sankey diagram is another option -to display weighted connection. Intead of displaying regions on a -circle, they are duplicated and represented on both sides of the -graphic. Origin is usually on the left, destination on the right.

-
# Package
+          

+ + In my opinion this is a powerful way to display information. Major + flows are easy to detect, like the migration from South Asia towards + Westa Asia, or Africa to Europe. Moreover, for each continent it is + quite easy to quantify the proportion of people leaving and + arriving. +

+

+ However chord diagram is not an usual way of displaying information. + Thus, it is advised to give a good amount of explanation to educate + your audience. A good way to do so is to draw just a few connections + in a first step, before displaying the whole graphic. See this blog + post by + Nadieh Bremer + for more ideas on this topic. +

+
+
+

Sankey diagram

+
+

+ Sankey diagram is another option + to display weighted connection. Intead of displaying regions on a + circle, they are duplicated and represented on both sides of the + graphic. Origin is usually on the left, destination on the right. +

+
+
# Package
 library(networkD3)
 
 # I need a long format
@@ -591,20 +718,125 @@ 

Sankey diagram

sankeyNetwork(Links = data_long, Nodes = nodes, Source = "IDsource", Target = "IDtarget", Value = "value", NodeID = "name", - sinksRight=FALSE, colourScale=ColourScal, nodeWidth=40, fontSize=13, nodePadding=20)
-
- -
-
-

Heatmap

-
-

The heatmap is -another great alternative to represent an adjacency matrix. Here, all -the origin countries are represented as row, and all the destination as -columns. The diagonal pops out with a lot of yellow squares, which means -that most of the migrations are intra continental.

-
library(heatmaply)
+                     sinksRight=FALSE, colourScale=ColourScal, nodeWidth=40, fontSize=13, nodePadding=20)
+
+
+ +
+
+

Heatmap

+
+

+ The + heatmap + is another great alternative to represent an adjacency matrix. Here, + all the origin countries are represented as row, and all the + destination as columns. The diagonal pops out with a lot of yellow + squares, which means that most of the migrations are intra + continental. +

+
+
library(heatmaply)
 p <- heatmaply(data,
         dendrogram = "none",
         xlab = "", ylab = "",
@@ -621,20 +853,1016 @@ 

Heatmap

labCol = colnames(data), labRow = rownames(data), heatmap_layers = theme(axis.line=element_blank()) - )
-
-
- -
-



-

Note that if the matrix is unweighted, each connection -can have only 2 values: 1 if there is a connection, 0 otherwise. It is -the case for the co-authorship network example, where researchers are -connected if they have already published a paper together. The heatmap -below shows these connections and also applies a clustering algorithm to -the data: researchers that tend to be involved in the same papers are -grouped together.

-
# Format data
+        )
+
+
+
+ +
+



+

+ Note that if the matrix is unweighted, each connection + can have only 2 values: 1 if there is a connection, 0 otherwise. It + is the case for the co-authorship network example, where researchers + are connected if they have already published a paper together. The + heatmap below shows these connections and also applies a clustering + algorithm to the data: researchers that tend to be involved in the + same papers are grouped together. +

+
+
# Format data
 tmp <- dataUU
 rownames(tmp) <- tmp$from
 tmp <- tmp %>% select(-from)
@@ -660,33 +1888,7560 @@ 

Heatmap

labCol = colnames(tmp), labRow = rownames(tmp), heatmap_layers = theme(axis.line=element_blank()) - )
-
-
- -
-
-
-

Network

-
-

Since an adjacency matrix is a network structure, it is -possible to build a network graph. -In a network graph, each entity is represented as a node, -and each connection as an edge.

-

In my opinion, this type of representation makes more sense when the -connections are unweighted, since drawing edges with -different sizes tends to clutter the figure and make it unreadable.

-

Thus, here is an application of this chart type to the coauthor -network. Researchers are the nodes, represented as dots. If 2 -researchers have published at least one scientific paper together, they -are connected. The node size is proportionnal to the number of -coauthors.

-
-
-
-
# Transform the adjacency matrix in a long format
+        )
+
+
+
+ +
+
+
+

Network

+
+

+ Since an adjacency matrix is a network structure, it is + possible to build a + network graph. In a network graph, each entity is represented as a + node, and each connection as an edge. +

+

+ In my opinion, this type of representation makes more sense when the + connections are unweighted, since drawing edges with + different sizes tends to clutter the figure and make it unreadable. +

+

+ Thus, here is an application of this chart type to the coauthor + network. Researchers are the nodes, represented as dots. If 2 + researchers have published at least one scientific paper together, + they are connected. The node size is proportionnal to the number of + coauthors. +

+
+
+
+
+
# Transform the adjacency matrix in a long format
 connect <- dataUU %>%
   gather(key="to", value="value", -1) %>%
   mutate(to = gsub("\\.", " ",to)) %>%
@@ -740,31 +9495,49 @@ 

Network

plot.margin=unit(c(0,0,0,0), "null"), panel.spacing=unit(c(0,0,0,0), "null") ) + - expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))
-

-
-
-

Network -graphs are very powerful to study the global structure of the -network. Here, a few groups of researchers are isolated. Each actually -represents one single paper where Vincent Ranwez was involved. In the -middle a massive network of researchers appear: these are the people who -Vincent published with most often, and are therefore all linked -together.

-


-

However, network charts are very bad at annotating every single -points: names tend to overlap edges making the figure unreadable. The -arc diagram described below is a good alternative if you want to show -labels.

-
-

Chord diagram (again)

-
-

Instead of using a custom algorithm to position each nodes, it is -possible to place them all around a circle, making a chord diagram. But -this kind of chart makes sense only if the order of nodes around the -circle is carefully chosen, to avoid having a cluttered and unreadable -figure.

-
# Transform the adjacency matrix in a long format
+  expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))
+
+

+ +

+
+
+

+ Network graphs + are very powerful to study the global structure of the network. Here, + a few groups of researchers are isolated. Each actually represents one + single paper where Vincent Ranwez was involved. In the middle a + massive network of researchers appear: these are the people who + Vincent published with most often, and are therefore all linked + together. +

+


+

+ However, network charts are very bad at annotating every single + points: names tend to overlap edges making the figure unreadable. The + arc diagram described below is a good alternative if you want to show + labels. +

+
+

Chord diagram (again)

+
+

+ Instead of using a custom algorithm to position each nodes, it is + possible to place them all around a circle, making a chord diagram. + But this kind of chart makes sense only if the order of nodes around + the circle is carefully chosen, to avoid having a cluttered and + unreadable figure. +

+
+
# Transform the adjacency matrix in a long format
 connect <- dataUU %>%
   gather(key="to", value="value", -1) %>%
   mutate(to = gsub("\\.", " ",to)) %>%
@@ -825,20 +9598,39 @@ 

Chord diagram (again)

plot.margin=unit(c(0,0,0,0), "null"), panel.spacing=unit(c(0,0,0,0), "null") ) + - expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))
-

-
-
-

Arc diagram

-
-

An arc diagram follows the same concept, but displays nodes along a -single axis and links with arcs. The main advantage is that it allows to -make the labels easy to read.

-
-
-
-
# Make the graph
+  expand_limits(x = c(-1.2, 1.2), y = c(-1.2, 1.2))
+
+

+ +

+
+
+

Arc diagram

+
+

+ An arc diagram follows the same concept, but displays nodes along a + single axis and links with arcs. The main advantage is that it + allows to make the labels easy to read. +

+
+
+
+
+
# Make the graph
 ggraph(mygraph, layout="linear") +
   geom_edge_arc(edge_colour="black", edge_alpha=0.2, edge_width=0.3, fold=TRUE) +
   geom_node_point(aes(size=n, color=as.factor(grp), fill=grp), alpha=0.5) +
@@ -851,68 +9643,45 @@ 

Arc diagram

plot.margin=unit(c(0,0,0.4,0), "null"), panel.spacing=unit(c(0,0,3.4,0), "null") ) + - expand_limits(x = c(-1.2, 1.2), y = c(-5.6, 1.2))
-

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Going further

-
-

- You can learn more about each type of graphic presented in this - story in the dedicated sections. Click the icon below: -

- - - - - - - - - - -
+ expand_limits(x = c(-1.2, 1.2), y = c(-5.6, 1.2))
+
+

+ +

+
+
+ +

Going further

+
+

+ You can learn more about each type of graphic presented in this story + in the dedicated sections. Click the icon below: +

+ + + + + + + + + + +
-   -
-

A work by Yan Holtz for data-to-viz.com

- - - +   +
+

+ A work by Yan Holtz for + data-to-viz.com +

- -

- - - -

+ + - - - - + + + +   + - + +