An R Markdown document converted from “HW1.ipynb”

Starting fresh

# To start fresh, restart the R session before running this document instead
# of calling rm(list = ls()). That call only deletes the visible objects in
# the current workspace: attached packages, options and hidden objects are
# left as they were, so it does not give a clean state, and it wipes the
# workspace of anyone who runs this script in their own session.


Get the Data

# Location of the source data: the raw FSI-2023 Excel workbook on GitHub.
# The URL is assembled from the repository path and the file name and kept
# in 'linkGit'; the data read from it is later stored as 'fragility23'.
linkGit <- paste0(
  "https://github.com/DACSS-Fundamentals/overview/raw/refs/heads/main/",
  "FSI-2023-DOWNLOAD.xlsx"
)

#install.packages('rio') #package needed for importing Excel files from URLs
library(rio)

## Warning: package 'rio' was built under R version 4.4.3

# Read the Excel workbook found at 'linkGit' into the data frame
# 'fragility23'; rio::import() can read a file directly from a URL.
fragility23 <- rio::import(linkGit)


Exploratory commands

# List every column name of the imported data frame
names(fragility23)

##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "S1: Demographic Pressures"        "S2: Refugees and IDPs"           
##  [7] "C3: Group Grievance"              "E3: Human Flight and Brain Drain"
##  [9] "E2: Economic Inequality"          "E1: Economy"                     
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "C1: Security Apparatus"          
## [15] "C2: Factionalized Elites"         "X1: External Intervention"

# Inspect the structure of the data frame (dimensions, column types and the
# first values) to confirm that the score columns were read as numeric
str(fragility23)

## 'data.frame':    179 obs. of  16 variables:
##  $ Country                         : chr  "Somalia" "Yemen" "South Sudan" "Congo Democratic Republic" ...
##  $ Year                            : num  2023 2023 2023 2023 2023 ...
##  $ Rank                            : chr  "1st" "2nd" "3rd" "4th" ...
##  $ Total                           : num  112 109 109 107 107 ...
##  $ S1: Demographic Pressures       : num  10 9.6 9.7 9.7 7.4 9.2 8.8 9.3 9.5 8.8 ...
##  $ S2: Refugees and IDPs           : num  9 9.6 10 9.8 9.1 8.6 9.6 9.5 9 7.7 ...
##  $ C3: Group Grievance             : num  8.7 8.8 8.6 9.4 9.1 8.3 9.3 8.1 8.1 5.5 ...
##  $ E3: Human Flight and Brain Drain: num  8.6 6.4 6.5 6.4 8 8.5 7.5 6.2 7.7 8.3 ...
##  $ E2: Economic Inequality         : num  9.1 7.9 8.6 8.4 6.5 8.2 8.5 9.6 8.7 9.2 ...
##  $ E1: Economy                     : num  9.5 9.9 8.6 8.1 9.6 9.6 9.3 8.2 8.4 8.9 ...
##  $ P1: State Legitimacy            : num  9.6 9.8 9.8 9.3 10 9.4 9.4 8.9 9.1 9.9 ...
##  $ P2: Public Services             : num  9.8 9.6 9.7 9.3 9 10 8.6 10 9.6 9.8 ...
##  $ P3: Human Rights                : num  9 9.6 8.7 9.3 9.1 8.7 9.2 9.1 8.4 8.7 ...
##  $ C1: Security Apparatus          : num  9.5 8.6 9.9 8.8 9.4 9.7 8.3 8 8.7 6.8 ...
##  $ C2: Factionalized Elites        : num  10 9.9 9.2 9.6 9.9 8.7 9.6 9.4 9.5 9.7 ...
##  $ X1: External Intervention       : num  9.1 9.2 9.2 9.1 10 7.7 8.1 9.4 7.9 9.6 ...

# Preview the first 10 rows of the data frame
head(fragility23, n = 10)

##                      Country Year Rank Total S1: Demographic Pressures
## 1                    Somalia 2023  1st 111.9                      10.0
## 2                      Yemen 2023  2nd 108.9                       9.6
## 3                South Sudan 2023  3rd 108.5                       9.7
## 4  Congo Democratic Republic 2023  4th 107.2                       9.7
## 5                      Syria 2023  5th 107.1                       7.4
## 6                Afghanistan 2023  6th 106.6                       9.2
## 7                      Sudan 2023  7th 106.2                       8.8
## 8   Central African Republic 2023  8th 105.7                       9.3
## 9                       Chad 2023  9th 104.6                       9.5
## 10                     Haiti 2023 10th 102.9                       8.8
##    S2: Refugees and IDPs C3: Group Grievance E3: Human Flight and Brain Drain
## 1                    9.0                 8.7                              8.6
## 2                    9.6                 8.8                              6.4
## 3                   10.0                 8.6                              6.5
## 4                    9.8                 9.4                              6.4
## 5                    9.1                 9.1                              8.0
## 6                    8.6                 8.3                              8.5
## 7                    9.6                 9.3                              7.5
## 8                    9.5                 8.1                              6.2
## 9                    9.0                 8.1                              7.7
## 10                   7.7                 5.5                              8.3
##    E2: Economic Inequality E1: Economy P1: State Legitimacy P2: Public Services
## 1                      9.1         9.5                  9.6                 9.8
## 2                      7.9         9.9                  9.8                 9.6
## 3                      8.6         8.6                  9.8                 9.7
## 4                      8.4         8.1                  9.3                 9.3
## 5                      6.5         9.6                 10.0                 9.0
## 6                      8.2         9.6                  9.4                10.0
## 7                      8.5         9.3                  9.4                 8.6
## 8                      9.6         8.2                  8.9                10.0
## 9                      8.7         8.4                  9.1                 9.6
## 10                     9.2         8.9                  9.9                 9.8
##    P3: Human Rights C1: Security Apparatus C2: Factionalized Elites
## 1               9.0                    9.5                     10.0
## 2               9.6                    8.6                      9.9
## 3               8.7                    9.9                      9.2
## 4               9.3                    8.8                      9.6
## 5               9.1                    9.4                      9.9
## 6               8.7                    9.7                      8.7
## 7               9.2                    8.3                      9.6
## 8               9.1                    8.0                      9.4
## 9               8.4                    8.7                      9.5
## 10              8.7                    6.8                      9.7
##    X1: External Intervention
## 1                        9.1
## 2                        9.2
## 3                        9.2
## 4                        9.1
## 5                       10.0
## 6                        7.7
## 7                        8.1
## 8                        9.4
## 9                        7.9
## 10                       9.6

# Preview the last 10 rows of the data frame
tail(fragility23, n = 10)

##         Country Year  Rank Total S1: Demographic Pressures
## 170      Sweden 2023 170th  20.6                       3.0
## 171  Luxembourg 2023 172nd  19.5                       2.4
## 172     Ireland 2023 171st  19.5                       2.8
## 173      Canada 2023 173rd  18.9                       1.2
## 174     Denmark 2023 174th  17.9                       2.3
## 175 Switzerland 2023 175th  17.8                       2.4
## 176 New Zealand 2023 176th  16.7                       1.1
## 177     Finland 2023 177th  16.0                       1.7
## 178     Iceland 2023 178th  15.7                       1.5
## 179      Norway 2023 179th  14.5                       1.4
##     S2: Refugees and IDPs C3: Group Grievance E3: Human Flight and Brain Drain
## 170                   3.7                 2.3                              0.6
## 171                   2.8                 1.5                              1.7
## 172                   1.6                 0.5                              2.5
## 173                   2.0                 2.0                              0.7
## 174                   3.0                 3.1                              1.0
## 175                   3.2                 2.1                              1.0
## 176                   1.2                 2.0                              1.6
## 177                   1.9                 0.3                              1.5
## 178                   1.5                 0.5                              1.6
## 179                   1.7                 3.1                              0.7
##     E2: Economic Inequality E1: Economy P1: State Legitimacy
## 170                     2.3         1.3                  0.5
## 171                     1.8         2.4                  0.3
## 172                     1.8         1.7                  0.5
## 173                     2.5         1.4                  0.4
## 174                     1.8         1.0                  0.3
## 175                     2.4         1.6                  0.3
## 176                     2.6         2.6                  0.5
## 177                     1.6         2.7                  0.4
## 178                     1.5         2.6                  0.4
## 179                     1.4         1.4                  0.4
##     P2: Public Services P3: Human Rights C1: Security Apparatus
## 170                 1.0              1.5                    2.1
## 171                 1.3              1.1                    0.4
## 172                 1.9              1.6                    2.1
## 173                 1.7              1.9                    2.2
## 174                 1.7              0.6                    1.1
## 175                 1.6              0.4                    1.4
## 176                 1.1              0.5                    1.6
## 177                 1.0              0.5                    2.0
## 178                 0.9              0.4                    0.4
## 179                 1.0              0.4                    1.4
##     C2: Factionalized Elites X1: External Intervention
## 170                      1.8                       0.5
## 171                      3.4                       0.4
## 172                      1.5                       1.0
## 173                      2.5                       0.4
## 174                      1.4                       0.6
## 175                      1.0                       0.4
## 176                      1.4                       0.5
## 177                      1.4                       1.0
## 178                      1.8                       2.6
## 179                      1.1                       0.5

Transformative commands

# Goal: keep Country, Total, S1: Demographic Pressures, P1: State Legitimacy
# and E2: Economic Inequality in the object 'frag23_sub'.

# Preview which column names grep() selects before subsetting:
# - pattern: a regular expression whose alternatives are separated by "|";
#   a column is selected when its name contains any of them
# - x = names(fragility23): the strings searched are the column names
# - fixed = FALSE: treat the pattern as a regular expression, not literal text
# - value = TRUE: return the matching names rather than their positions
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
grep(pattern = "Country|S1|P1|E2|Total", x = names(fragility23),
     fixed = FALSE, value = TRUE)

## [1] "Country"                   "Total"                    
## [3] "S1: Demographic Pressures" "E2: Economic Inequality"  
## [5] "P1: State Legitimacy"

# Store the names of the columns to keep. grep() searches the column names
# of 'fragility23' with a regular expression (fixed = FALSE) and returns the
# matching names instead of their positions (value = TRUE). TRUE/FALSE are
# written in full because T and F can be reassigned.
keep <- grep("Country|S1|P1|E2|Total", names(fragility23),
             fixed = FALSE, value = TRUE)

# Build the subset from the selected columns; drop = FALSE guarantees that
# the result is a data frame even if only one column name were to match
frag23_sub <- fragility23[, keep, drop = FALSE]

# Check the structure of the new subset
str(frag23_sub)

## 'data.frame':    179 obs. of  5 variables:
##  $ Country                  : chr  "Somalia" "Yemen" "South Sudan" "Congo Democratic Republic" ...
##  $ Total                    : num  112 109 109 107 107 ...
##  $ S1: Demographic Pressures: num  10 9.6 9.7 9.7 7.4 9.2 8.8 9.3 9.5 8.8 ...
##  $ E2: Economic Inequality  : num  9.1 7.9 8.6 8.4 6.5 8.2 8.5 9.6 8.7 9.2 ...
##  $ P1: State Legitimacy     : num  9.6 9.8 9.8 9.3 10 9.4 9.4 8.9 9.1 9.9 ...

# Preview the first rows of the subset (6 rows, the default of head())
head(frag23_sub, n = 6)

##                     Country Total S1: Demographic Pressures
## 1                   Somalia 111.9                      10.0
## 2                     Yemen 108.9                       9.6
## 3               South Sudan 108.5                       9.7
## 4 Congo Democratic Republic 107.2                       9.7
## 5                     Syria 107.1                       7.4
## 6               Afghanistan 106.6                       9.2
##   E2: Economic Inequality P1: State Legitimacy
## 1                     9.1                  9.6
## 2                     7.9                  9.8
## 3                     8.6                  9.8
## 4                     8.4                  9.3
## 5                     6.5                 10.0
## 6                     8.2                  9.4

# display the column names of the subset (the columns keep their original names; nothing has been renamed)
names(frag23_sub)

## [1] "Country"                   "Total"                    
## [3] "S1: Demographic Pressures" "E2: Economic Inequality"  
## [5] "P1: State Legitimacy"

Filtering

# Sort the subset by E2 in ascending order: lower scores are better, so the
# best countries on Economic Inequality come first.
ordered_by_E2 <- frag23_sub[order(frag23_sub$`E2: Economic Inequality`), ]

# Keep the 10 best (lowest) E2 scores. head() returns at most the rows that
# exist, whereas indexing with 1:10 would pad a shorter table with NA rows.
top10_best_E2 <- head(ordered_by_E2, n = 10)
print(top10_best_E2)

##             Country Total S1: Demographic Pressures E2: Economic Inequality
## 179          Norway  14.5                       1.4                     1.4
## 178         Iceland  15.7                       1.5                     1.5
## 177         Finland  16.0                       1.7                     1.6
## 169     Netherlands  21.0                       2.5                     1.8
## 171      Luxembourg  19.5                       2.4                     1.8
## 172         Ireland  19.5                       2.8                     1.8
## 174         Denmark  17.9                       2.3                     1.8
## 151  Czech Republic  40.2                       3.2                     1.9
## 155 Slovak Republic  37.8                       2.6                     2.2
## 160         Belgium  31.4                       4.2                     2.2
##     P1: State Legitimacy
## 179                  0.4
## 178                  0.4
## 177                  0.4
## 169                  0.3
## 171                  0.3
## 172                  0.5
## 174                  0.3
## 151                  3.9
## 155                  3.2
## 160                  0.8

# Sort by E2 in descending order (decreasing = TRUE) so the worst, i.e.
# highest, scores come first, then keep the 10 worst. head() is used instead
# of 1:10 so a table with fewer than 10 rows is not padded with NA rows.
worst_ordered <- frag23_sub[order(frag23_sub$`E2: Economic Inequality`, decreasing = TRUE), ]
top10_worst_E2 <- head(worst_ordered, n = 10)
print(top10_worst_E2)

##                     Country Total S1: Demographic Pressures
## 8  Central African Republic 105.7                       9.3
## 10                    Haiti 102.9                       8.8
## 22               Mozambique  94.0                       9.6
## 49               Madagascar  81.7                       9.6
## 1                   Somalia 111.9                      10.0
## 48                   Zambia  81.8                       9.4
## 31            Guinea Bissau  89.9                       8.9
## 39                   Angola  86.9                       9.3
## 9                      Chad 104.6                       9.5
## 3               South Sudan 108.5                       9.7
##    E2: Economic Inequality P1: State Legitimacy
## 8                      9.6                  8.9
## 10                     9.2                  9.9
## 22                     9.2                  7.1
## 49                     9.2                  6.7
## 1                      9.1                  9.6
## 48                     9.1                  6.7
## 31                     8.9                  9.1
## 39                     8.8                  8.1
## 9                      8.7                  9.1
## 3                      8.6                  9.8

Computations

# Statistical description of 'frag23_sub': length and class for the
# character column; minimum, quartiles, mean and maximum for numeric columns

summary(object = frag23_sub)

##    Country              Total        S1: Demographic Pressures
##  Length:179         Min.   : 14.50   Min.   : 1.100           
##  Class :character   1st Qu.: 49.00   1st Qu.: 4.100           
##  Mode  :character   Median : 68.20   Median : 5.900           
##                     Mean   : 65.83   Mean   : 5.956           
##                     3rd Qu.: 82.20   3rd Qu.: 8.050           
##                     Max.   :111.90   Max.   :10.000           
##  E2: Economic Inequality P1: State Legitimacy
##  Min.   :1.400           Min.   : 0.300      
##  1st Qu.:3.650           1st Qu.: 3.650      
##  Median :5.200           Median : 6.400      
##  Mean   :5.323           Mean   : 5.741      
##  3rd Qu.:7.200           3rd Qu.: 8.100      
##  Max.   :9.600           Max.   :10.000

# Threshold of the worst quartile of Total: the 75th percentile, computed
# after removing missing values (na.rm = TRUE)
worst_quartile_value <- quantile(
  x = frag23_sub$Total,
  probs = 0.75,
  na.rm = TRUE
)
cat("Worst Quartile Threshold (75th percentile):", worst_quartile_value, "\n")

## Worst Quartile Threshold (75th percentile): 82.2

# show the three quartile cut points of Total (25%, 50%, 75%) plus the maximum (100%); missing values are removed first
quartiles_total <- quantile(frag23_sub$Total, probs = c(0.25, 0.5, 0.75, 1), na.rm = TRUE)
print(quartiles_total)

##   25%   50%   75%  100% 
##  49.0  68.2  82.2 111.9

# Correlations among S1, P1 and E2: cor() needs numeric input, so pick only
# those three score columns (in this order) from the subset
correlation_vars <- frag23_sub[, c("S1: Demographic Pressures",
                                   "P1: State Legitimacy",
                                   "E2: Economic Inequality")]

# Correlation matrix of the three columns; use = "complete.obs" leaves out
# every row that has a missing value in any of them
cor_matrix <- cor(x = correlation_vars, use = "complete.obs")

# Print a heading for the output; the correlation matrix itself (rounded to 3 decimal places) is printed by the statement that follows
print("Correlation Matrix:")

## [1] "Correlation Matrix:"

# Show the correlation matrix rounded to 3 decimal places
print(round(cor_matrix, digits = 3))

##                           S1: Demographic Pressures P1: State Legitimacy
## S1: Demographic Pressures                     1.000                0.657
## P1: State Legitimacy                          0.657                1.000
## E2: Economic Inequality                       0.854                0.666
##                           E2: Economic Inequality
## S1: Demographic Pressures                   0.854
## P1: State Legitimacy                        0.666
## E2: Economic Inequality                     1.000

# Test whether the correlation between S1 and E2 is statistically significant.
# cor.test() performs a hypothesis test for the correlation between two vectors.
cor_test_S1_E2 <- cor.test(frag23_sub$`S1: Demographic Pressures`,
                           frag23_sub$`E2: Economic Inequality`)

# Print a heading (preceded by two blank lines) for the test output
cat("\n\nCorrelation Test: S1 (Demographic Pressures) vs E2 (Economic Inequality)\n")

## 
## 
## Correlation Test: S1 (Demographic Pressures) vs E2 (Economic Inequality)

# Display the S1 vs E2 test result: t statistic, degrees of freedom, p-value, 95% confidence interval and estimated correlation
print(cor_test_S1_E2)

## 
##  Pearson's product-moment correlation
## 
## data:  frag23_sub$`S1: Demographic Pressures` and frag23_sub$`E2: Economic Inequality`
## t = 21.814, df = 177, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8082838 0.8890834
## sample estimates:
##       cor 
## 0.8537425

# Test whether the correlation between P1 and E2 is statistically significant
cor_test_P1_E2 <- cor.test(frag23_sub$`P1: State Legitimacy`,
                           frag23_sub$`E2: Economic Inequality`)

# Print a heading (preceded by two blank lines) for the test output
cat("\n\nCorrelation Test: P1 (State Legitimacy) vs E2 (Economic Inequality)\n")

## 
## 
## Correlation Test: P1 (State Legitimacy) vs E2 (Economic Inequality)

# Display the P1 vs E2 test result: t statistic, degrees of freedom, p-value, 95% confidence interval and estimated correlation
print(cor_test_P1_E2)

## 
##  Pearson's product-moment correlation
## 
## data:  frag23_sub$`P1: State Legitimacy` and frag23_sub$`E2: Economic Inequality`
## t = 11.884, df = 177, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5757609 0.7404948
## sample estimates:
##       cor 
## 0.6661759

# Test whether the correlation between S1 and P1 is statistically significant
cor_test_S1_P1 <- cor.test(frag23_sub$`S1: Demographic Pressures`,
                           frag23_sub$`P1: State Legitimacy`)

# Print a heading (preceded by two blank lines) for the test output
cat("\n\nCorrelation Test: S1 (Demographic Pressures) vs P1 (State Legitimacy)\n")

## 
## 
## Correlation Test: S1 (Demographic Pressures) vs P1 (State Legitimacy)

# Display the S1 vs P1 test result: t statistic, degrees of freedom, p-value, 95% confidence interval and estimated correlation
print(cor_test_S1_P1)

## 
##  Pearson's product-moment correlation
## 
## data:  frag23_sub$`S1: Demographic Pressures` and frag23_sub$`P1: State Legitimacy`
## t = 11.598, df = 177, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5648893 0.7331297
## sample estimates:
##       cor 
## 0.6571171

# Regress S1 on P1 and E2, i.e. predict S1 using P1 and E2 as predictors (linear regression)
# dependent variable (outcome) is S1, independent variables (predictors) are P1 and E2
regression_model <- lm(`S1: Demographic Pressures` ~ `P1: State Legitimacy` + `E2: Economic Inequality`,
                       data = frag23_sub)
# display results: residual summary, coefficient table with t tests, R-squared and the overall F test
summary(regression_model)

## 
## Call:
## lm(formula = `S1: Demographic Pressures` ~ `P1: State Legitimacy` + 
##     `E2: Economic Inequality`, data = frag23_sub)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6991 -0.8670  0.1260  0.7808  2.6197 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.85359    0.24234   3.522 0.000545 ***
## `P1: State Legitimacy`     0.12477    0.04024   3.100 0.002250 ** 
## `E2: Economic Inequality`  0.82389    0.05645  14.594  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.162 on 176 degrees of freedom
## Multiple R-squared:  0.7429, Adjusted R-squared:   0.74 
## F-statistic: 254.3 on 2 and 176 DF,  p-value: < 2.2e-16


Plotting



``` r
# Histogram of the P1 scores, showing how State Legitimacy is distributed
hist(
  x = frag23_sub$`P1: State Legitimacy`,
  col = "steelblue",
  border = "black",
  xlab = "State Legitimacy Score",
  main = "Distribution of State Legitimacy (P1)"
)

# Visual correlation between S1 and E2, highlighting the countries that fall
# in the worst quartile of Total.
# Logical flag per country: TRUE when Total is at or above the threshold
# stored in worst_quartile_value (the 75th percentile of Total)
in_worst_quartile <- frag23_sub$Total >= worst_quartile_value
# Colour vector: ifelse() yields "red" where the flag is TRUE, "blue" otherwise
point_colors <- ifelse(in_worst_quartile, "red", "blue")

# Scatter plot of E2 (y axis) against S1 (x axis), one coloured point per country
plot(frag23_sub$`S1: Demographic Pressures`,
     frag23_sub$`E2: Economic Inequality`,
     main = "Demographic Pressures vs Economic Inequality",
     xlab = "S1: Demographic Pressures",
     ylab = "E2: Economic Inequality",
     pch = 19,
     col = point_colors)

# Legend explaining the colours. It is placed in the top-left corner because
# S1 and E2 are strongly positively correlated: the points run from the
# bottom-left to the top-right, so a legend at "topright" would sit on top of
# the worst-quartile (red) points it is meant to explain.
legend("topleft",
       legend = c("Not in Worst Quartile", "Worst Quartile of Total"),
       col = c("blue", "red"),
       pch = 19)

# Predicted S1 values from the regression model, stored as a new column of
# the subset
frag23_sub$predicted_S1 <- predict(object = regression_model,
                                   newdata = frag23_sub)

# Actual S1 on the x axis against the model's predicted S1 on the y axis
plot(
  x = frag23_sub$`S1: Demographic Pressures`,
  y = frag23_sub$predicted_S1,
  pch = 19,
  col = "darkgreen",
  xlab = "Actual S1 (Demographic Pressures)",
  ylab = "Predicted S1",
  main = "Regression Model: Actual vs Predicted S1",
  sub = "S1 predicted by P1 and E2"
)

# Reference line with intercept 0 and slope 1: if the predictions were
# perfect, every point would fall on this dashed line
abline(a = 0, b = 1, col = "red", lty = 2)

# Legend: filled points for the data, dashed line for the reference
legend(
  x = "topleft",
  legend = c("Data points", "Perfect prediction line"),
  col = c("darkgreen", "red"),
  pch = c(19, NA),
  lty = c(NA, 2)
)

# Create a visual correlation matrix, load corrplot library for correlation visualization
#install.packages('corrplot')
library(corrplot)

## Warning: package 'corrplot' was built under R version 4.4.3

## corrplot 0.95 loaded

# Draw the correlation matrix as a plot with corrplot():
# - method = "circle": each cell is a circle sized by correlation strength
# - type = "upper": only the upper triangle is shown
# - tl.col / tl.srt: black variable labels rotated by 45 degrees
# - addCoef.col / number.cex: correlation values printed in black at 0.8 size
# - title / mar: plot title, with a top margin of 2 to make room for it
corrplot(
  corr = cor_matrix,
  method = "circle",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.8,
  tl.col = "black",
  tl.srt = 45,
  mar = c(0, 0, 2, 0),
  title = "Correlation Matrix: S1, P1, E2"
)

```