fork download
  # Exploratory screen of every column of `surgery_data` against the binary
  # outcome column `mort30`.
  #
  # For each column other than `mort30` the script prints a per-group summary,
  # draws a plot, and runs an association test:
  #   * categorical columns: proportion of mort30 per category, bar chart,
  #     chi-square or Fisher's exact test;
  #   * numeric columns: summary statistics per mort30 group, box plot,
  #     Welch t-test or Mann-Whitney (Wilcoxon rank-sum) test.

  # Package setup ----

  # Make sure `pkg` can be loaded, attempting one installation if it is missing.
  # Returns TRUE when the package is available afterwards. With
  # `required = TRUE` an unavailable package stops the script; otherwise FALSE
  # is returned so the caller can carry on without it.
  ensure_package <- function(pkg, required = TRUE) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      # install.packages() raises an error when the library is not writable or
      # no CRAN mirror is set; report it instead of halting right here.
      tryCatch(
        install.packages(pkg),
        error = function(e) {
          message("Could not install '", pkg, "': ", conditionMessage(e))
        }
      )
    }
    available <- requireNamespace(pkg, quietly = TRUE)
    if (!available && required) {
      stop("Required package '", pkg, "' is not available.", call. = FALSE)
    }
    available
  }

  ensure_package("dplyr")
  ensure_package("ggplot2")
  library(dplyr)
  library(ggplot2)

  # Nothing below calls a DescTools function, so a failed installation must not
  # abort the analysis: attach it only when it is actually available.
  if (ensure_package("DescTools", required = FALSE)) {
    library(DescTools)
  }

  # Example data ----

  # Assuming your dataset is named 'surgery_data'
  # (adapt this to your actual dataset name).
  # Example dataset built from the built-in `airquality` data, with a random
  # 0/1 outcome and two random categorical columns added for demonstration.
  surgery_data <- airquality
  n_rows <- nrow(surgery_data)
  surgery_data$mort30 <- sample(c(0, 1), n_rows, replace = TRUE)
  surgery_data$categorical1 <- sample(c("A", "B", "C"), n_rows, replace = TRUE)
  surgery_data$categorical2 <- sample(c("X", "Y"), n_rows, replace = TRUE)

  # Variables to analyse ----

  # Every column except the outcome itself.
  variable_names <- names(surgery_data)
  variable_names <- variable_names[variable_names != "mort30"]

  # Columns listed here are analysed as categorical regardless of their storage
  # type (despite the name, they are not dropped from the loop below).
  categorical_variables_exclude <- c("categorical1", "categorical2")

  # Helpers ----

  # Summarise, plot and test one categorical column of `data` against `mort30`.
  # `variable` is the column name as a string. Called for its printed output.
  analyze_categorical <- function(data, variable) {
    cat(" Type: Categorical\n")

    # Proportion of mort30 == 1 and number of records within each category.
    prop <- data %>%
      group_by(across(all_of(variable))) %>%
      summarize(
        mort30_prop = mean(mort30, na.rm = TRUE),
        n = n()
      )
    print(prop)

    # Bar chart of the per-category proportions.
    p <- ggplot(prop, aes(x = .data[[variable]], y = mort30_prop)) +
      geom_col() +
      labs(
        title = paste("Proportion of mort30 for ", variable, "Categories"),
        x = variable,
        y = "Proportion mort30:1"
      )
    print(p)

    # Association test on the category x outcome table: chi-square when every
    # cell holds at least 10 records, Fisher's exact test when every cell is
    # non-empty, and no test otherwise. A table with fewer than two rows or
    # columns cannot express an association, so it is reported as insufficient.
    xtab <- table(data[[variable]], data$mort30)
    if (nrow(xtab) < 2 || ncol(xtab) < 2 || min(xtab) == 0) {
      print("Not sufficient data to perform association test")
    } else if (min(xtab) >= 10) {
      print(chisq.test(xtab))
    } else {
      print(fisher.test(xtab))
    }

    invisible(NULL)
  }

  # Summarise, plot and test one numeric column of `data` against `mort30`.
  # `variable` is the column name as a string. Called for its printed output.
  analyze_continuous <- function(data, variable) {
    cat(" Type: Continuous\n")

    # Summary statistics within mort30 == 0 and mort30 == 1.
    summ_by_mort <- data %>%
      group_by(mort30) %>%
      summarize(
        mean = mean(.data[[variable]], na.rm = TRUE),
        median = median(.data[[variable]], na.rm = TRUE),
        sd = sd(.data[[variable]], na.rm = TRUE),
        min = min(.data[[variable]], na.rm = TRUE),
        max = max(.data[[variable]], na.rm = TRUE),
        IQR = IQR(.data[[variable]], na.rm = TRUE),
        n = n()
      )
    print(summ_by_mort)

    # Box plot of the variable by outcome group.
    p <- ggplot(data, aes(x = factor(mort30), y = .data[[variable]])) +
      geom_boxplot() +
      labs(
        title = paste("Box Plot of", variable, "by mort30"),
        x = "mort30",
        y = variable
      )
    print(p)

    # Only test when both outcome groups have more than 10 usable values.
    # Missing values of the variable and missing outcomes are not counted.
    values <- data[[variable]]
    observed <- !is.na(values)
    n_outcome0 <- sum(observed & data$mort30 %in% 0)
    n_outcome1 <- sum(observed & data$mort30 %in% 1)

    if (n_outcome0 > 10 && n_outcome1 > 10) {
      # The `.data` pronoun only exists inside dplyr/ggplot2 data masking, so
      # the formula for the base-R tests is built from the column name instead.
      test_formula <- reformulate("mort30", response = as.name(variable))

      # Normality is judged on the pooled values. shapiro.test() refuses some
      # inputs (e.g. more than 5000 values, or all values identical); in that
      # case fall back to the rank-based test.
      looks_normal <- tryCatch(
        shapiro.test(values)$p.value > 0.05,
        error = function(e) {
          message(
            "Shapiro-Wilk test not possible for '", variable, "' (",
            conditionMessage(e), "); using the Mann-Whitney test."
          )
          FALSE
        }
      )

      if (looks_normal) {
        print(t.test(test_formula, data = data))
      } else {
        print(wilcox.test(test_formula, data = data))
      }
    } else {
      print("Not sufficient data to perform association tests for continuous variables")
    }

    invisible(NULL)
  }

  # Main loop ----

  for (variable in variable_names) {
    cat("\n---------------------------------------------------\n")
    cat("Analyzing variable:", variable, "\n")

    column <- surgery_data[[variable]]
    treat_as_categorical <- is.character(column) || is.factor(column) ||
      variable %in% categorical_variables_exclude

    if (treat_as_categorical) {
      analyze_categorical(surgery_data, variable)
    } else if (is.numeric(column)) {
      analyze_continuous(surgery_data, variable)
    } else {
      cat(" Type: Not Categorical or Continuous, check data\n")
    }
  }
Success #stdin #stdout #stderr 0.73s 54464KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Loading required package: dplyr

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: ggplot2
Loading required package: DescTools
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
Warning in install.packages("DescTools") :
  'lib = "/usr/local/lib/R/site-library"' is not writable
Error in install.packages("DescTools") : unable to install packages
In addition: Warning message:
In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE,  :
  there is no package called ‘DescTools’
Execution halted