deseq2-code-blocks

Deseq2 analysis link

Plots

Elbow plot is for determining the ideal number of clusters, can also try silhouette.

scan_k_clusters <- function(mat, centers = 1:20, ...){
  map(
    centers,
    function(k){
      km <- kmeans(x = mat, centers = k, ...)
    }
  )
}

summarize_kmeans_search <- function(km_list){
  map_df(
    km_list,
    function(x){
      ratio <- x$betweenss / x$totss
      tibble(
        k = max(x$cluster),
        tot.withinss = x$tot.withinss,
        betweenss = x$betweenss,
        totss = x$totss,
        prop_variance_explained = betweenss / totss,
      )
    }
  )
}

plot_k_elbow <- function(.data, xintercept = NULL){
  plt <-
    ggplot(.data,aes(x = k, y = prop_variance_explained))+
    geom_line()+
    geom_point()+
    labs(
      x = "Number of clusters",
      y = "Proportion of variance"
    )+
    theme(
      panel.background = element_blank(),
      axis.line = element_line(color = "#000000")
    )
  if(!is.null(xintercept)){
    plt <-
      plt +
      geom_vline(xintercept = xintercept, linetype = 2)+
      annotate("text",x = xintercept,y = Inf,label = xintercept, hjust = -1, vjust = 1)
  }
  plt
}
convert_to_long <- function(mat, clusters, coldata_tbl) {
  sample_ids <- colnames(mat)

  mat[names(clusters), ] |>
    as_tibble(rownames = "gene_id") |>
    dplyr::mutate(cluster = clusters[gene_id]) |>
    tidyr::pivot_longer(
      cols      = c(-gene_id, -cluster),
      names_to  = "sample_id",
      values_to = "score"
    ) |>
    dplyr::left_join(coldata_tbl, by = "sample_id")
}

plot_k_genes <- function(df, line_alpha = 0.1, nrow = 3,x_title = "Status", y_title = "Mean Score"){
  df |>
    summarize(
      mean_score = mean(score),
      .by = c(gene_id,cluster,hpv_type)
    ) |>
    ggplot(
      aes(x = hpv_type, y = mean_score, group = gene_id)
    )+
    geom_line(alpha = line_alpha, color = "#afafaf")+
    stat_summary(aes(group = cluster), geom = "line",fun = mean, color = "#0000ff",linewidth = 1)+
    facet_wrap(~cluster,nrow = nrow)+
    labs(
      x = x_title,
      y = y_title
    )+
    theme(panel.background = element_blank(),axis.line = element_line(colour = "#000000"))
}