Histogram

Keywords

clusterings

Examples

Callouts

# In-channel depth histogram ----
# Reads the prototype flowline attributes and plots the distribution of an
# "in-channel" depth derived from TopWdth, BtmWdth and ChSlp.
path <- "~/data/raw/hydrofabric/prototype/"
flowlines_ml <- arrow::read_parquet(
  file.path(path, "flowline_attributes_ml.parquet")
)

# Law of sines on the right triangle formed by one bank: the half difference
# between top and bottom width is the side opposite atan(ChSlp), and the depth
# is the side opposite the complementary angle.
# atan() returns radians, so the complement is pi / 2 - angle (not 90 - angle).
# Algebraically this reduces to half_width_diff / ChSlp.
half_width_diff <- (flowlines_ml$TopWdth - flowlines_ml$BtmWdth) / 2
bank_angle <- atan(flowlines_ml$ChSlp)

# Build the data frame explicitly: `%>%` binds tighter than `*` and `/`, so
# piping the whole arithmetic expression into as.data.frame() only wraps the
# last term and leaves a column named `.`.
in_channel_depth <- data.frame(
  depth = (half_width_diff / sin(bank_angle)) * sin(pi / 2 - bank_angle)
)

# Quick look at the largest value before it is trimmed for plotting.
max(in_channel_depth$depth, na.rm = TRUE)

# Values of 500 or more (and NA/NaN rows) are dropped so the bulk of the
# distribution stays readable.
ggplot2::ggplot(
  dplyr::filter(in_channel_depth, depth < 500),
  ggplot2::aes(x = depth)
) +
  ggplot2::geom_histogram(
    binwidth = 3,
    fill = "#69b3a2",
    color = "#e9ecef",
    alpha = 0.9
  ) +
  ggplot2::ggtitle("'in-channel' depth histogram") +
  cowplot::theme_half_open() +
  cowplot::background_grid()
# NextGen divide area histogram ----
# Reads the "divides" layer of the CONUS NextGen geopackage, computes each
# divide's area in EPSG:5070, and plots the relative-frequency distribution of
# areas below 75 km2 with mean and median callouts.
base_data <- "~/data/raw/hydrofabric/v2.2/conus/conus_nextgen.gpkg"
dat <- sf::st_read(base_data, layer = "divides")

# datp keeps the geometry in EPSG:3857; areas are measured on an EPSG:5070
# copy and stored both with units (a5070) and as plain numbers (an5070).
datp <- sf::st_transform(dat, sf::st_crs("EPSG:3857"))
datp$a5070 <- sf::st_area(sf::st_transform(dat, sf::st_crs("EPSG:5070")))
datp$an5070 <- as.numeric(datp$a5070)

# Keep divides smaller than 0.75e8 area units (the 75 km2 quoted in the
# caption, given the 1e-6 conversion below). which() drops NA comparisons
# instead of producing NA rows.
max_area <- 0.75e8
keep <- which(datp$an5070 < max_area)
chart_data <- data.frame(size = round(datp$an5070[keep] * 1e-6, 2))

# Summary statistics of the plotted (filtered, rounded) sizes.
mean_size <- mean(chart_data$size, na.rm = TRUE)
median_size <- median(chart_data$size, na.rm = TRUE)

# The callout lines get their own two-row data frame. Mapping a constant
# inside aes() while inheriting chart_data would draw one vertical line per
# data row, all stacked on top of each other.
callouts <- data.frame(
  stat = c("mean", "median"),
  value = c(as.numeric(mean_size), as.numeric(median_size))
)

chart <- ggplot2::ggplot(chart_data, ggplot2::aes(x = size)) +
  # Bar heights are each bin's share of all plotted rows.
  ggplot2::geom_histogram(
    ggplot2::aes(
      y = ggplot2::after_stat(count) / sum(ggplot2::after_stat(count))
    ),
    binwidth = 1,
    center = 0.5,
    fill = "lightblue",
    color = "black"
  ) +
  ggplot2::geom_vline(
    data = callouts,
    mapping = ggplot2::aes(xintercept = value, color = stat),
    linewidth = 1
  ) +
  ggplot2::labs(
    title = "NextGen Divide Area Distribution",
    x = "Area (km2 - EPSG:5070)",
    y = "Frequency",
    caption = glue::glue("for basins under 75 km2 - {nrow(dat) - nrow(chart_data)} of {nrow(dat)} rows filtered")
  ) +
  ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.02))) +
  ggplot2::scale_x_continuous(expand = c(0, 0)) +
  # breaks are given explicitly so each label is tied to its line rather than
  # relying on the default ordering of the legend keys.
  ggplot2::scale_color_manual(
    name = "Callouts",
    breaks = c("mean", "median"),
    labels = c(
      glue::glue("Data mean: {round(mean_size,2)} km2"),
      glue::glue("Data median: {median_size} km2")
    ),
    values = c(mean = "red", median = "blue")
  ) +
  cowplot::theme_half_open() +
  cowplot::background_grid() +
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # in favour of legend.position = "inside" plus legend.position.inside;
  # switch once the minimum ggplot2 version in use is confirmed.
  ggplot2::theme(
    legend.position = c(.75, .95),
    # legend.justification = c("right", "top"),
    # legend.box.just = "right",
    legend.margin = ggplot2::margin(6, 6, 6, 6)
  )

chart

_98c5dedbb98e7078a3ef610e4d37e6e8.png

Histograms and dates

https://stackoverflow.com/questions/10770698/understanding-dates-and-plotting-a-histogram-with-ggplot2-in-r