Title: | Interpretable Bivariate Density Visualization with 'ggplot2' |
---|---|
Description: | The 'ggplot2' package provides simple functions for visualizing contours of 2-d kernel density estimates. 'ggdensity' implements several additional density estimators as well as more interpretable visualizations based on highest density regions instead of the traditional height of the estimated density surface. |
Authors: | James Otto [aut, cre] |
Maintainer: | James Otto <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.0.0.900 |
Built: | 2025-03-04 04:03:19 UTC |
Source: | https://github.com/jamesotto852/ggdensity |
Perform 2D density estimation, compute and plot the resulting highest density regions.
geom_hdr()
draws filled regions and geom_hdr_lines()
draws lines outlining the regions.
Note, the plotted objects have probabilities mapped to the alpha
aesthetic by default.
stat_hdr( mapping = NULL, data = NULL, geom = "hdr", position = "identity", ..., method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, xlim = NULL, ylim = NULL, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr( mapping = NULL, data = NULL, stat = "hdr", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
stat_hdr( mapping = NULL, data = NULL, geom = "hdr", position = "identity", ..., method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, xlim = NULL, ylim = NULL, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr( mapping = NULL, data = NULL, stat = "hdr", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
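The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |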
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
method |
Density estimator to use, accepts character vector:
|
probs |
Probabilities to compute highest density regions for. |
n |
Resolution of grid defined by |
xlim , ylim
|
Range to compute and draw regions. If |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
geom_hdr()
and geom_hdr_lines()
understand the following aesthetics (required
aesthetics are in bold):
x
y
alpha
color
fill (only geom_hdr
)
group
linetype
linewidth
subgroup
The probability associated with the highest density region, specified
by the probs
argument.
Scott, David W. Multivariate Density Estimation (2e), Wiley.
# Basic simulated data with bivariate normal data and various methods df <- data.frame(x = rnorm(1000), y = rnorm(1000)) p <- ggplot(df, aes(x, y)) + coord_equal() p + geom_hdr() p + geom_hdr(method = "mvnorm") p + geom_hdr(method = "freqpoly") # p + geom_hdr(method = "histogram") # Adding point layers on top to visually assess region estimates pts <- geom_point(size = .2, color = "red") p + geom_hdr() + pts p + geom_hdr(method = "mvnorm") + pts p + geom_hdr(method = "freqpoly") + pts # p + geom_hdr(method = "histogram") + pts # Highest density region boundary lines p + geom_hdr_lines() p + geom_hdr_lines(method = "mvnorm") p + geom_hdr_lines(method = "freqpoly") # p + geom_hdr_lines(method = "histogram") ## Not run: # 2+ groups - mapping other aesthetics in the geom rdata <- function(n, n_groups = 3, radius = 3) { list_of_dfs <- lapply(0:(n_groups-1), function(k) { mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups)) m <- MASS::mvrnorm(n, radius*mu, diag(2)) structure(data.frame(m, as.character(k)), names = c("x", "y", "c")) }) do.call("rbind", list_of_dfs) } dfc <- rdata(1000, n_groups = 5) pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal() pf + geom_hdr() pf + geom_hdr(method = "mvnorm") pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5) pf + geom_hdr(method = "histogram") pf + geom_hdr(method = "freqpoly") pc <- ggplot(dfc, aes(x, y, color = c)) + coord_equal() + theme_minimal() + theme(panel.grid.minor = element_blank()) pc + geom_hdr_lines() pc + geom_hdr_lines(method = "mvnorm") # Data with boundaries ggplot(df, aes(x^2)) + geom_histogram(bins = 30) ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") ## End(Not run)
# Basic simulated data with bivariate normal data and various methods df <- data.frame(x = rnorm(1000), y = rnorm(1000)) p <- ggplot(df, aes(x, y)) + coord_equal() p + geom_hdr() p + geom_hdr(method = "mvnorm") p + geom_hdr(method = "freqpoly") # p + geom_hdr(method = "histogram") # Adding point layers on top to visually assess region estimates pts <- geom_point(size = .2, color = "red") p + geom_hdr() + pts p + geom_hdr(method = "mvnorm") + pts p + geom_hdr(method = "freqpoly") + pts # p + geom_hdr(method = "histogram") + pts # Highest density region boundary lines p + geom_hdr_lines() p + geom_hdr_lines(method = "mvnorm") p + geom_hdr_lines(method = "freqpoly") # p + geom_hdr_lines(method = "histogram") ## Not run: # 2+ groups - mapping other aesthetics in the geom rdata <- function(n, n_groups = 3, radius = 3) { list_of_dfs <- lapply(0:(n_groups-1), function(k) { mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups)) m <- MASS::mvrnorm(n, radius*mu, diag(2)) structure(data.frame(m, as.character(k)), names = c("x", "y", "c")) }) do.call("rbind", list_of_dfs) } dfc <- rdata(1000, n_groups = 5) pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal() pf + geom_hdr() pf + geom_hdr(method = "mvnorm") pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5) pf + geom_hdr(method = "histogram") pf + geom_hdr(method = "freqpoly") pc <- ggplot(dfc, aes(x, y, color = c)) + coord_equal() + theme_minimal() + theme(panel.grid.minor = element_blank()) pc + geom_hdr_lines() pc + geom_hdr_lines(method = "mvnorm") # Data with boundaries ggplot(df, aes(x^2)) + geom_histogram(bins = 30) ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") ## End(Not run)
Compute and plot the highest density regions (HDRs) of a bivariate pdf.
geom_hdr_fun()
draws filled regions, and geom_hdr_lines_fun()
draws lines outlining the regions.
Note, the plotted objects have probabilities mapped to the alpha
aesthetic by default.
stat_hdr_fun( mapping = NULL, data = NULL, geom = "hdr_fun", position = "identity", ..., fun, args = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 100, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_fun( mapping = NULL, data = NULL, stat = "hdr_fun", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
stat_hdr_fun( mapping = NULL, data = NULL, geom = "hdr_fun", position = "identity", ..., fun, args = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 100, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_fun( mapping = NULL, data = NULL, stat = "hdr_fun", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
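The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |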
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
fun |
A function, the joint probability density function, must be vectorized in its first two arguments; see examples. |
args |
Named list of additional arguments passed on to |
probs |
Probabilities to compute highest density regions for. |
xlim , ylim
|
Range to compute and draw regions. If |
n |
Resolution of grid |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
geom_hdr_fun()
and geom_hdr_lines_fun()
understand the following aesthetics (required
aesthetics are in bold):
x
y
alpha
color
fill (only geom_hdr_fun
)
group
linetype
linewidth
subgroup
The probability associated with the highest density region, specified
by probs
.
# HDRs of the bivariate exponential f <- function(x, y) dexp(x) * dexp(y) ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) # HDRs of a custom parametric model # generate example data n <- 1000 th_true <- c(3, 8) rdata <- function(n, th) { gen_single_obs <- function(th) { rchisq(2, df = th) # can be anything } df <- replicate(n, gen_single_obs(th)) setNames(as.data.frame(t(df)), c("x", "y")) } data <- rdata(n, th_true) # estimate unknown parameters via maximum likelihood likelihood <- function(th) { th <- abs(th) # hack to enforce parameter space boundary log_f <- function(v) { x <- v[1]; y <- v[2] dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) } sum(apply(data, 1, log_f)) } (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) # plot f for the give model f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) ggplot(data, aes(x, y)) + geom_hdr_fun(fun = f, args = list(th = th_hat)) + geom_point(size = .25, color = "red") + xlim(0, 30) + ylim(c(0, 30)) ggplot(data, aes(x, y)) + geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) + geom_point(size = .25, color = "red") + xlim(0, 30) + ylim(c(0, 30))
# HDRs of the bivariate exponential f <- function(x, y) dexp(x) * dexp(y) ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) # HDRs of a custom parametric model # generate example data n <- 1000 th_true <- c(3, 8) rdata <- function(n, th) { gen_single_obs <- function(th) { rchisq(2, df = th) # can be anything } df <- replicate(n, gen_single_obs(th)) setNames(as.data.frame(t(df)), c("x", "y")) } data <- rdata(n, th_true) # estimate unknown parameters via maximum likelihood likelihood <- function(th) { th <- abs(th) # hack to enforce parameter space boundary log_f <- function(v) { x <- v[1]; y <- v[2] dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) } sum(apply(data, 1, log_f)) } (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) # plot f for the give model f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) ggplot(data, aes(x, y)) + geom_hdr_fun(fun = f, args = list(th = th_hat)) + geom_point(size = .25, color = "red") + xlim(0, 30) + ylim(c(0, 30)) ggplot(data, aes(x, y)) + geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) + geom_point(size = .25, color = "red") + xlim(0, 30) + ylim(c(0, 30))
Perform 2D density estimation, compute the resulting highest density regions (HDRs), and plot the provided data as a scatterplot with points colored according to their corresponding HDR.
stat_hdr_points( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, xlim = NULL, ylim = NULL, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_points( mapping = NULL, data = NULL, stat = "hdr_points", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
stat_hdr_points( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, xlim = NULL, ylim = NULL, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_points( mapping = NULL, data = NULL, stat = "hdr_points", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
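The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |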
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
method |
Density estimator to use, accepts character vector:
|
probs |
Probabilities to compute highest density regions for. |
n |
Number of grid points in each direction. |
xlim , ylim
|
Range to compute and draw regions. If |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
geom_hdr_points understands the following aesthetics (required aesthetics are in bold):
x
y
alpha
color
fill
group
linetype
size
subgroup
The probability associated with the highest density region, specified
by probs
.
set.seed(1) df <- data.frame(x = rnorm(500), y = rnorm(500)) p <- ggplot(df, aes(x, y)) + coord_equal() p + geom_hdr_points() # Setting aes(fill = after_stat(probs)), color = "black", and # shape = 21 helps alleviate overplotting: p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2) # Also works well with geom_hdr_lines() p + geom_hdr_lines( aes(color = after_stat(probs)), alpha = 1, xlim = c(-5, 5), ylim = c(-5, 5) ) + geom_hdr_points( aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2, xlim = c(-5, 5), ylim = c(-5, 5) )
set.seed(1) df <- data.frame(x = rnorm(500), y = rnorm(500)) p <- ggplot(df, aes(x, y)) + coord_equal() p + geom_hdr_points() # Setting aes(fill = after_stat(probs)), color = "black", and # shape = 21 helps alleviate overplotting: p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2) # Also works well with geom_hdr_lines() p + geom_hdr_lines( aes(color = after_stat(probs)), alpha = 1, xlim = c(-5, 5), ylim = c(-5, 5) ) + geom_hdr_points( aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2, xlim = c(-5, 5), ylim = c(-5, 5) )
Compute the highest density regions (HDRs) of a bivariate pdf and plot the provided data as a scatterplot with points colored according to their corresponding HDR.
stat_hdr_points_fun( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., fun, args = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 100, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_points_fun( mapping = NULL, data = NULL, stat = "hdr_points_fun", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
stat_hdr_points_fun( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., fun, args = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 100, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_points_fun( mapping = NULL, data = NULL, stat = "hdr_points_fun", position = "identity", ..., na.rm = FALSE, show.legend = NA, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
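The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |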
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
fun |
A function, the joint probability density function, must be vectorized in its first two arguments; see examples. |
args |
Named list of additional arguments passed on to |
probs |
Probabilities to compute highest density regions for. |
xlim , ylim
|
Range to compute and draw regions. If |
n |
Number of grid points in each direction. |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
geom_hdr_points_fun understands the following aesthetics (required aesthetics are in bold):
x
y
alpha
color
fill
group
linetype
size
subgroup
The probability associated with the highest density region, specified
by probs
.
# Can plot points colored according to known pdf: set.seed(1) df <- data.frame(x = rexp(1000), y = rexp(1000)) f <- function(x, y) dexp(x) * dexp(y) ggplot(df, aes(x, y)) + geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) # Also allows for hdrs of a custom parametric model # generate example data n <- 1000 th_true <- c(3, 8) rdata <- function(n, th) { gen_single_obs <- function(th) { rchisq(2, df = th) # can be anything } df <- replicate(n, gen_single_obs(th)) setNames(as.data.frame(t(df)), c("x", "y")) } data <- rdata(n, th_true) # estimate unknown parameters via maximum likelihood likelihood <- function(th) { th <- abs(th) # hack to enforce parameter space boundary log_f <- function(v) { x <- v[1]; y <- v[2] dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) } sum(apply(data, 1, log_f)) } (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) # plot f for the give model f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) ggplot(data, aes(x, y)) + geom_hdr_points_fun(fun = f, args = list(th = th_hat)) ggplot(data, aes(x, y)) + geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black", fun = f, args = list(th = th_hat), na.rm = TRUE) + geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) + lims(x = c(0, 15), y = c(0, 25))
# Can plot points colored according to known pdf: set.seed(1) df <- data.frame(x = rexp(1000), y = rexp(1000)) f <- function(x, y) dexp(x) * dexp(y) ggplot(df, aes(x, y)) + geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) # Also allows for hdrs of a custom parametric model # generate example data n <- 1000 th_true <- c(3, 8) rdata <- function(n, th) { gen_single_obs <- function(th) { rchisq(2, df = th) # can be anything } df <- replicate(n, gen_single_obs(th)) setNames(as.data.frame(t(df)), c("x", "y")) } data <- rdata(n, th_true) # estimate unknown parameters via maximum likelihood likelihood <- function(th) { th <- abs(th) # hack to enforce parameter space boundary log_f <- function(v) { x <- v[1]; y <- v[2] dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) } sum(apply(data, 1, log_f)) } (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) # plot f for the give model f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) ggplot(data, aes(x, y)) + geom_hdr_points_fun(fun = f, args = list(th = th_hat)) ggplot(data, aes(x, y)) + geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black", fun = f, args = list(th = th_hat), na.rm = TRUE) + geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) + lims(x = c(0, 15), y = c(0, 25))
Perform 1D density estimation, compute and plot the resulting highest density
regions in a way similar to ggplot2::geom_rug()
.
Note, the plotted objects have probabilities mapped to the alpha
aesthetic by default.
stat_hdr_rug( mapping = NULL, data = NULL, geom = "hdr_rug", position = "identity", ..., method = "kde", method_y = "kde", probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 512, na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE ) geom_hdr_rug( mapping = NULL, data = NULL, stat = "hdr_rug", position = "identity", ..., outside = FALSE, sides = "bl", length = unit(0.03, "npc"), na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE )
stat_hdr_rug( mapping = NULL, data = NULL, geom = "hdr_rug", position = "identity", ..., method = "kde", method_y = "kde", probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 512, na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE ) geom_hdr_rug( mapping = NULL, data = NULL, stat = "hdr_rug", position = "identity", ..., outside = FALSE, sides = "bl", length = unit(0.03, "npc"), na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
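The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |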
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
method , method_y
|
Density estimator(s) to use.
By default |
probs |
Probabilities to compute highest density regions for. |
xlim , ylim
|
Range to compute and draw regions. If |
n |
Resolution of grid defined by |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
outside |
logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use |
sides |
A string that controls which sides of the plot the rugs appear on.
It can be set to a string containing any of |
length |
A |
geom_hdr_rug understands the following aesthetics (required aesthetics are in bold):
x
y
alpha
fill
group
subgroup
The probability of the highest density region, specified
by probs
, corresponding to each point.
set.seed(1) df <- data.frame(x = rnorm(100), y = rnorm(100)) # Plot marginal HDRs for bivariate data ggplot(df, aes(x, y)) + geom_point() + geom_hdr_rug() + coord_fixed() ggplot(df, aes(x, y)) + geom_hdr() + geom_hdr_rug() + coord_fixed() # Plot HDR for univariate data ggplot(df, aes(x)) + geom_density() + geom_hdr_rug() ggplot(df, aes(y = y)) + geom_density() + geom_hdr_rug() # Specify location of marginal HDRs as in ggplot2::geom_rug() ggplot(df, aes(x, y)) + geom_hdr() + geom_hdr_rug(sides = "tr", outside = TRUE) + coord_fixed(clip = "off") # Can use same methods of density estimation as geom_hdr(). # For data with constrained support, we suggest setting method = "histogram": ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) + geom_hdr_rug(method = "histogram") ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") + geom_hdr_rug(method = "histogram") + coord_fixed()
set.seed(1) df <- data.frame(x = rnorm(100), y = rnorm(100)) # Plot marginal HDRs for bivariate data ggplot(df, aes(x, y)) + geom_point() + geom_hdr_rug() + coord_fixed() ggplot(df, aes(x, y)) + geom_hdr() + geom_hdr_rug() + coord_fixed() # Plot HDR for univariate data ggplot(df, aes(x)) + geom_density() + geom_hdr_rug() ggplot(df, aes(y = y)) + geom_density() + geom_hdr_rug() # Specify location of marginal HDRs as in ggplot2::geom_rug() ggplot(df, aes(x, y)) + geom_hdr() + geom_hdr_rug(sides = "tr", outside = TRUE) + coord_fixed(clip = "off") # Can use same methods of density estimation as geom_hdr(). # For data with constrained support, we suggest setting method = "histogram": ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) + geom_hdr_rug(method = "histogram") ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") + geom_hdr_rug(method = "histogram") + coord_fixed()
Compute and plot the highest density regions (HDRs) of specified univariate pdf(s).
Note, the plotted objects have probabilities mapped to the alpha
aesthetic by default.
stat_hdr_rug_fun( mapping = NULL, data = NULL, geom = "hdr_rug_fun", position = "identity", ..., fun_x = NULL, fun_y = NULL, args_x = list(), args_y = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 512, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_rug_fun( mapping = NULL, data = NULL, stat = "hdr_rug_fun", position = "identity", ..., outside = FALSE, sides = "bl", length = unit(0.03, "npc"), na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE )
stat_hdr_rug_fun( mapping = NULL, data = NULL, geom = "hdr_rug_fun", position = "identity", ..., fun_x = NULL, fun_y = NULL, args_x = list(), args_y = list(), probs = c(0.99, 0.95, 0.8, 0.5), xlim = NULL, ylim = NULL, n = 512, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE ) geom_hdr_rug_fun( mapping = NULL, data = NULL, stat = "hdr_rug_fun", position = "identity", ..., outside = FALSE, sides = "bl", length = unit(0.03, "npc"), na.rm = FALSE, show.legend = TRUE, inherit.aes = TRUE )
mapping |
Set of aesthetic mappings created by |
data |
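The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. A function will be called with a single argument, the plot data; its return value must be a data.frame and will be used as the layer data. |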
geom |
The geometric object to use to display the data, either as a
|
position |
Position adjustment, either as a string naming the adjustment
(e.g. |
... |
Other arguments passed on to |
fun_x , fun_y
|
Functions, the univariate probability density function for the x- and/or y-axis. First argument must be vectorized. |
args_x , args_y
|
Named list of additional arguments passed on to |
probs |
Probabilities to compute highest density regions for. |
xlim , ylim
|
Range to compute and draw regions. If |
n |
Resolution of grid defined by |
na.rm |
If |
show.legend |
logical. Should this layer be included in the legends?
|
inherit.aes |
If |
stat |
The statistical transformation to use on the data for this
layer, either as a |
outside |
logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use |
sides |
A string that controls which sides of the plot the rugs appear on.
It can be set to a string containing any of |
length |
A |
geom_hdr_rug_fun()
understands the following aesthetics (required
aesthetics are in bold):
x
y
alpha
fill
group
subgroup
The probability of the highest density region, specified
by probs
, corresponding to each point.
# Plotting data with exponential marginals df <- data.frame(x = rexp(1e3), y = rexp(1e3)) ggplot(df, aes(x, y)) + geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) + geom_point(size = .5) + coord_fixed() # without data/aesthetic mappings ggplot() + geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) + coord_fixed() # Plotting univariate normal data, estimating mean and sd df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3)) # estimating parameters mu_hat <- mean(df$x) sd_hat <- sd(df$x) ggplot(df, aes(x)) + geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) + geom_density() # Equivalent to `method_norm_1d()` with `geom_hdr_rug()` ggplot(df, aes(x)) + geom_hdr_rug(method = method_norm_1d()) + geom_density()
# Plotting data with exponential marginals df <- data.frame(x = rexp(1e3), y = rexp(1e3)) ggplot(df, aes(x, y)) + geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) + geom_point(size = .5) + coord_fixed() # without data/aesthetic mappings ggplot() + geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) + coord_fixed() # Plotting univariate normal data, estimating mean and sd df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3)) # estimating parameters mu_hat <- mean(df$x) sd_hat <- sd(df$x) ggplot(df, aes(x)) + geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) + geom_density() # Equivalent to `method_norm_1d()` with `geom_hdr_rug()` ggplot(df, aes(x)) + geom_hdr_rug(method = method_norm_1d()) + geom_density()
get_hdr
is used to estimate a 2-dimensional density and compute
corresponding HDRs. The estimated density and HDRs are represented in a
discrete form as a grid, defined by arguments rangex
, rangey
, and n
.
get_hdr
is used internally by layer functions stat_hdr()
,
stat_hdr_points()
, stat_hdr_fun()
, etc.
get_hdr( data = NULL, method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, rangex = NULL, rangey = NULL, hdr_membership = TRUE, fun, args = list() )
get_hdr( data = NULL, method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 100, rangex = NULL, rangey = NULL, hdr_membership = TRUE, fun, args = list() )
data |
A data frame with columns |
method |
Either a character ( |
probs |
Probabilities to compute HDRs for. |
n |
Resolution of grid representing estimated density and HDRs. |
rangex , rangey
|
Range of grid representing estimated density and HDRs, along the x- and y-axes. |
hdr_membership |
Should HDR membership of data points ( |
fun |
Optional, a joint probability density function, must be vectorized
in its first two arguments. See the "The fun argument" section below. |
args |
Optional, a list of arguments to be provided to |
get_hdr
returns a list with elements df_est
(data.frame
), breaks
(named numeric
), and data
(data.frame
).
df_est
: the estimated HDRs and density evaluated on the grid defined by rangex
, rangey
, and n
.
The column of estimated HDRs (df_est$hdr
) is a numeric vector with values
from probs
. The columns df_est$fhat
and df_est$fhat_discretized
correspond to the estimated density on the original scale and rescaled to sum
to 1, respectively.
breaks
: the heights of the estimated density (df_est$fhat
) corresponding to the HDRs specified by probs
.
Will always have an additional element Inf
representing the cutoff for the
100% HDR.
data
: the original data provided in the data
argument.
If hdr_membership
is set to TRUE
, this includes a column
(data$hdr_membership
) with the HDR corresponding to each data point.
The method
argument
The density estimator used to estimate the
HDRs is specified with the method
argument. The simplest way to specify
an estimator is to provide a character value to method
, for example
method = "kde"
specifies a kernel density estimator. However, this
specification is limited to the default behavior of the estimator.
Instead, it is possible to provide a function call, for example: method = method_kde()
. In many cases, these functions accept parameters governing
the density estimation procedure. Here, method_kde()
accepts parameters
h
and adjust
, both related to the kernel's bandwidth. For details, see
?method_kde
. Every implemented method of bivariate density estimation has
a corresponding method_*()
function, each with an associated help
page.
Note: geom_hdr()
and other layer functions also have method
arguments
which behave in the same way. For more details on the use and
implementation of the method_*()
functions, see vignette("method", "ggdensity")
.
The fun
argument
If method
is set to "fun"
, get_hdr()
expects a bivariate probability density function to be specified with the
fun
argument. It is required that fun
be a function of at least two
arguments (x
and y
). Beyond these first two arguments, fun
can have
arbitrarily many arguments; these can be set in get_hdr()
as a named list
via the args
parameter.
Note: get_hdr()
requires that fun
be vectorized in x
and y
. For an
example of an appropriate choice of fun
, see the final example below.
df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) # Two ways to specify `method` get_hdr(df, method = "kde") get_hdr(df, method = method_kde()) ## Not run: # If parenthesis are omitted, `get_hdr()` errors get_hdr(df, method = method_kde) ## End(Not run) # Estimate different HDRs with `probs` get_hdr(df, method = method_kde(), probs = c(.975, .6, .2)) # Adjust estimator parameters with arguments to `method_kde()` get_hdr(df, method = method_kde(h = 1)) # Parametric normal estimator of density get_hdr(df, method = "mvnorm") get_hdr(df, method = method_mvnorm()) # Compute "population" HDRs of specified bivariate pdf with `method = "fun"` f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y) get_hdr( method = "fun", fun = f, rangex = c(-5, 5), rangey = c(-5, 5) ) get_hdr( method = "fun", fun = f, rangex = c(-5, 5), rangey = c(-5, 5), args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args` )
df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) # Two ways to specify `method` get_hdr(df, method = "kde") get_hdr(df, method = method_kde()) ## Not run: # If parenthesis are omitted, `get_hdr()` errors get_hdr(df, method = method_kde) ## End(Not run) # Estimate different HDRs with `probs` get_hdr(df, method = method_kde(), probs = c(.975, .6, .2)) # Adjust estimator parameters with arguments to `method_kde()` get_hdr(df, method = method_kde(h = 1)) # Parametric normal estimator of density get_hdr(df, method = "mvnorm") get_hdr(df, method = method_mvnorm()) # Compute "population" HDRs of specified bivariate pdf with `method = "fun"` f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y) get_hdr( method = "fun", fun = f, rangex = c(-5, 5), rangey = c(-5, 5) ) get_hdr( method = "fun", fun = f, rangex = c(-5, 5), rangey = c(-5, 5), args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args` )
get_hdr_1d
is used to estimate a 1-dimensional density and compute corresponding HDRs.
The estimated density and HDRs are represented in a discrete form as a grid, defined by arguments range
and n
.
get_hdr_1d
is used internally by layer functions stat_hdr_rug()
and stat_hdr_rug_fun()
.
get_hdr_1d( x = NULL, method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 512, range = NULL, hdr_membership = TRUE, fun, args = list() )
get_hdr_1d( x = NULL, method = "kde", probs = c(0.99, 0.95, 0.8, 0.5), n = 512, range = NULL, hdr_membership = TRUE, fun, args = list() )
x |
A vector of data |
method |
Either a character ( |
probs |
Probabilities to compute HDRs for. |
n |
Resolution of grid representing estimated density and HDRs. |
range |
Range of grid representing estimated density and HDRs. |
hdr_membership |
Should HDR membership of data points ( |
fun |
Optional, a probability density function, must be vectorized in its first argument.
See the "The |
args |
Optional, a list of arguments to be provided to |
get_hdr_1d
returns a list with elements df_est
(data.frame
), breaks
(named numeric
), and data
(data.frame
).
df_est
: the estimated HDRs and density evaluated on the grid defined by range
and n
.
The column of estimated HDRs (df_est$hdr
) is a numeric vector with values from probs
.
The columns df_est$fhat
and df_est$fhat_discretized
correspond to the estimated density
on the original scale and rescaled to sum to 1, respectively.
breaks
: the heights of the estimated density (df_est$fhat
) corresponding to the HDRs specified by probs
.
Will always have an additional element Inf
representing the cutoff for the 100% HDR.
data
: the original data provided in the x
argument.
If hdr_membership
is set to TRUE
, this includes a column (data$hdr_membership
)
with the HDR corresponding to each data point.
The method
argument
The density estimator used to estimate the HDRs is specified with the method
argument.
The simplest way to specify an estimator is to provide a character value to method
,
for example method = "kde"
specifies a kernel density estimator.
However, this specification is limited to the default behavior of the estimator.
Instead, it is possible to provide a function call, for example: method = method_kde_1d()
.
This is slightly different from the function calls provided in get_hdr()
, note the _1d
suffix.
In many cases, these functions accept parameters governing the density estimation procedure.
Here, method_kde_1d()
accepts several parameters related to the choice of kernel.
For details, see ?method_kde_1d
.
Every implemented method of univariate density estimation has a corresponding method_*_1d()
function,
each with an associated help page.
Note: geom_hdr_rug()
and other layer functions also have method
arguments which behave in the same way.
For more details on the use and implementation of the method_*_1d()
functions,
see vignette("method", "ggdensity")
.
The fun
argument
If method
is set to "fun"
, get_hdr_1d()
expects a univariate probability
density function to be specified with the fun
argument.
It is required that fun
be a function of at least one argument (x
).
Beyond this first argument, fun
can have arbitrarily many arguments;
these can be set in get_hdr_1d()
as a named list via the args
parameter.
Note: get_hdr_1d()
requires that fun
be vectorized in x
.
For an example of an appropriate choice of fun
, see the final example below.
x <- rnorm(1e3) # Two ways to specify `method` get_hdr_1d(x, method = "kde") get_hdr_1d(x, method = method_kde_1d()) ## Not run: # If parenthesis are omitted, `get_hdr_1d()` errors get_hdr_1d(df, method = method_kde_1d) # If the `_1d` suffix is omitted, `get_hdr_1d()` errors get_hdr_1d(x, method = method_kde()) ## End(Not run) # Adjust estimator parameters with arguments to `method_kde_1d()` get_hdr_1d(x, method = method_kde_1d(kernel = "triangular")) # Estimate different HDRs with `probs` get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2)) # Compute "population" HDRs of specified univariate pdf with `method = "fun"` f <- function(x, sd = 1) dnorm(x, sd = sd) get_hdr_1d(method = "fun", fun = f, range = c(-5, 5)) get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5))
x <- rnorm(1e3) # Two ways to specify `method` get_hdr_1d(x, method = "kde") get_hdr_1d(x, method = method_kde_1d()) ## Not run: # If parenthesis are omitted, `get_hdr_1d()` errors get_hdr_1d(df, method = method_kde_1d) # If the `_1d` suffix is omitted, `get_hdr_1d()` errors get_hdr_1d(x, method = method_kde()) ## End(Not run) # Adjust estimator parameters with arguments to `method_kde_1d()` get_hdr_1d(x, method = method_kde_1d(kernel = "triangular")) # Estimate different HDRs with `probs` get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2)) # Compute "population" HDRs of specified univariate pdf with `method = "fun"` f <- function(x, sd = 1) dnorm(x, sd = sd) get_hdr_1d(method = "fun", fun = f, range = c(-5, 5)) get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5))
A package that allows more flexible computations for visualization of density estimates with ggplot2.
Useful links:
Function used to specify bivariate frequency polygon density estimator
for get_hdr()
and layer functions (e.g. geom_hdr()
).
method_freqpoly(bins = NULL)
method_freqpoly(bins = NULL)
bins |
Number of bins along each axis. Either a vector of length 2 or a scalar value which is recycled for both dimensions. Defaults to normal reference rule (Scott, pg 87). |
For more details on the use and implementation of the method_*()
functions,
see vignette("method", "ggdensity")
.
Scott, David W. Multivariate Density Estimation (2e), Wiley.
set.seed(1) df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_freqpoly()) + geom_point(size = 1) # The resolution of the frequency polygon estimator can be set via `bins` ggplot(df, aes(x, y)) + geom_hdr(method = method_freqpoly(bins = c(8, 25))) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_freqpoly()) str(res)
set.seed(1) df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_freqpoly()) + geom_point(size = 1) # The resolution of the frequency polygon estimator can be set via `bins` ggplot(df, aes(x, y)) + geom_hdr(method = method_freqpoly(bins = c(8, 25))) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_freqpoly()) str(res)
Function used to specify univariate frequency polygon density estimator
for get_hdr_1d()
and layer functions (e.g. geom_hdr_rug()
).
method_freqpoly_1d(bins = NULL)
method_freqpoly_1d(bins = NULL)
bins |
Number of bins. Defaults to normal reference rule (Scott, pg 59). |
For more details on the use and implementation of the method_*_1d()
functions,
see vignette("method", "ggdensity")
.
Scott, David W. Multivariate Density Estimation (2e), Wiley.
df <- data.frame(x = rnorm(1e3)) # Strip chart to visualize 1-d data p <- ggplot(df, aes(x)) + geom_jitter(aes(y = 0), width = 0, height = 2) + scale_y_continuous(name = NULL, breaks = NULL) + coord_cartesian(ylim = c(-3, 3)) p p + geom_hdr_rug(method = method_freqpoly_1d()) # The resolution of the frequency polygon estimator can be set via `bins` p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100)) # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_freqpoly_1d()) str(res)
df <- data.frame(x = rnorm(1e3)) # Strip chart to visualize 1-d data p <- ggplot(df, aes(x)) + geom_jitter(aes(y = 0), width = 0, height = 2) + scale_y_continuous(name = NULL, breaks = NULL) + coord_cartesian(ylim = c(-3, 3)) p p + geom_hdr_rug(method = method_freqpoly_1d()) # The resolution of the frequency polygon estimator can be set via `bins` p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100)) # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_freqpoly_1d()) str(res)
Function used to specify bivariate histogram density estimator
for get_hdr()
and layer functions (e.g. geom_hdr()
).
method_histogram(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none")
method_histogram(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none")
bins |
Number of bins along each axis. Either a vector of length 2 or a scalar value which is recycled for both dimensions. Defaults to normal reference rule (Scott, pg 87). |
smooth |
If |
nudgex , nudgey
|
Horizontal and vertical rules for choosing witness points when |
For more details on the use and implementation of the method_*()
functions,
see vignette("method", "ggdensity")
.
Scott, David W. Multivariate Density Estimation (2e), Wiley.
## Not run: # Histogram estimators can be useful when data has boundary constraints set.seed(1) df <- data.frame(x = rexp(1e3), y = rexp(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram()) + geom_point(size = 1) # The resolution of the histogram estimator can be set via `bins` ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(bins = c(8, 25))) + geom_point(size = 1) # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(smooth = TRUE)) + geom_point(size = 1) # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_histogram()) str(res) ## End(Not run)
## Not run: # Histogram estimators can be useful when data has boundary constraints set.seed(1) df <- data.frame(x = rexp(1e3), y = rexp(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram()) + geom_point(size = 1) # The resolution of the histogram estimator can be set via `bins` ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(bins = c(8, 25))) + geom_point(size = 1) # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(smooth = TRUE)) + geom_point(size = 1) # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly ggplot(df, aes(x, y)) + geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_histogram()) str(res) ## End(Not run)
Function used to specify univariate histogram density estimator
for get_hdr_1d()
and layer functions (e.g. geom_hdr_rug()
).
method_histogram_1d(bins = NULL)
method_histogram_1d(bins = NULL)
bins |
Number of bins. Defaults to normal reference rule (Scott, pg 59). |
For more details on the use and implementation of the method_*_1d()
functions,
see vignette("method", "ggdensity")
.
Scott, David W. Multivariate Density Estimation (2e), Wiley.
# Histogram estimators can be useful when data has boundary constraints df <- data.frame(x = rexp(1e3)) # Strip chart to visualize 1-d data p <- ggplot(df, aes(x)) + geom_jitter(aes(y = 0), width = 0, height = 2) + scale_y_continuous(name = NULL, breaks = NULL) + coord_cartesian(ylim = c(-3, 3)) p p + geom_hdr_rug(method = method_histogram_1d()) # The resolution of the histogram estimator can be set via `bins` p + geom_hdr_rug(method = method_histogram_1d(bins = 5)) # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_histogram_1d()) str(res)
# Histogram estimators can be useful when data has boundary constraints df <- data.frame(x = rexp(1e3)) # Strip chart to visualize 1-d data p <- ggplot(df, aes(x)) + geom_jitter(aes(y = 0), width = 0, height = 2) + scale_y_continuous(name = NULL, breaks = NULL) + coord_cartesian(ylim = c(-3, 3)) p p + geom_hdr_rug(method = method_histogram_1d()) # The resolution of the histogram estimator can be set via `bins` p + geom_hdr_rug(method = method_histogram_1d(bins = 5)) # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_histogram_1d()) str(res)
Function used to specify bivariate kernel density estimator
for get_hdr()
and layer functions (e.g. geom_hdr()
).
method_kde(h = NULL, adjust = c(1, 1))
method_kde(h = NULL, adjust = c(1, 1))
h |
Bandwidth (vector of length two). If |
adjust |
A multiplicative bandwidth adjustment to be used if 'h' is
'NULL'. This makes it possible to adjust the bandwidth while still
using a bandwidth estimator. For example, |
For more details on the use and implementation of the method_*()
functions,
see vignette("method", "ggdensity")
.
set.seed(1) df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_kde()) + geom_point(size = 1) # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()` ggplot(df, aes(x, y)) + geom_density_2d_filled() + geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) + theme(legend.position = "none") # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust` ggplot(df, aes(x, y)) + geom_hdr(method = method_kde(h = 1)) + geom_point(size = 1) ggplot(df, aes(x, y)) + geom_hdr(method = method_kde(adjust = 1/2)) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_kde()) str(res)
set.seed(1) df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_kde()) + geom_point(size = 1) # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()` ggplot(df, aes(x, y)) + geom_density_2d_filled() + geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) + theme(legend.position = "none") # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust` ggplot(df, aes(x, y)) + geom_hdr(method = method_kde(h = 1)) + geom_point(size = 1) ggplot(df, aes(x, y)) + geom_hdr(method = method_kde(adjust = 1/2)) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_kde()) str(res)
Function used to specify univariate kernel density estimator
for get_hdr_1d()
and layer functions (e.g. geom_hdr_rug()
).
method_kde_1d( bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel )
method_kde_1d( bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel )
bw |
the smoothing bandwidth to be used. The kernels are scaled such that this is the standard deviation of the smoothing kernel. (Note this differs from the reference books cited below, and from S-PLUS.)
The specified (or computed) value of |
adjust |
the bandwidth used is actually |
kernel , window
|
a character string giving the smoothing kernel
to be used. This must partially match one of
|
weights |
numeric vector of non-negative observation weights,
hence of same length as |
For more details on the use and implementation of the method_*_1d()
functions,
see vignette("method", "ggdensity")
.
df <- data.frame(x = rnorm(1e3, sd = 3)) ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d()) + geom_density() # Details of the KDE can be adjusted with arguments to `method_kde_1d()` ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) + geom_density(adjust = 1/5) ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) + geom_density(kernel = "triangular") # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_kde_1d()) str(res)
df <- data.frame(x = rnorm(1e3, sd = 3)) ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d()) + geom_density() # Details of the KDE can be adjusted with arguments to `method_kde_1d()` ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) + geom_density(adjust = 1/5) ggplot(df, aes(x)) + geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) + geom_density(kernel = "triangular") # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_kde_1d()) str(res)
Function used to specify bivariate normal density estimator
for get_hdr()
and layer functions (e.g. geom_hdr()
).
method_mvnorm()
method_mvnorm()
For more details on the use and implementation of the method_*()
functions,
see vignette("method", "ggdensity")
.
# Normal estimator is useful when an assumption of normality is appropriate set.seed(1) df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_mvnorm()) str(res)
# Normal estimator is useful when an assumption of normality is appropriate set.seed(1) df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) ggplot(df, aes(x, y)) + geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) + geom_point(size = 1) # Can also be used with `get_hdr()` for numerical summary of HDRs res <- get_hdr(df, method = method_mvnorm()) str(res)
Function used to specify univariate normal density estimator
for get_hdr_1d()
and layer functions (e.g. geom_hdr_rug()
).
method_norm_1d()
method_norm_1d()
For more details on the use and implementation of the method_*_1d()
functions,
see vignette("method", "ggdensity")
.
# Normal estimators are useful when an assumption of normality is appropriate df <- data.frame(x = rnorm(1e3)) ggplot(df, aes(x)) + geom_hdr_rug(method = method_norm_1d()) + geom_density() # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_norm_1d()) str(res)
# Normal estimators are useful when an assumption of normality is appropriate df <- data.frame(x = rnorm(1e3)) ggplot(df, aes(x)) + geom_hdr_rug(method = method_norm_1d()) + geom_density() # Can also be used with `get_hdr_1d()` for numerical summary of HDRs res <- get_hdr_1d(df$x, method = method_norm_1d()) str(res)