admin管理员组

文章数量:1432602

I'm working on some plots using ggplot2 to represent Likert-scale data, and I need to repel some labels but not others. Following plenty of answers found here on Stack Overflow, I came up with the following code, but the repelled labels end up in the wrong location on the plot.

# Minimal working dataset ----
# One Likert question: the count for each answer and its share of all
# respondents. `answer` is a factor whose levels run from "Strongly Agree"
# to "Strongly Disagree".
survey <- tibble(
  question_n = 1,
  answer = factor(
    c("Somewhat Agree", "Somewhat Disagree", "Strongly Agree", "Strongly Disagree"),
    levels = c(
      "Strongly Agree", "Somewhat Agree",
      "Somewhat Disagree", "Strongly Disagree"
    )
  ),
  n = c(90, 12, 199, 4),
  respondents = 305,
  pct = n / respondents
)

From here, it's a simple enough ggplot to create:

# Plot ----
# Single horizontal stacked bar with one percentage label per segment.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # vjust = 0.5 centres each label within its stacked segment
    position = position_stack(vjust = 0.5, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

Obviously, this needs a geom_label_repel()! The "Disagree" response labels overlap with one another. So, I changed geom_label() to geom_label_repel():

# Plot ----
# Same stacked bar, but every label goes through geom_label_repel()
# (no data filter), so all four labels are candidates for repelling.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label_repel(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    # Repel only "up" or "down" on the plot
    direction = "y",
    # Keep the labels from leaving the bar
    ylim = c(.6, 1.3),
    # Fixed seed so the labels land in the same place on every render
    seed = 12345,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

This technically works, but it looks really messy. The 65% and 30% results have been repelled even though there was no need to do so. So, finally, I tried to include both geom_label() and geom_label_repel():

# Plot ----
# Two label layers: small segments (< 5.5%) are repelled, the rest use a plain
# geom_label().
# NOTE: each label layer receives only its own subset of rows, so
# position_stack() stacks within that subset instead of against the full bar.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label_repel(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # Only segments below 5.5% are repelled; larger labels fit as they are
    data = function(d) filter(d, pct < 0.055),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    # Repel only "up" or "down" on the plot
    direction = "y",
    # Keep the labels from leaving the bar
    ylim = c(.6, 1.3),
    # Fixed seed so the labels land in the same place on every render
    seed = 12345,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # Everything at or above 5.5% needs no repelling
    data = function(d) filter(d, pct >= 0.055),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

So, the 65% and 30% are in the right position, but the 4% and 1% are now in the incorrect x position. I've tried a few things to adjust this, like adding an x = value to the aes(), specifying a nudge_x = position instead of position_stack(), and several others I can't actually recall right now. I've been tearing my hair out the last few hours trying to solve this.

I need the 65% and 30% labels to stay where they are, and the 4% and 1% labels to sit at their correct stacked positions along the x-axis while still being repelled along the y-axis as they are now. Any suggestions?

I'm working on some plots using ggplot2 to represent Likert-scale data, and I need to repel some labels but not others. Following plenty of answers found here on Stack Overflow, I came up with the following code, but the repelled labels end up in the wrong location on the plot.

# Minimal working dataset ----
# One Likert question: the count for each answer and its share of all
# respondents. `answer` is a factor whose levels run from "Strongly Agree"
# to "Strongly Disagree".
survey <- tibble(
  question_n = 1,
  answer = factor(
    c("Somewhat Agree", "Somewhat Disagree", "Strongly Agree", "Strongly Disagree"),
    levels = c(
      "Strongly Agree", "Somewhat Agree",
      "Somewhat Disagree", "Strongly Disagree"
    )
  ),
  n = c(90, 12, 199, 4),
  respondents = 305,
  pct = n / respondents
)

From here, it's a simple enough ggplot to create:

# Plot ----
# Single horizontal stacked bar with one percentage label per segment.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # vjust = 0.5 centres each label within its stacked segment
    position = position_stack(vjust = 0.5, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

Obviously, this needs a geom_label_repel()! The "Disagree" response labels overlap with one another. So, I changed geom_label() to geom_label_repel():

# Plot ----
# Same stacked bar, but every label goes through geom_label_repel()
# (no data filter), so all four labels are candidates for repelling.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label_repel(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    # Repel only "up" or "down" on the plot
    direction = "y",
    # Keep the labels from leaving the bar
    ylim = c(.6, 1.3),
    # Fixed seed so the labels land in the same place on every render
    seed = 12345,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

This technically works, but it looks really messy. The 65% and 30% results have been repelled even though there was no need to do so. So, finally, I tried to include both geom_label() and geom_label_repel():

# Plot ----
# Two label layers: small segments (< 5.5%) are repelled, the rest use a plain
# geom_label().
# NOTE: each label layer receives only its own subset of rows, so
# position_stack() stacks within that subset instead of against the full bar.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label_repel(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # Only segments below 5.5% are repelled; larger labels fit as they are
    data = function(d) filter(d, pct < 0.055),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    # Repel only "up" or "down" on the plot
    direction = "y",
    # Keep the labels from leaving the bar
    ylim = c(.6, 1.3),
    # Fixed seed so the labels land in the same place on every render
    seed = 12345,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = fct_rev(answer)
    ),
    # Everything at or above 5.5% needs no repelling
    data = function(d) filter(d, pct >= 0.055),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

So, the 65% and 30% are in the right position, but the 4% and 1% are now in the incorrect x position. I've tried a few things to adjust this, like adding an x = value to the aes(), specifying a nudge_x = position instead of position_stack(), and several others I can't actually recall right now. I've been tearing my hair out the last few hours trying to solve this.

I need the 65% and 30% labels to stay where they are, and the 4% and 1% labels to sit at their correct stacked positions along the x-axis while still being repelled along the y-axis as they are now. Any suggestions?

Share Improve this question edited Nov 18, 2024 at 23:38 Axeman 35.5k8 gold badges86 silver badges100 bronze badges asked Nov 18, 2024 at 22:19 MorriganMorrigan 556 bronze badges 3
  • You might consider the approach here, where the large values use geom_label() and the small ones use geom_label_repel(): stackoverflow/questions/65782787/… – Jon Spring Commented Nov 19, 2024 at 0:46
  • Both geoms should "see" all the data (otherwise they won't stack correctly). You should control whether they display or not depending on the threshold. – Jon Spring Commented Nov 19, 2024 at 0:59
  • @JonSpring Jon, thank you for the link to that other question! I didn't see that when looking up this particular problem and, after some re-jiggering, it solved my issue. Thank you! I'll post a solution to the question shortly. – Morrigan Commented Nov 19, 2024 at 13:13
Add a comment  | 

2 Answers 2

Reset to default 2

You could use the ggpp package which has some useful position functions, in this case position_stacknudge(), which as you would guess allows the data to be stacked and nudged.

But first you need to arrange your data by question and answer, because controlling the nudge distance requires "a numeric vector of length 1, or of the same length as rows there are in data, with nudge values in data rows order."

Next, identify instances where there are consecutive values below a certain threshold and assign nudge offset values with alternating signs.

You will have to tinker with the threshold and offset values depending on the width and height of the plot.

Using your example data:

library(ggplot2)
library(ggpp)
library(scales)
library(dplyr)

# Example data: one question, four answers, each with its share of respondents.
survey <- tibble(
  question_n = 1,
  answer = factor(
    c("Somewhat Agree", "Somewhat Disagree", "Strongly Agree", "Strongly Disagree"),
    levels = c(
      "Strongly Agree", "Somewhat Agree",
      "Somewhat Disagree", "Strongly Disagree"
    )
  ),
  n = c(90, 12, 199, 4),
  respondents = 305,
  pct = n / respondents
)

# Segments at or below `threshold` count as "small"; `off_y` is the size of the
# nudge given to their labels. Both need tuning to the plot's width and height.
threshold <- .06
off_y <- .1

# The nudge vector is matched to data rows by position, so sort first.
# `cid` numbers each run of consecutive rows sharing the same `flag`; within a
# run of two or more small segments, -cospi(row_number()) yields +1, -1, +1, ...
# so neighbouring labels are nudged in opposite directions. Every other row
# gets an offset of 0.
survey <- survey |>
  arrange(question_n, answer) |>
  mutate(
    flag = pct <= threshold,
    cid = consecutive_id(flag),
    .by = question_n
  ) |>
  mutate(
    offset = if_else(flag & n() > 1, off_y * -cospi(row_number()), 0),
    .by = c(question_n, cid)
  )

# Stacked bar whose labels are stacked and then nudged along y by the
# precomputed `survey$offset` (0 for labels that need no nudge).
# fct_rev() is namespace-qualified because forcats is not attached by the
# library() calls in this example.
ggplot(survey, aes(x = pct, y = question_n, fill = forcats::fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = forcats::fct_rev(answer)
    ),
    # Nudge values are taken in data-row order, so `survey` must already be
    # sorted the way the offsets were computed.
    position = position_stacknudge(vjust = 0.5, y = survey$offset, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

Expanding to plot multiple questions requires adjusting the offset value:

# Three questions with randomly drawn counts; the seed keeps the example
# reproducible. `respondents` and `pct` are computed within each question.
set.seed(888)
survey <- tibble(
  question_n = rep(1:3, each = 4),
  answer = factor(
    rep(c("Somewhat Agree", "Somewhat Disagree", "Strongly Agree", "Strongly Disagree"), 3),
    levels = c(
      "Strongly Agree", "Somewhat Agree",
      "Somewhat Disagree", "Strongly Disagree"
    )
  ),
  n = sample(c(rep(1:3, 2), 30:33), 12, replace = TRUE)
) |>
  mutate(
    respondents = sum(n),
    pct = n / respondents,
    .by = question_n
  )

# Segments at or below `threshold` count as "small"; `off_y` is the size of the
# nudge given to their labels (larger here than in the single-question plot).
threshold <- .06
off_y <- .2

# Sort into stacking order, number each run of consecutive small/large
# segments per question, then alternate the nudge sign (+, -, +, ...) inside
# runs of two or more small segments. Every other row gets an offset of 0.
survey <- survey |>
  arrange(question_n, answer) |>
  mutate(
    flag = pct <= threshold,
    cid = consecutive_id(flag),
    .by = question_n
  ) |>
  mutate(
    offset = if_else(flag & n() > 1, off_y * -cospi(row_number()), 0),
    .by = c(question_n, cid)
  )

# One stacked bar per question; labels are stacked and then nudged along y by
# the precomputed `survey$offset` (0 for labels that need no nudge).
# fct_rev() is namespace-qualified because forcats is not attached by the
# library() calls in this example.
ggplot(survey, aes(x = pct, y = question_n, fill = forcats::fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label(
    aes(
      label = label_percent(accuracy = 1)(pct),
      color = forcats::fct_rev(answer)
    ),
    # Nudge values are taken in data-row order, so `survey` must already be
    # sorted the way the offsets were computed.
    position = position_stacknudge(vjust = 0.5, y = survey$offset, reverse = FALSE),
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    # NOTE(review): only the first row's `respondents` is reported here;
    # confirm that is intended when questions have different totals.
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

Thanks for the comments on the post, and the answer provided above. I used a combination of the two to come to a solution which works great. For geom_label_repel() and geom_label(), I used a label = ifelse() statement rather than filtering the data itself. Thanks again to the commenter and the proposed answer for helping me solidify this result.

# Segments whose share is below this value get a repelled label
threshold <- 0.055

# Plot ----
# Both label layers receive the full data, so position_stack() places every
# label against the whole bar. Each layer blanks the labels it should not draw
# by setting them to NA, and na.rm = TRUE drops those without a warning.
ggplot(survey, aes(x = pct, y = 1, fill = fct_rev(answer))) +
  geom_col(color = "black") +
  geom_label_repel(
    aes(
      # Label only the small segments in the repelled layer
      label = ifelse(pct < threshold, label_percent(accuracy = 1)(pct), NA),
      color = fct_rev(answer)
    ),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    na.rm = TRUE,
    # Repel only "up" or "down" on the plot
    direction = "y",
    # Keep the labels from leaving the bar
    ylim = c(.6, 1.3),
    # Fixed seed so the labels land in the same place on every render
    seed = 12345,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  geom_label(
    aes(
      # Label the remaining segments in the plain layer
      label = ifelse(pct >= threshold, label_percent(accuracy = 1)(pct), NA),
      color = fct_rev(answer)
    ),
    position = position_stack(vjust = 0.5, reverse = FALSE),
    na.rm = TRUE,
    fill = "white",
    size = 3.25,
    fontface = "bold",
    label.size = 1,
    label.r = unit(2.5, "pt"),
    show.legend = FALSE
  ) +
  scale_x_continuous(
    labels = label_percent(),
    # Expand so the labels aren't off-plot
    expand = expansion(mult = c(0.025, 0.025))
  ) +
  scale_y_discrete(labels = NULL) +
  scale_fill_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4")) +
  scale_color_manual(values = c("tomato4", "tomato", "royalblue", "royalblue4"), guide = "none") +
  guides(fill = guide_legend(position = "bottom", nrow = 2, reverse = TRUE)) +
  labs(
    title = NULL,
    subtitle = NULL,
    caption = paste("Respondents N =", survey$respondents[1]),
    fill = NULL,
    color = NULL,
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

It also ended up working for all 20-ish plots in my .Rmd file, with some modifications here and there to the seed = argument to get them to place where I wanted.

本文标签: r geom_label_repel() labels not on the correct points Stack Overflow