library(tidyverse, quietly = TRUE)
library(janitor, quietly = TRUE)
library(sf)
library(sp)
# Get county and state fips, state name, county name
county <- tidycensus::fips_codes %>%
unite("fips",
c(state_code, county_code),
sep = "", remove = FALSE) %>%
rename(county_name = county) %>%
select(fips, county_name, state_name)
# Add 02010 Aleutian Islands Census Area, Alaska
county <- county %>%
add_row(fips = "02010",
county_name = "Aleutian Islands Census Area",
state_name = "Alaska",
.after = 67)
state <- tidycensus::fips_codes %>%
select(state_code, state_name) %>%
rename(fips = state_code) %>% distinct()
# Merge so we have county and state data in one data frame
county_state <- bind_rows(county, state)
# Manually add US as fips "00"
county_state <- county_state %>%
add_row(fips = "00",
county_name = NA,
state_name = "US")
# Import county spatial data frame
county_sf <- tigris::counties(progress_bar = FALSE) %>%
clean_names()2 Local Food Sales
The Local and Regional Food Systems Data Warehouse contains data indicators related to local food sales. The Data Warehouse contains data related to local food sales that can help researchers, practitioners, and policymakers working to support farm and local food system development. The data can be used to inform proposals for funding,communicate the importance and broader context of their work to shareholders and policy makers, and to evaluate programs.
The set of metrics is about recognizing and specifying the effects that local farms and local markets have in their communities, states, and regions. Some of these are basic headcounts of how many of a given type of market or farm exist. Others seek to count and describe the customers that participate in these markets.
Data include:
- Producer data includes information about the number and demographics of producers selling into local markets, both direct and intermediated.
- Farmers market sector data includes the number of farmers markets. The USDA maintains the voluntary Local Food Directories which includes farmers markets data with information about location and SNAP acceptance. These data are incomplete (based on the need for farmers markets to self-list on the directory), but it is one of the best centralized sources of data.
- Community Supported Agriculture (CSA) data includes the number of operations with CSAs. The USDA maintains the voluntary Local Food Directory which includes data about CSAs including location, operator demographics, number of shares, and share mix data. This data is incomplete (based on the need for CSAs to self-list on the directory), but it is one of the best centralized sources of data.
- Other sector-level data include data on the number of aquaculture businesses and agritourism.
The heterogeneity of types of farms, businesses and organizations involved in local food systems means that specific count and other relevant data for each sector or farm type is a challenge to compile.
Local food sales occur primarily in a grassroots sector that has largely not participated in formal data collection efforts and management. For example, data from the farmers market sector is collected at many markets nationwide but often it is focused on measuring the impacts of the organization. The Farmers Market Coalition (FMC) has surveyed market operators on specific topics (such as COVID-19 impacts). However, only a small percentage of markets, and their farmers, are collecting and using data. The data presented are not complete datasets and should not be used to represent the performance of any one sector on a scale larger than what is represented through this data. As the local food sales environment becomes more friendly to collecting and reporting data, we expect the scope and accuracy of the data to improve.
Site users are encouraged to acknowledge the systemic factors that influence community characteristics. When presenting data, we encourage disaggregation by individual race, ethnicity, and cultural group wherever possible. Aggregation of data can mask important differences that might be relevant for understanding needs and crafting adequate program and policy solutions. We also encourage the use of practices that invite community members to help contextualize data, share stories, and amplify community solutions.
2.1 State and county data
We use tidycensus to get state and county names by FIPS so they are uniform across all data sets. And tigris to get spatial county-level data frames.
In the tidycensus data, there is no data for FIPS 02010 Aleutian Islands Census Area, Alaska. This FIPS is found in the Census of Agriculture. We add this fips to our county data based on the Geographic Area Codes and Titles from the U.S. Bureau of Labor Statistics.
Alaska must be mapped differently than by counties. For Alaska, the “proper” way to go is to join this crosswalk with Alaska counties. Form a union of the counties to get a shapefile with the NASS regions. Then merge the ag census data with that. You will report the total for the NASS designated region instead of the county. To build the map, you’d want to remove Alaska from a county shape file, and splice in Alaska with NASS region.
2.2 Agritourism, community supported agriculture (CSA), farmers markets, and on-farm stores
We gather point level data from the USDA Local Food Directories on Agritourism, CSA and Farmers Markets, and On-Farm Markets. The main challenge with this database is it is user reported and is not a comprehensive list of all organizations.
We match the lat/long coordinates to county level data to provide counts by county, state, and US. We only keep data that was updated since 8/30/2020 (based on feedback from AMS). If a county has no reported locations in this database, our final data will show there are zero in the county. Data is presented both as point-level and aggregated to county/state/US.
library(readxl, quietly = TRUE)
library(sf, quietly = TRUE)
library(lubridate, quietly = TRUE)
# Import and bind data in one data frame, add file name to indicate type
file_list <- fs::dir_ls(path = "data_raw/localfoodsales/AMS_directory")
df <- file_list %>%
map(read_xlsx) %>%
bind_rows(.id = "variable_name") %>%
mutate(
variable_name = str_remove(variable_name,
"data_raw/localfoodsales/AMS_directory/"),
variable_name = str_remove(variable_name, ".xlsx"))
# Keep columns of interest, drop obs. with NA for lat/long, drop obs. not updated since 2020
df <- df %>%
mutate(
update_time = as_date(update_time)) %>% filter(
update_time>"2020-08-30",
!is.na(location_x), !is.na(location_y)) %>%
select(
variable_name, listing_id, variable_name,
listing_name, location_address, location_x, location_y)
# listing_id 308158 has a comma at the end of location_y, remove this
df <- df %>%
mutate(location_y = str_remove(location_y, ","))
# Rename x and y, lat and long and make numeric
df <- df %>%
rename(lat = location_y,
long = location_x) %>%
mutate(lat = as.numeric(lat),
long = as.numeric(long))
## Get FIPS codes from lat/long data using the spatial county data from the Tigris package, defined above as county_sf
# make data frame into a spatial data frame and keep original lat/long variables
df_sf <- df %>%
st_as_sf(coords = c("long", "lat"),
crs = st_crs(county_sf),
remove = FALSE)
# intersect our spatial point-level data with the tigris county spatial data frame
intersected <- st_intersects(df_sf, county_sf)
# get the fips code for each entry
# Cambridge Farmers' Market, listing id 309678 did not have a fips, added manually
df_sf <- df_sf %>%
mutate(
intersection = as.integer(intersected),
fips = county_sf$geoid[intersection],
fips = case_when(listing_id=="309678" ~ "16087",
TRUE ~ fips))
rm(intersected)
# Turn back into a regular data frame
df <- as_tibble(df_sf) %>%
select(!c(listing_id, geometry, intersection))
# Group by variable name and join with county data so we can add a 0 value for those counties for each variable
# group by variable name and nest
nested_df <- df %>%
group_by(variable_name) %>%
nest()
# Join nested data frame by fips for each variable and unnest and add a count
nested_df <- nested_df %>%
mutate(data = map(data , ~ full_join(., county, by = "fips"))) %>%
unnest(cols = data) %>%
mutate(value =
case_when(is.na(listing_name) & is.na(location_address) ~ 0,
TRUE ~1))
rm(df_sf)
## Number of operations per county
df_county <- nested_df %>%
group_by(fips, variable_name, state_name, county_name) %>%
summarise(value = sum(value))
# Number of operations per state, add state fips code
df_state <- df_county %>%
group_by(variable_name, state_name) %>%
summarise(value = sum(value)) %>%
left_join(state)
# Number of operations in US
df_us <- df_county %>%
group_by(variable_name) %>%
summarise(value = sum(value)) %>%
mutate(
fips = "00",
state_name = "US",
county_name = NA)
# Join all data into one data frame
df_agg <- bind_rows(df_county, df_state, df_us)
rm(df_county, df_state, df_us)
# Add in additional columns for aggregated data and keep columns of interest
df_agg <- df_agg %>%
mutate(
variable_name = str_c("number_", variable_name, sep = ""),
category = "Food Retail",
topic_area = case_when(
variable_name == "number_csa" ~ "Community Supported Agriculture",
variable_name == "number_farmersmarket" ~ "Farmers Market",
variable_name == "number_agritourism" ~ "Agritourism",
variable_name == "number_onfarmmarket" ~ "On Farm Market"),
year = "2023",
value_codes = NA) %>%
select(fips, county_name, state_name, category,
topic_area, year, variable_name, value,
value_codes)
# Drop agritourism, we will use census of ag for ag tourism counts
df_agg <- df_agg %>%
filter(variable_name != "number_agritourism")
# Define food retail data frame
df_food_retail <- df_agg
rm(df_agg)
# Add in additional columns for point-level data and keep columns of interest
df <- df %>%
left_join(county) %>%
mutate(
category = "Food Retail",
topic_area = case_when(
str_detect(variable_name, "csa") ~ "Community Supported Agriculture",
str_detect(variable_name, "farmersmarket") ~ "Farmers Market",
str_detect(variable_name, "agritourism") ~ "Agritourism",
str_detect(variable_name, "onfarmmarket") ~ "On Farm Market"),
variable_name = case_when(
str_detect(variable_name, "csa") ~ "location_csa",
str_detect(variable_name, "farmersmarket") ~ "location_farmersmarket",
str_detect(variable_name, "agritourism") ~ "location_agritourism",
str_detect(variable_name, "onfarmmarket") ~ "location_onfarmmarket"),
org_type = topic_area,
year = "2023",
value_codes = NA) %>%
rename(
org_address = location_address,
org_name = listing_name) %>%
select(
fips, county_name, state_name, category,
topic_area, year, variable_name,
lat, long, org_name,
org_type, org_address, value_codes)
rm(nested_df)
# Drop obs. with no data and define food retail point-level data frame
df_food_retail_point <- df %>%
filter(!is.na(lat))
# Create meta data
meta_point <- df_food_retail_point %>%
group_by(category, topic_area, variable_name) %>%
count() %>%
select(-n)
meta_food_retail <- df_food_retail %>%
group_by(category, topic_area, variable_name) %>%
count() %>% select(-n) %>%
bind_rows(meta_point) %>%
mutate(
`2 pager title` = "Local Food Sales",
years = "2023",
user_friendly_variable_name = case_when(
variable_name == "number_csa" ~ "Community Supported Agriculture (CSA), number of businesses",
variable_name == "number_farmersmarket" ~ "Farmers markets, number of businesses",
variable_name == "number_agritourism" ~ "Agritourism, number of businesses",
variable_name == "number_onfarmmarket" ~ "On farm market, number of businesses",
variable_name == "location_csa" ~ "Community Supported Agriculture (CSA), location",
variable_name == "location_farmersmarket" ~ "Farmers markets, location",
variable_name == "location_agritourism" ~ "Agritourism, location",
variable_name == "location_onfarmmarket" ~ "On farm market, location"),
variable_definition = case_when(
variable_name == "number_csa" ~ "Number of community Supported Agriculture (CSA) businesses. Data is self reported and not representative of all CSA businesses.",
variable_name == "number_farmersmarket" ~ "Number of farmers markets. Data is self reported and not representative of all farmers markets.",
variable_name == "number_agritourism" ~ "Number of agritourism businesses",
variable_name == "number_onfarmmarket" ~ "Number of on-farm market businesses. Data is self reported and not representative of all onfarm markets.",
variable_name == "location_csa" ~ "Location of community Supported Agriculture (CSA) businesses. Data is self reported and not representative of all CSA businesses.",
variable_name == "location_farmersmarket" ~ "Location of farmers markets. Data is self reported and not representative of all farmers markets.",
variable_name == "location_agritourism" ~ "Location of agritourism businesses. Data is self reported and not representative of all agritourism businesses.",
variable_name == "location_onfarmmarket" ~ "Location of on-farm market businesses. Data is self reported and not representative of all onfarm markets."),
periodicity = "continuously updated",
aggregation = case_when(
str_detect(variable_name, "location") ~ "point",
TRUE ~ "count"),
format = case_when(
str_detect(variable_name, "location") ~ "point",
TRUE ~ "integer"),
keywords = "USDA|farmstand|USDA AMS|local food directory",
hashtags = "#farmersmarket|#shoplocal|#eatlocal|#supportlocal" ,
chart_type1 = case_when(
format == "point" ~ NA,
TRUE ~ "BarChart"),
chart_type2 = NA,
chart_axis_x1 = case_when(
str_detect(variable_name, "location") ~ NA,
variable_name %in% c("number_csa", "number_agritourism",
"number_onfarmmarket") ~ "Number of businesses",
variable_name == "number_farmersmarket" ~ "Number of markets"),
chart_axis_x2 = NA,
chart_axis_y1 = NA,
chart_axis_y2 = NA,
source = "U.S. Department of Agriculture, Local Food Directories",
citation = "U.S. Department of Agriculture, Local Food Directories",
url = "https://www.usdalocalfoodportal.com/#directories",
last_update_date = "1/31/2024") %>%
select(`2 pager title`, category, topic_area, variable_name,
user_friendly_variable_name, variable_definition,
years, periodicity, aggregation, format,
keywords, hashtags,
chart_type1, chart_type2,
chart_axis_x1, chart_axis_x2, chart_axis_y1,
chart_axis_y2, source, url, citation, last_update_date)
rm(df, meta_point)2.3 Direct-to-consumer, intermediated, value-added, and agritourism operations and sales
We use the 2022 and 2017 Census of Agriculture public data. Data is downloaded from U.S. Department of Agriculture, National Agricultural Statistics Service (USDA NASS), Data, Large Datasets, qs.census2022.txt.gz and qs.census2017.txt.gz.
Data is provided for
Direct-to-consumer sales and farms
Intermediated sales and farms
Local sales
Because direct-to-consumer and intermediated farm counts are not exclusive categories and one farm could be counted in both categories, we cannot compute total local farms with the public access data.
If either sales category was not reported, we report local food sales as NA and include the appropriate disclosure code. There were two cases fips 31173 in 2017 and fips 20119 in 2020, where direct-to-consumer and intermediated had different disclosure codes. We use (D) for both.
Agritourism sales and farms
Value-added sales and farms (this includes sales though commodity and local channels)
Data notes:
- (D) Withheld to avoid disclosing data for individual farms
- (Z) Less than half of the unit shown
# Import 2017, 2022 data
file_list <- fs::dir_ls(path = "data_raw/localfoodsales/qs.census")
coa <- file_list %>%
map(~read_tsv(.,show_col_types = FALSE)) %>%
bind_rows() %>%
clean_names()
# Select variables of interest, keep totals only
df <- coa %>%
filter(domain_desc == "TOTAL" &
str_detect(short_desc, "WHOLESALE|RETAIL|VALUE-ADDED PRODUCTS|COMMODITY TOTALS - SALES|COMMODITY TOTALS - OPERATIONS WITH SALES|AG TOURISM & RECREATIONAL SERVICES") &
agg_level_desc %in% c("COUNTY", "NATIONAL", "STATE")) %>%
filter(short_desc != "COMMODITY TOTALS - SALES, MEASURED IN $ / OPERATION") %>%
select(short_desc, agg_level_desc, state_fips_code, county_code, year, value)
# To be consistent with other data sets, change the US fips code of 99 to 00
# Add fips
df <- df %>%
mutate(
state_fips_code = case_when(
state_fips_code == "99" ~ "00",
TRUE ~ state_fips_code),
fips = case_when(
is.na(county_code) ~ state_fips_code,
TRUE ~ str_c(state_fips_code, county_code)))
# Create variable names and make values numeric
df <- df %>%
mutate(
short_desc = str_remove_all(short_desc,
"COMMODITY TOTALS - |INCOME, FARM-RELATED, |COMMODITY TOTALS, "),
variable_name = case_when(
short_desc == "SALES, MEASURED IN $" ~ "all_sales",
short_desc == "OPERATIONS WITH SALES" ~ "all_farms",
str_detect(short_desc, "AG TOURISM & RECREATIONAL SERVICES - OPERATIONS WITH RECEIPTS") ~
"agritourism_farms",
short_desc == "AG TOURISM & RECREATIONAL SERVICES - RECEIPTS, MEASURED IN $" ~
"agritourism_sales",
short_desc == "INCL VALUE-ADDED, RETAIL, DIRECTLY MARKETED, HUMAN CONSUMPTION - OPERATIONS WITH SALES" ~ "d2c_farms",
short_desc == "INCL VALUE-ADDED, RETAIL, DIRECTLY MARKETED, HUMAN CONSUMPTION - SALES, MEASURED IN $" ~ "d2c_sales",
short_desc == "INCL VALUE-ADDED, WHOLESALE, DIRECT TO RETAILERS & INSTITUTIONS & FOOD HUBS, LOCAL OR REGIONALLY BRANDED PRODUCTS, HUMAN CONSUMPTION - OPERATIONS WITH SALES" ~ "intermediated_farms",
short_desc == "INCL VALUE-ADDED, WHOLESALE, DIRECT TO RETAILERS & INSTITUTIONS & FOOD HUBS, LOCAL OR REGIONALLY BRANDED PRODUCTS, HUMAN CONSUMPTION - SALES, MEASURED IN $" ~ "intermediated_sales",
short_desc == "PRODUCED AND SOLD VALUE-ADDED PRODUCTS - OPERATIONS WITH SALES" ~ "valueadded_farms",
short_desc == "PRODUCED AND SOLD VALUE-ADDED PRODUCTS - SALES, MEASURED IN $" ~ "valueadded_sales",
TRUE ~ NA),
value_codes = case_when(
value =="(D)" ~ "D",
value == "(Z)" ~ "Z",
TRUE ~ NA),
value = case_when(
value %in% c("(D)", "(Z)") ~ NA,
TRUE ~ as.numeric(str_remove_all(value, ",")))) %>%
select(fips, year, variable_name, value, value_codes)
## Drop average farm sales
df <- df %>%
filter(!is.na(variable_name))
## Add value codes for total local food sales variables. For any variable that is not disclosed for either local food sales category we do not calculate total local food sales
value_codes <- df %>%
filter(variable_name %in% c("d2c_sales", "intermediated_sales")) %>%
select(year, fips, variable_name, value_codes) %>%
pivot_wider(
names_from = variable_name,
values_from = value_codes) %>%
mutate(
local_sales = case_when(
d2c_sales != intermediated_sales |
(d2c_sales =="D" | intermediated_sales == "D" ) ~ "D",
(d2c_sales =="Z" | intermediated_sales == "Z") ~ "Z" ,
TRUE ~ NA)) %>%
select(year, fips, local_sales) %>%
pivot_longer(
cols = !c(year, fips),
names_to = "variable_name",
values_to = "value_codes")
## Calculate total local food sales
local <- df %>%
filter(variable_name %in% c("d2c_sales", "intermediated_sales")) %>%
select(-value_codes) %>%
pivot_wider(
names_from = variable_name,
values_from = value) %>%
mutate(
local_sales = d2c_sales + intermediated_sales) %>%
select(fips, year, local_sales) %>%
pivot_longer(
cols = !c(fips, year),
names_to = "variable_name",
values_to = "value")
# Join with value codes
local <- local %>%
left_join(value_codes)
# Add to original data
df <- bind_rows(df, local)
## Calculate percentages of total for farms and sales
percent <- df %>%
select(!value_codes) %>%
pivot_wider(
names_from = variable_name,
values_from = value) %>%
select(fips, year,
starts_with("all"), everything()) %>%
mutate(across(ends_with("_farms"), ~ ./all_farms,
.names = "{.col}_pct")) %>%
mutate(across(ends_with("_sales"), ~ ./all_sales,
.names = "{.col}_pct")) %>%
pivot_longer(
cols = !c(fips, year),
names_to = "variable_name",
values_to = "value") %>%
filter(str_detect(variable_name, "_pct$") &
!variable_name %in% c("all_sales_pct",
"all_farms_pct",
"all_producers_pct"))
# Add value codes
percent <- percent %>%
mutate(
variable_name = str_remove(variable_name, "_pct$"))
value_codes <- df %>%
select(fips, variable_name, year, value_codes)
percent <- left_join(percent, value_codes) %>%
mutate(variable_name = str_c(variable_name, "_pct", sep = ""))
df <- bind_rows(df, percent)
rm(percent, value_codes, local)
# Add state and county names
df <- df %>%
left_join(county_state)
# Add additional variables
df <- df %>%
mutate(
category = "Food Retail",
topic_area = case_when(
str_detect(variable_name, "agritourism") ~ "Agritourism",
str_detect(variable_name, "d2c|intermediated|valueadded|local") ~ "General DTC",
TRUE ~ "Agriculture")) %>%
select(
fips, county_name, state_name,
category, topic_area, year, variable_name, value,
value_codes)
# Create metadata file
meta_coa_local <- df %>%
group_by(variable_name, category, topic_area) %>%
count() %>%
select(-n) %>%
mutate(
`2 pager title` = "Local Food Sales",
user_friendly_variable_name = case_when(
variable_name == "agritourism_farms" ~ "Agritourism, number of farms",
variable_name == "agritourism_farms_pct" ~ "Agritourism, number of farms as a percent of all farms",
variable_name == "agritourism_sales" ~ "Agritourism, sales",
variable_name == "agritourism_sales_pct" ~ "Agritourism, sales as a percent of all farm sales",
variable_name == "all_farms" ~ "Farms, total number",
variable_name == "all_sales" ~ "Farms, total sales",
variable_name == "d2c_farms" ~
"Farms, number selling direct-to-consumer",
variable_name == "d2c_farms_pct" ~
"Farms, percent of all farms selling direct-to-consumer",
variable_name == "d2c_sales" ~
"Farms, direct-to-consumer sales",
variable_name == "d2c_sales_pct" ~
"Farms, direct-to-consumer sales as a percent of all farm sales",
variable_name == "intermediated_farms" ~
"Farms, number selling intermediated",
variable_name == "intermediated_farms_pct" ~
"Farms, percent of all farms selling intermediated",
variable_name == "intermediated_sales" ~
"Farms, intermediated sales",
variable_name == "intermediated_sales_pct" ~
"Farms, intermediated sales as a percent of all farm sales",
variable_name == "local_sales" ~
"Farms, local marketing channel sales",
variable_name == "local_sales_pct" ~
"Farms, local marketing channel sales as a percent of all farm sales",
variable_name == "valueadded_farms" ~
"Farms, number of farms with value-added sales",
variable_name == "valueadded_farms_pct" ~
"Farms, percent of all farms with value-added sales",
variable_name == "valueadded_sales" ~
"Farms, value-added sales",
variable_name == "valueadded_sales_pct" ~
"Farms, value-added sales as a percent of all farm sales"),
variable_definition = case_when(
variable_name == "agritourism_farms" ~ "Number of farms with income from agritourism and recreational services",
variable_name == "agritourism_farms_pct" ~ "Number of farms with income from agritourism and recreational services as a percent of all farms",
variable_name == "agritourism_sales" ~ "Income from agritourism and recreational services",
variable_name == "agritourism_sales_pct" ~ "Income from agritourism and recreational services as a percent of all farm sales",
variable_name == "all_farms" ~ "Number of farms",
variable_name == "all_sales" ~ "Total sales of all farms",
variable_name == "d2c_farms" ~
"Number of farms selling through direct-to-consumer market channels (e.g., farmers markets, community supported agriculture, farm stands)",
variable_name == "d2c_farms_pct" ~
"Percent of all farms selling through direct-to-consumer market channels (e.g., farmers markets, community supported agriculture, farm stands)",
variable_name == "d2c_sales" ~
"Direct-to-consumer sales (e.g., farmers markets, community supported agriculture, farm stands)",
variable_name == "d2c_sales_pct" ~
"Direct-to-consumer sales (e.g., farmers markets, community supported agriculture, farm stands) as a percent of all farm sales",
variable_name == "intermediated_farms" ~
"Number of farms selling intermediated market channels (e.g., restaurants, grocery stores, schools, hospitals, regional food aggregators)",
variable_name == "intermediated_farms_pct" ~
"Percent of all farms selling intermediated market channels (e.g., restaurants, grocery stores, schools, hospitals, regional food aggregators)",
variable_name == "intermediated_sales" ~
"Intermediated sales (e.g., restaurants, grocery stores, schools, hospitals, regional food aggregators)",
variable_name == "intermediated_sales_pct" ~
"Intermediated sales (e.g., restaurants, grocery stores, schools, hospitals, regional food aggregators) as a percent of all farm sales",
variable_name == "local_sales" ~
"Sales through local marketing channels (e.g., farmers markets, community supported agriculture, farm stands, restaurants, grocery stores, schools, hospitals, regional food aggregators)",
variable_name == "local_sales_pct" ~
"Sales thorugh local marketing channel (e.g., farmers markets, community supported agriculture, farm stands, restaurants, grocery stores, schools, hospitals, regional food aggregators) as a percent of all farm sales",
variable_name == "valueadded_farms" ~
"Number of farms with value-added sales (includes commodity and local sales)",
variable_name == "valueadded_farms_pct" ~
"Percent of all farms with value-added sales (includes commodity and local sales)",
variable_name == "valueadded_sales" ~
"Value-added sales (includes commodity and local sales)",
variable_name == "valueadded_sales_pct" ~
"Value-added sales (includes commodity and local sales) as a percent of all farm sales"),
years = "2017|2022",
periodicity = "Every 5 years",
aggregation = case_when(
str_detect(variable_name, "_pct") ~ "percent",
TRUE ~ "count"),
format = case_when(
str_detect(variable_name, "_pct") ~ "percent",
TRUE ~ "integer"),
keywords = "USDA|NASS|COA|Census of Agriculture",
hashtags = "#eatlocal|#supportlocal|#locallygrown|#localfarmers|#localfarm",
chart_type1 = "BarChart",
chart_type2 = NA,
chart_axis_x1 = case_when(
str_detect(variable_name, "number_pct$") ~ "Percent of all farms",
str_detect(variable_name, "sales_pct$") ~ "Percent of all sales",
str_detect(variable_name, "number") ~ "Number of all farms",
str_detect(variable_name, "sales") ~ "Sales"),
chart_axis_x2 = NA,
chart_axis_y1 = NA,
chart_axis_y2 = NA,
source = "U.S. Department of Agriculture, National Agricultural Statistical Survey, Census of Agriculture",
url = "https://www.nass.usda.gov/datasets/",
citation = "U.S. Department of Agriculture, National Agricultural Statistical Survey, Data, Large Datasets, qs.census2022.txt.gz",
last_update_date = "2/30/24") %>%
select(`2 pager title`, category, topic_area, variable_name,
user_friendly_variable_name, variable_definition,
years, periodicity, aggregation, format,
keywords, hashtags,
chart_type1, chart_type2,
chart_axis_x1, chart_axis_x2, chart_axis_y1,
chart_axis_y2, source, url, citation, last_update_date)
# Define data frame and make year a character
df_coa_local <- df %>%
mutate(
year = as.character(year))2.4 Farm race/ethnicity
We use the 2022 Census of Agriculture public data. Data is downloaded from U.S. Department of Agriculture, National Agricultural Statistics Service (USDA NASS), Data, Large Datasets, qs.census2022.txt.gz.
We provide race/ethnicity data by:
- Number of operations
- Number of producers
- Acres operated
Data notes:
- (D) Withheld to avoid disclosing data for individual farms
- (Z) Less than half of the unit shown
- “PRODUCERS - NUMBER OF PRODUCERS” and “PRODUCERS, (ALL) - NUMBER OF PRODUCERS” are both used to provide information on the total number of producers. To match the published data, the variable
- “PRODUCERS, (ALL) - NUMBER OF PRODUCERS” is the total number of producers at the state- and county- level
- “PRODUCERS - NUMBER OF PRODUCERS” is the total number of producers at the national level
# Select variables of interest
varlist <- c(
"PRODUCERS, AMERICAN INDIAN OR ALASKA NATIVE - ACRES OPERATED",
"PRODUCERS, AMERICAN INDIAN OR ALASKA NATIVE - NUMBER OF OPERATIONS",
"PRODUCERS, AMERICAN INDIAN OR ALASKA NATIVE - NUMBER OF PRODUCERS",
"PRODUCERS, ASIAN - ACRES OPERATED",
"PRODUCERS, ASIAN - NUMBER OF OPERATIONS",
"PRODUCERS, ASIAN - NUMBER OF PRODUCERS",
"PRODUCERS, BLACK OR AFRICAN AMERICAN - ACRES OPERATED",
"PRODUCERS, BLACK OR AFRICAN AMERICAN - NUMBER OF OPERATIONS",
"PRODUCERS, BLACK OR AFRICAN AMERICAN - NUMBER OF PRODUCERS",
"PRODUCERS, HISPANIC - ACRES OPERATED",
"PRODUCERS, HISPANIC - NUMBER OF OPERATIONS",
"PRODUCERS, HISPANIC - NUMBER OF PRODUCERS",
"PRODUCERS, NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER - ACRES OPERATED",
"PRODUCERS, NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER - NUMBER OF OPERATIONS",
"PRODUCERS, NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER - NUMBER OF PRODUCERS",
"PRODUCERS, WHITE - ACRES OPERATED",
"PRODUCERS, WHITE - NUMBER OF OPERATIONS",
"PRODUCERS, WHITE - NUMBER OF PRODUCERS",
"FARM OPERATIONS - ACRES OPERATED",
"FARM OPERATIONS - NUMBER OF OPERATIONS",
"PRODUCERS - NUMBER OF PRODUCERS",
"PRODUCERS, (ALL) - NUMBER OF PRODUCERS")
df <- coa %>%
filter(short_desc %in% varlist & domain_desc == "TOTAL" &
(agg_level_desc == "NATIONAL" |
agg_level_desc %in% c("COUNTY", "STATE"))) %>%
select(short_desc, agg_level_desc, state_fips_code, county_code, year, value)
# Use "PRODUCERS, (ALL) - NUMBER OF PRODUCERS" for data at the state- and county- level and PRODUCERS - NUMBER OF PRODUCERS" for data at the national level
df <- df %>%
filter(case_when(
agg_level_desc != "NATIONAL" ~ short_desc != "PRODUCERS - NUMBER OF PRODUCERS",
TRUE ~ short_desc != "PRODUCERS, (ALL) - NUMBER OF PRODUCERS"))
# To be consistent with other data sets, change the US fips code of 99 to 00
# Add fips
df <- df %>%
mutate(
state_fips_code = case_when(
state_fips_code == "99" ~ "00",
TRUE ~ state_fips_code),
fips = case_when(
is.na(county_code) ~ state_fips_code,
TRUE ~ str_c(state_fips_code, county_code)))
# Create variable names, description and definition
df <- df %>%
mutate(
short_desc = str_replace_all(short_desc,
"FARM OPERATIONS - |PRODUCERS - |PRODUCERS, \\(ALL\\) - ",
"TOTAL, "),
short_desc = str_to_title(str_remove_all(
short_desc, "PRODUCERS, ")),
short_desc = str_replace_all(short_desc, " - ", ", "),
short_desc = str_replace_all(short_desc, "Or", "or"),
short_desc = str_replace(short_desc, "Number Of Producers",
"number of producers"),
short_desc = str_replace(short_desc, "Number Of Operations",
"number of operations"),
short_desc = str_replace(short_desc, "Acres Operated",
"acres operated"),
variable_description = short_desc,
variable_name = str_to_lower(short_desc),
variable_name = str_remove_all(variable_name,
"of |or |,"),
variable_name = str_replace(variable_name,
"other pacific islander", "opi"),
variable_name = str_replace_all(variable_name,
" ", "_"),
variable_definition = case_when(
str_detect(variable_name, "hispanic") ~ "Hispanic of any race",
str_detect(variable_name, "total") ~ NA,
TRUE ~ "Race alone"))
# Keep information for metadata
meta_demo <- df %>%
select(variable_name, variable_description, variable_definition) %>%
distinct()
# Keep columns of interest
df <- df %>%
select(year, fips, variable_name, value)
# Change value to numeric and add value codes
df <- df %>%
mutate(
value_codes = case_when(
value =="(D)" ~ "D",
value == "(Z)" ~ "Z",
TRUE ~ NA),
value = case_when(
value %in% c("(D)", "(Z)") ~ NA,
TRUE ~ as.numeric(str_remove_all(value, ",")))) %>%
select(fips, year, variable_name, value, value_codes)
## Add value codes for total local food sales variables
value_codes <- df %>%
select(year, fips, variable_name, value_codes)
## Calculate percentages of total for farms and sales
percent <- df %>%
select(!value_codes) %>%
pivot_wider(
names_from = variable_name,
values_from = value) %>%
select(fips, year, starts_with("total"), everything()) %>%
mutate(across(everything(), ~ replace_na(., 0))) %>%
mutate(across(ends_with("acres_operated"), ~ ./total_acres_operated,
.names = "{.col}_pct")) %>%
mutate(across(ends_with("number_operations"), ~./total_number_operations,
.names = "{.col}_pct")) %>%
mutate(across(ends_with("number_producers"), ~./total_number_producers,
.names = "{.col}_pct")) %>%
pivot_longer(
cols = !c(fips, year),
names_to = "variable_name",
values_to = "value") %>%
filter(str_detect(variable_name, "_pct$") &
!variable_name %in% c("total_acres_operated_pct",
"total_number_operations_pct",
"total_number_producers_pct"))
# Add value codes
percent <- percent %>%
mutate(
variable_name = str_remove(variable_name, "_pct$"))
value_codes <- df %>%
select(fips, variable_name, year, value_codes)
percent <- left_join(percent, value_codes) %>%
mutate(variable_name = str_c(variable_name, "_pct", sep = ""),
value = case_when(
value_codes == "D" ~ NA,
TRUE ~ value))
df <- bind_rows(df, percent)
rm(percent, value_codes)
# Add state and county names
df <- df %>%
left_join(county_state)
# Add US as state name
df <- df %>%
mutate(
state_name = case_when(
fips == "00" ~ "US",
TRUE ~ state_name))
# Add additional variables
df <- df %>%
mutate(
category = "Demographics",
topic_area = "Agriculture") %>%
select(
fips, county_name, state_name,
category, topic_area, year, variable_name, value,
value_codes)
# define data frame
df_demo <- df %>%
mutate(year = as.character(year))
# Create metadata file
meta_demo_pct <- meta_demo %>%
filter(
!str_detect(variable_name, "^total")) %>%
mutate(
variable_name = str_c(variable_name, "_pct"),
variable_description = str_c(variable_description, " as a percent of total"))
meta_demo <- bind_rows(meta_demo, meta_demo_pct)
rm(meta_demo_pct)
meta_demo<- meta_demo %>%
mutate(
category = "Demographics",
topic_area = "Agriculture",
`2 pager title` = "Local Food Sales",
v1 = str_to_sentence(str_replace(variable_description,
".*, ", "")),
v2 = str_replace(variable_description, ", .*", ""),
v2 = case_when(
v2 == "Total" ~ tolower(v2),
TRUE ~ v2),
user_friendly_variable_name = str_c(v1, v2, sep = ", "),
years = "2017|2022",
periodicity = "Every 5 years",
aggregation = case_when(
str_detect(variable_name, "_pct") ~ "percent",
TRUE ~ "count"),
format = case_when(
str_detect(variable_name, "_pct") ~ "percent",
TRUE ~ "integer"),
keywords = "USDA|NASS|COA|Census of Agriculture",
hashtags = NA,
chart_type1 = "BarChart",
chart_type2 = NA,
chart_axis_x1 = case_when(
str_detect(variable_name, "acres_operated$") ~ "Acres operated",
str_detect(variable_name, "number_operations$") ~ "Number of operations",
str_detect(variable_name, "number_producers$") ~ "Number of producers",
str_detect(variable_name, "acres_operated_pct$") ~ "Percent of all acres",
str_detect(variable_name, "number_operations_pct$") ~ "Percent of all operations",
str_detect(variable_name, "number_producers_pct$") ~ "Percent of all producers"),
chart_axis_x2 = NA,
chart_axis_y1 = NA,
chart_axis_y2 = NA,
source = "U.S. Department of Agriculture, National Agricultural Statistical Survey, Census of Agriculture",
url = "https://www.nass.usda.gov/datasets/",
citation = "U.S. Department of Agriculture, National Agricultural Statistical Survey, Data, Large Datasets, qs.census2022.txt.gz, qs.census2017.txt.gz",
last_update_date = "4/22/24") %>%
select(`2 pager title`, category, topic_area, variable_name,
user_friendly_variable_name, variable_definition,
years, periodicity, aggregation, format,
keywords, hashtags,
chart_type1, chart_type2,
chart_axis_x1, chart_axis_x2, chart_axis_y1,
chart_axis_y2, source, url, citation, last_update_date)2.5 Combine all data and put in final format
rm(df, meta, coa)
# Combine all data
df_localfoodsales <-
bind_rows(df_food_retail, df_coa_local, df_demo)
# Combine metadata
meta_localfoodsales <-
bind_rows(meta_food_retail, meta_coa_local, meta_demo) %>%
select(
`2 pager title`, category, topic_area, variable_name,
user_friendly_variable_name, variable_definition,
years, periodicity, aggregation, format,
keywords, hashtags,
chart_type1, chart_type2,
chart_axis_x1, chart_axis_x2, chart_axis_y1,
chart_axis_y2, source, url, citation, last_update_date)
# Put point data in correct order
df_localfoodsales_point <- df_food_retail_point %>%
mutate(
value = 1,
year = as.character(year)) %>%
select(
fips, county_name, state_name, category,
topic_area, year, variable_name, value, value_codes,
lat, long, org_name,
org_type, org_address)
# Write files
write_csv(df_localfoodsales, "data_final/df_localfoodsales.csv")
write_csv(df_localfoodsales_point, "data_final/df_localfoodsales_point.csv")
write_csv(meta_localfoodsales, "data_final/meta_localfoodsales.csv")