Visualizing and Maintaining the Green Canopy of NYC

Author

Akashdeep Singh

Task 1: Download NYC City Council District Boundaries

Show code
library(sf)
library(tidyverse)
library(httr2)

# Download (once), unzip (once) and load the NYC City Council district
# boundaries.
#
# Files are cached under data/mp03, so repeated calls only hit the network
# or unzip when the corresponding file is missing.
#
# Returns: an sf object with the district polygons, transformed to WGS 84.
# Errors:  if the download fails or the archive contains no .shp file.
get_nyc_council_districts <- function() {
  # URL for NYC City Council District Boundaries
  url <- "https://s-media.nyc.gov/agencies/dcp/assets/files/zip/data-tools/bytes/city-council/nycc_25c.zip"

  data_dir <- file.path("data", "mp03")
  zip_file <- file.path(data_dir, "nycc_25c.zip")

  # Step 1: Create data/mp03 if needed (no-op when it already exists)
  dir.create(data_dir, showWarnings = FALSE, recursive = TRUE)

  # Step 2: Download the zip file only if needed
  if (!file.exists(zip_file)) {
    message("Downloading NYC City Council District boundaries...")

    # Download under a temporary name and rename on success, so that an
    # interrupted transfer never leaves a truncated archive that later runs
    # would mistake for a valid cached copy.
    partial_zip <- paste0(zip_file, ".part")
    on.exit(unlink(partial_zip), add = TRUE)

    status <- download.file(url, partial_zip, mode = "wb")
    if (status != 0 || !file.rename(partial_zip, zip_file)) {
      stop("Failed to download ", url, call. = FALSE)
    }
  }

  # Step 3: Unzip only if needed
  # List the archive contents to find the .shp file
  zip_contents <- unzip(zip_file, list = TRUE)
  shp_files <- zip_contents$Name[grepl("\\.shp$", zip_contents$Name)]

  if (length(shp_files) == 0) {
    stop("No .shp file found in the zip archive")
  }

  # Only the first .shp entry is used
  shp_file <- file.path(data_dir, shp_files[1])

  if (!file.exists(shp_file)) {
    message("Unzipping district boundaries...")
    unzip(zip_file, exdir = data_dir)
  }

  # Step 4: Read the shapefile
  districts <- st_read(shp_file, quiet = TRUE)

  # Steps 5-6: Transform to WGS 84 and return the result
  st_transform(districts, crs = "WGS84")
}

# Usage:
nyc_districts <- get_nyc_council_districts()

Task 2: Download Tree Points

Show code
library(httr2)
library(sf)
library(dplyr)
library(glue)

# Download the tree-point dataset page by page and return it as one table.
#
# Each page is cached as trees_NNN.geojson inside `out_dir`; a page whose file
# already exists is not requested again.
#
# Arguments:
#   base_url  API endpoint to page through.
#   limit     Number of records requested per page.
#   out_dir   Directory in which the page files are cached.
#
# Returns: all non-empty pages read by st_read() and row-bound together.
download_tree_points <- function(
    base_url = "https://data.cityofnewyork.us/resource/hn5i-inap.json",
    limit = 5000,
    out_dir = "data/mp03"
) {
  if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

  # Render paging numbers without scientific notation (e.g. 100000, never
  # "1e+05") so neither the query string nor the log output can be misread.
  plain <- function(x) format(x, scientific = FALSE, trim = TRUE)

  offset <- 0
  chunk_id <- 1
  chunks <- list()

  repeat {
    out_file <- file.path(out_dir, sprintf("trees_%03d.geojson", chunk_id))

    if (!file.exists(out_file)) {
      message(sprintf(
        "Downloading chunk %s (offset = %s)...", plain(chunk_id), plain(offset)
      ))

      # A fixed sort order keeps pages from overlapping or skipping rows;
      # `:id` is assumed to be the endpoint's internal row identifier
      # (Socrata convention) - confirm against the API docs.
      req <- request(base_url) |>
        req_url_query(
          `$limit`  = plain(limit),
          `$offset` = plain(offset),
          `$order`  = ":id"
        )

      resp <- req_perform(req)
      resp_body_raw(resp) |> writeBin(con = out_file)
    } else {
      message(sprintf("Chunk %s already exists — skipping.", plain(chunk_id)))
    }

    # A page that cannot be parsed is treated like an empty page, i.e. as the
    # end of the dataset.
    dat <- tryCatch(
      st_read(out_file, quiet = TRUE),
      error = function(e) NULL
    )

    if (is.null(dat) || nrow(dat) == 0) {
      message("No more data returned — stopping download.")
      break
    }

    # Keep the parsed page. Combining these (instead of re-reading every
    # trees_*.geojson in out_dir) avoids a second read of each file and never
    # touches the empty/unreadable terminal page or unrelated leftover files.
    chunks[[length(chunks) + 1]] <- dat

    if (nrow(dat) < limit) {
      message("Final chunk downloaded — reached end of dataset.")
      break
    }

    offset <- offset + limit
    chunk_id <- chunk_id + 1
  }

  message("Combining all downloaded chunks...")
  tree_data <- bind_rows(chunks)

  message(sprintf("✔ Finished — total rows: %s", plain(nrow(tree_data))))

  tree_data
}

trees <- download_tree_points()

Task 3: Plot All Tree Points

Show code
library(sf)
library(dplyr)
library(ggplot2)


# Rebuild the point geometry from the longitude/latitude attribute columns:
# discard the geometry attached on read, drop rows missing either coordinate,
# and create points in EPSG:4326.
trees_fixed <- trees |>
  st_drop_geometry() |>
  filter(!(is.na(longitude) | is.na(latitude))) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Sanity checks: overall extent and the first few records.
print(st_bbox(trees_fixed))
print(head(trees_fixed))

# Put the trees in the same CRS as the district polygons.
trees_transformed <- trees_fixed |>
  st_transform(st_crs(nyc_districts))
Show code
# Overlay a subset of the trees on the council districts. Only the first
# 20,000 points are drawn to keep rendering fast; head() is used instead of
# `[1:20000, ]` so the subset stays within bounds when fewer rows exist.
ggplot() +
  geom_sf(data = nyc_districts, fill = "lightgray", color = "black", linewidth = 0.5) +
  geom_sf(
    data = head(trees_transformed, 20000),
    color = "darkgreen", alpha = 0.3, size = 0.5
  ) +
  labs(title = "NYC Street Trees by Council District",
       subtitle = "20,000 trees") +
  theme_minimal()

Show code
# Spatially join each tree to the council district polygon it intersects.
trees_with_districts <- trees_transformed |>
  st_join(nyc_districts, join = st_intersects)
Show code
# Check the join: number of trees per council district, ignoring trees that
# were not matched to any district.
final <- trees_with_districts |>
  st_drop_geometry() |>
  filter(!is.na(CounDist)) |>
  group_by(CounDist) |>
  summarise(count = n())

print(final)

Task 4: District-Level Analysis of Tree Coverage

Show code
library(dplyr)
library(sf)
# Quick look at the joined data: first rows, then the total row count.
trees_with_districts |> head()
trees_with_districts |> nrow()

Question 1: Which council district has the most trees?

Show code
# One label per district: the zip_city value that occurs most often among the
# district's trees. Joining on *all* distinct zip_city values would return
# several rows for any district whose trees carry more than one zip_city.
district_city <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  count(CounDist, zip_city, name = "n_trees") %>%
  group_by(CounDist) %>%
  slice_max(n_trees, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(CounDist, zip_city)

# District with the largest number of trees. Trees that fall outside every
# district (CounDist is NA) are excluded so they cannot win the ranking.
q1 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist) %>%
  summarize(tree_count = n()) %>%
  slice_max(tree_count, n = 1, with_ties = FALSE) %>%
  left_join(district_city, by = "CounDist")

print(q1)
# A tibble: 1 × 3
  CounDist tree_count zip_city     
     <int>      <int> <chr>        
1       51      52728 Staten Island

The district with the most trees is District 51, which is located in Staten Island. This makes sense, as Staten Island is often considered the most suburban borough.

Question 2: Which district has highest density of trees?

Show code
# One label per district: the zip_city value that occurs most often among the
# district's trees. Joining on *all* distinct zip_city values would return
# several rows for any district whose trees carry more than one zip_city.
district_city <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  count(CounDist, zip_city, name = "n_trees") %>%
  group_by(CounDist) %>%
  slice_max(n_trees, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(CounDist, zip_city)

# District with the most trees per unit of area. Shape_Area comes from the
# district boundary file (its unit is presumably square feet - confirm).
# Trees outside every district (CounDist is NA) are excluded.
q2 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist, Shape_Area) %>%
  summarize(tree_count = n(), .groups = "drop") %>%
  mutate(tree_density = tree_count / Shape_Area) %>%
  slice_max(tree_density, n = 1, with_ties = FALSE) %>%
  left_join(district_city, by = "CounDist")
print(q2)
# A tibble: 1 × 5
  CounDist Shape_Area tree_count tree_density zip_city
     <int>      <dbl>      <int>        <dbl> <chr>   
1        9  56263769.       8175     0.000145 New York

The district with the highest tree density is District 9, in Manhattan. This is interesting because Manhattan would seem to be the last place to find the highest density of trees.

Question 3: Which district has highest fraction of dead trees?

Show code
# One label per district: the zip_city value that occurs most often among the
# district's trees. Joining on *all* distinct zip_city values would return
# several rows for any district whose trees carry more than one zip_city.
district_city <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  count(CounDist, zip_city, name = "n_trees") %>%
  group_by(CounDist) %>%
  slice_max(n_trees, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(CounDist, zip_city)

# District with the largest share of trees whose status is "Dead". Trees
# outside every district (CounDist is NA) are excluded; a missing status is
# counted as not dead.
q3 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(!is.na(CounDist)) %>%
  group_by(CounDist) %>%
  summarize(
    total_trees = n(),
    dead_trees = sum(status == "Dead", na.rm = TRUE),
    fraction_dead = dead_trees / total_trees
  ) %>%
  slice_max(fraction_dead, n = 1, with_ties = FALSE) %>%
  left_join(district_city, by = "CounDist")


print(q3)
# A tibble: 1 × 5
  CounDist total_trees dead_trees fraction_dead zip_city
     <int>       <int>      <int>         <dbl> <chr>   
1       16        6897        395        0.0573 Bronx   

The district with the highest fraction of dead trees is District 16, in the Bronx, where about 5.7% of trees (395 of 6,897) are dead.

Question 4: Most common tree species in Manhattan

# Derive the borough from the council district number; districts outside
# 1-51 (or missing) get NA.
trees_with_districts <- trees_with_districts |>
  mutate(
    borough = case_when(
      between(CounDist, 1, 10) ~ "Manhattan",
      between(CounDist, 11, 18) ~ "Bronx",
      between(CounDist, 19, 32) ~ "Queens",
      between(CounDist, 33, 48) ~ "Brooklyn",
      between(CounDist, 49, 51) ~ "Staten Island",
      TRUE ~ NA_character_
    )
  )

# Most common species among Manhattan trees. Records without a species name
# are dropped first so that "missing" can never be reported as the most
# common species.
q4 <- trees_with_districts %>%
  st_drop_geometry() %>%
  filter(borough == "Manhattan", !is.na(spc_common)) %>%
  group_by(spc_common) %>%
  summarize(count = n()) %>%
  slice_max(count, n = 1, with_ties = FALSE)

print(q4)
# A tibble: 1 × 2
  spc_common  count
  <chr>       <int>
1 honeylocust 13644

The most common species in Manhattan is the Honeylocust.

Question 5: Species of tree closest to Baruch’s campus

# Baruch coordinates: 40.7402 N, 73.9834 W


# Build a one-point geometry column in WGS 84 from a latitude/longitude pair.
# Note the argument order (lat, lon); the point itself is stored as (lon, lat).
# Extra arguments in `...` are accepted but not used.
new_st_point <- function(lat, lon, ...) {
  pt <- st_point(c(lon, lat))
  st_set_crs(st_sfc(pt), "WGS84")
}

# Baruch College as a point, expressed in the same CRS as the tree data.
baruch_point <- new_st_point(40.7402, -73.9834) |>
  st_transform(st_crs(trees_with_districts))

# Distance from every tree to Baruch; keep only the nearest tree and the
# columns needed to describe it.
q5 <- trees_with_districts |>
  mutate(distance = st_distance(geometry, baruch_point)) |>
  arrange(distance) |>
  slice_head(n = 1) |>
  select(spc_common, distance, address)

print(q5)
Simple feature collection with 1 feature and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -73.98334 ymin: 40.7405 xmax: -73.98334 ymax: 40.7405
Geodetic CRS:  WGS 84
    spc_common     distance            address                  geometry
1 Callery pear 33.75424 [m] 137 EAST 25 STREET POINT (-73.98334 40.7405)

The tree closest to Baruch College is a Callery pear, about 34 meters away at 137 East 25th Street.

Task 5: NYC Parks Proposal

Park Proposal:

We propose a comprehensive tree health initiative for Manhattan’s District 2 to address the district’s critical tree mortality crisis. This program will plant 500 new street trees and remove all dead trees, with priority given to under-served corridors east of Second Avenue, where tree coverage falls significantly below city standards. The initiative will focus on diversifying species composition by introducing climate-resilient varieties such as London Planetree and Red Oak, while reducing dependence on overrepresented species like Honeylocust.

District 2 faces a tree health emergency that demands immediate intervention. Our analysis reveals that District 2 has one of the highest percentages of dead trees among comparable Lower Manhattan districts. Furthermore, District 2’s tree density falls below neighboring districts despite having among the highest pedestrian traffic volumes. The eastern corridor particularly suffers, with streets having significantly fewer trees per block compared to recommended standards of 12-15 trees per block.

Scope: 133 dead trees removed + 500 new plantings = 633 total trees

Our district requires urgent intervention.

Tree Mortality Crisis: District 2 has the highest rate of dead trees among comparable districts.

# Trees in Council District 2, bucketed for display: "Dead" when status is
# "Dead", otherwise "Poor" when health is "Poor", and "Healthy" for the rest.
district2_trees <- trees_with_districts |>
  filter(CounDist == 2) |>
  mutate(
    tree_status = case_when(
      status == "Dead" ~ "Dead",
      health == "Poor" ~ "Poor",
      TRUE ~ "Healthy"
    )
  )

# Outline of District 2, drawn underneath the tree points.
district2_boundary <- nyc_districts |>
  filter(CounDist == 2)

# Map of District 2 with each tree colored by its display bucket.
m1 <- ggplot() +
  geom_sf(
    data = district2_boundary,
    fill = "gray95", color = "black", linewidth = 0.8
  ) +
  geom_sf(
    data = district2_trees,
    aes(color = tree_status),
    size = 0.5, alpha = 0.6
  ) +
  scale_color_manual(
    values = c("Healthy" = "#10b981", "Poor" = "#fbbf24", "Dead" = "#dc2626"),
    name = ""
  ) +
  labs(title = "District 2 Tree Health") +
  theme_void(base_size = 9) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    legend.position = "right"
  )

m1

The zoomed map reveals that tree health problems are concentrated in the eastern corridor of the district, particularly east of Second Avenue, where lower-income residents face the worst conditions. By removing the 133 dead trees and planting 500 new trees of climate-resilient species, this program will bring District 2’s urban canopy more in line with neighboring districts, improve air quality, reduce urban heat effects, and eliminate safety hazards. The side-by-side comparison with District 1 demonstrates the stark disparity in tree health that this initiative will address.