From 988dc90b9b8eb97656da0fa11ab8a6dd4c11b09d Mon Sep 17 00:00:00 2001 From: lovasoa Date: Wed, 20 Sep 2023 10:05:32 +0200 Subject: [PATCH] add support for relative urls in IIIF images fixes https://github.com/lovasoa/dezoomify-rs/issues/215 --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/iiif/mod.rs | 75 +++++++++++++++++++---------- src/iiif/tile_info.rs | 7 +++ src/network.rs | 107 +++++++++++++++++++++++++++--------------- 5 files changed, 127 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0be370..c919803 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -534,7 +534,7 @@ checksum = "4f8a51dd197fa6ba5b4dc98a990a43cc13693c23eb0089ebb0fcc1f04152bca6" [[package]] name = "dezoomify-rs" -version = "2.11.1" +version = "2.11.2" dependencies = [ "aes", "base64", diff --git a/Cargo.toml b/Cargo.toml index c0b10b6..7d91860 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dezoomify-rs" -version = "2.11.1" +version = "2.11.2" authors = ["lovasoa"] edition = "2021" license-file = "LICENSE" diff --git a/src/iiif/mod.rs b/src/iiif/mod.rs index b3ee874..7626995 100644 --- a/src/iiif/mod.rs +++ b/src/iiif/mod.rs @@ -51,7 +51,10 @@ fn zoom_levels(url: &str, raw_info: &[u8]) -> Result { .filter(|info| { let keep = info.has_distinctive_iiif_properties(); if keep { - debug!("keeping image info {:?} because it has distinctive IIIF properties", info) + debug!( + "keeping image info {:?} because it has distinctive IIIF properties", + info + ) } else { info!("dropping level {:?}", info) } @@ -62,9 +65,12 @@ fn zoom_levels(url: &str, raw_info: &[u8]) -> Result { if levels.is_empty() { Err(e.into()) } else { - info!("No normal info.json parsing failed ({}), \ + info!( + "No normal info.json parsing failed ({}), \ but {} inline json5 zoom level(s) were found.", - e, levels.len()); + e, + levels.len() + ); Ok(levels) } } @@ -73,6 +79,7 @@ fn zoom_levels(url: &str, raw_info: &[u8]) -> Result { fn zoom_levels_from_info(url: &str, mut image_info: ImageInfo) -> ZoomLevels { image_info.remove_test_id(); + image_info.resolve_relative_urls(url); let img = Arc::new(image_info); let tiles = img.tiles(); let base_url = &Arc::from(url.replace("/info.json", "")); @@ -83,13 +90,13 @@ fn zoom_levels_from_info(url: &str, mut image_info: ImageInfo) -> ZoomLevels { let quality = Arc::from(img.best_quality()); let format = Arc::from(img.best_format()); let size_format = img.preferred_size_format(); - info!("Chose the following image parameters: tile_size=({}) quality={} format={}", - tile_size, quality, format); + info!( + "Chose the following image parameters: tile_size=({}) quality={} format={}", + tile_size, quality, format + ); let page_info = &img; // Required to allow the move - tile_info - .scale_factors - .iter() - .map(move |&scale_factor| IIIFZoomLevel { + tile_info.scale_factors.iter().map(move |&scale_factor| { + let zoom_level = IIIFZoomLevel { scale_factor, tile_size, page_info: Arc::clone(page_info), @@ -97,7 +104,10 @@ fn zoom_levels_from_info(url: &str, mut image_info: ImageInfo) -> ZoomLevels { quality: Arc::clone(&quality), format: Arc::clone(&format), size_format, - }) + }; + debug!("Found zoom level {zoom_level:?}: page_info: {page_info:?}, tile_size: {tile_size:?}, scale_factor: {scale_factor}, base_url: {base_url}, quality: {quality}, format: {format}, size_format: {size_format:?}"); + zoom_level + }) }) .into_zoom_levels(); levels @@ -129,12 +139,20 @@ impl TilesRect for IIIFZoomLevel { let tile_size = scaled_tile_size / self.scale_factor; format!( "{base}/{x},{y},{img_w},{img_h}/{tile_size}/{rotation}/{quality}.{format}", - base = self.page_info.id.as_deref().unwrap_or_else(|| self.base_url.as_ref()), + base = self + .page_info + .id + .as_deref() + .unwrap_or_else(|| self.base_url.as_ref()), x = xy_pos.x, y = xy_pos.y, img_w = scaled_tile_size.x, img_h = scaled_tile_size.y, - tile_size = TileSizeFormatter { w: tile_size.x, h: tile_size.y, format: self.size_format }, + tile_size = TileSizeFormatter { + w: tile_size.x, + h: tile_size.y, + format: self.size_format + }, rotation = 0, quality = self.quality, format = self.format, @@ -142,7 +160,11 @@ impl TilesRect for IIIFZoomLevel { } } -struct TileSizeFormatter { w: u32, h: u32, format: TileSizeFormat } +struct TileSizeFormatter { + w: u32, + h: u32, + format: TileSizeFormat, +} impl std::fmt::Display for TileSizeFormatter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -202,7 +224,6 @@ fn test_tiles() { ]) } - #[test] fn test_tiles_max_area_filter() { // Predefined tile size (1024x1024) is over maxArea (262144 = 512x512). @@ -219,12 +240,15 @@ fn test_tiles_max_area_filter() { .into_iter() .map(|t| t.url) .collect(); - assert_eq!(tiles, vec![ - "http://ophir.dev/0,0,512,512/512,512/0/default.jpg", - "http://ophir.dev/512,0,512,512/512,512/0/default.jpg", - "http://ophir.dev/0,512,512,512/512,512/0/default.jpg", - "http://ophir.dev/512,512,512,512/512,512/0/default.jpg", - ]) + assert_eq!( + tiles, + vec![ + "http://ophir.dev/0,0,512,512/512,512/0/default.jpg", + "http://ophir.dev/512,0,512,512/512,512/0/default.jpg", + "http://ophir.dev/0,512,512,512/512,512/0/default.jpg", + "http://ophir.dev/512,512,512,512/512,512/0/default.jpg", + ] + ) } #[test] @@ -259,7 +283,10 @@ fn test_false_positive() { }; "#; let res = zoom_levels("https://orion2020v5b.spaceforeverybody.com/", data); - assert!(res.is_err(), "openseadragon zoomify image should not be misdetected"); + assert!( + res.is_err(), + "openseadragon zoomify image should not be misdetected" + ); } #[test] @@ -279,11 +306,7 @@ fn test_qualities() { let mut levels = zoom_levels("test.com", data).unwrap(); let level = &mut levels[0]; assert_eq!(level.size_hint(), Some(Vec2d { x: 515, y: 381 })); - let tiles: Vec = level - .next_tiles(None) - .into_iter() - .map(|t| t.url) - .collect(); + let tiles: Vec = level.next_tiles(None).into_iter().map(|t| t.url).collect(); assert_eq!(tiles, vec![ "https://images.britishart.yale.edu/iiif/fd470c3e-ead0-4878-ac97-d63295753f82/0,0,5156,3816/515,381/0/native.png", ]) diff --git a/src/iiif/tile_info.rs b/src/iiif/tile_info.rs index 811353a..83ac188 100644 --- a/src/iiif/tile_info.rs +++ b/src/iiif/tile_info.rs @@ -8,6 +8,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use crate::Vec2d; +use crate::network::resolve_relative; #[derive(Default, Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct ImageInfo { @@ -153,6 +154,12 @@ impl ImageInfo { } } } + + pub fn resolve_relative_urls(&mut self, base: &str) { + if let Some(id) = &self.id { + self.id = Some(resolve_relative(base, id)) + } + } } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] diff --git a/src/network.rs b/src/network.rs index ee7d90d..52119fa 100644 --- a/src/network.rs +++ b/src/network.rs @@ -4,18 +4,18 @@ use std::path::PathBuf; use std::sync::Arc; use image::DynamicImage; -use log::{debug, warn}; -use reqwest::{Client, header}; +use log::{debug, trace, warn}; +use reqwest::{header, Client}; use sanitize_filename_reader_friendly::sanitize; use tokio::fs; use tokio::time::Duration; use url::Url; -use crate::{TileDownloadError, ZoomError}; use crate::arguments::Arguments; use crate::dezoomer::{PostProcessFn, TileReference}; use crate::errors::BufferToImageError; use crate::tile::Tile; +use crate::{TileDownloadError, ZoomError}; /// Fetch data, either from an URL or a path to a local file. /// If uri doesnt start with "http(s)://", it is considered to be a path @@ -23,13 +23,22 @@ use crate::tile::Tile; // TODO: return Bytes pub async fn fetch_uri(uri: &str, http: &Client) -> Result, ZoomError> { if uri.starts_with("http://") || uri.starts_with("https://") { - debug!("Loading url: '{}'", uri); - let response = http.get(uri).send() - .await?.error_for_status()?; + let req = http.get(uri).build()?; + debug!( + "Making http request to {uri} with headers '{:?}'", + req.headers() + ); + let response = http.execute(req).await?; + debug!( + "Got http response for {uri}: status={}, headers={:?}", + response.status(), + response.headers() + ); + let response = response.error_for_status()?; let mut contents = Vec::new(); let bytes = response.bytes().await?; contents.extend(bytes); - debug!("Loaded url: '{}'", uri); + trace!("Successfully finished loading url: '{}'", uri); Ok(contents) } else { debug!("Loading file: '{}'", uri); @@ -39,7 +48,6 @@ pub async fn fetch_uri(uri: &str, http: &Client) -> Result, ZoomError> { } } - pub struct TileDownloader { pub http_client: reqwest::Client, pub post_process_fn: PostProcessFn, @@ -48,7 +56,6 @@ pub struct TileDownloader { pub tile_storage_folder: Option, } - impl TileDownloader { pub async fn download_tile( &self, @@ -59,20 +66,24 @@ impl TileDownloader { let n = 100; let idx: f64 = ((tile_reference.position.x + tile_reference.position.y) % n).into(); let tile_reference = Arc::new(tile_reference); - let mut wait_time = self.retry_delay + Duration::from_secs_f64(idx * self.retry_delay.as_secs_f64() / n as f64); + let mut wait_time = self.retry_delay + + Duration::from_secs_f64(idx * self.retry_delay.as_secs_f64() / n as f64); let mut failures: usize = 0; loop { match self.load_image(Arc::clone(&tile_reference)).await { Ok(image) => { - return Ok(Tile { image, position: tile_reference.position }) - }, + return Ok(Tile { + image, + position: tile_reference.position, + }) + } Err(cause) => { if failures >= self.retries { return Err(TileDownloadError { tile_reference: Arc::try_unwrap(tile_reference) .expect("tile reference shouldn't leak"), cause, - }) + }); } failures += 1; warn!("{}. Retrying tile download in {:?}.", cause, wait_time); @@ -87,17 +98,16 @@ impl TileDownloader { &self, tile_reference: Arc, ) -> Result { - let bytes = - if let Some(bytes) = self.read_from_tile_cache(&tile_reference.url).await { - bytes - } else { - let bytes = self.download_image_bytes(Arc::clone(&tile_reference)).await?; - self.write_to_tile_cache(&tile_reference.url, &bytes).await; - bytes - }; - Ok(tokio::task::spawn_blocking(move || { - image::load_from_memory(&bytes) - }).await??) + let bytes = if let Some(bytes) = self.read_from_tile_cache(&tile_reference.url).await { + bytes + } else { + let bytes = self + .download_image_bytes(Arc::clone(&tile_reference)) + .await?; + self.write_to_tile_cache(&tile_reference.url, &bytes).await; + bytes + }; + Ok(tokio::task::spawn_blocking(move || image::load_from_memory(&bytes)).await??) } async fn download_image_bytes( @@ -109,7 +119,8 @@ impl TileDownloader { bytes = tokio::task::spawn_blocking(move || -> Result<_, BufferToImageError> { post_process(&tile_reference, bytes) .map_err(|e| BufferToImageError::PostProcessing { e }) - }).await??; + }) + .await??; } Ok(bytes) } @@ -118,7 +129,10 @@ impl TileDownloader { if let Some(root) = &self.tile_storage_folder { match tokio::fs::write(root.join(&sanitize(uri)), contents).await { Ok(_) => debug!("Wrote {} to tile cache ({} bytes)", uri, contents.len()), - Err(e) => warn!("Unable to write {} to the tile cache {:?}: {}", uri, root, e) + Err(e) => warn!( + "Unable to write {} to the tile cache {:?}: {}", + uri, root, e + ), } } } @@ -129,15 +143,15 @@ impl TileDownloader { Ok(d) => { debug!("{} read from tile cache", uri); return Some(d); - }, - Err(e) => debug!("Unable to open {} from tile cache {:?}: {}", uri, root, e) + } + Err(e) => debug!("Unable to open {} from tile cache {:?}: {}", uri, root, e), } } None } } -pub fn client<'a, I: Iterator>( +pub fn client<'a, I: Iterator>( headers: I, args: &Arguments, uri: Option<&str>, @@ -149,7 +163,10 @@ pub fn client<'a, I: Iterator>( .chain(headers.map(|(k, v)| (k, v))) .map(|(name, value)| Ok((name.parse()?, value.parse()?))) .collect::>()?; - debug!("Creating an http client with the following headers: {:?}", header_map); + debug!( + "Creating an http client with the following headers: {:?}", + header_map + ); let client = reqwest::Client::builder() .default_headers(header_map) .referer(false) @@ -166,10 +183,10 @@ pub fn default_headers() -> HashMap { pub fn resolve_relative(base: &str, path: &str) -> String { if Url::parse(path).is_ok() { - return path.to_string() + return path.to_string(); } else if let Ok(url) = Url::parse(base) { if let Ok(r) = url.join(path) { - return r.to_string() + return r.to_string(); } } let mut res = PathBuf::from(base.rsplitn(2, '/').last().unwrap_or_default()); @@ -183,17 +200,31 @@ pub fn remove_bom(contents: &[u8]) -> &[u8] { const BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; // UTF8 byte order mark if contents.starts_with(BOM) { &contents[BOM.len()..] - } else { contents } + } else { + contents + } } #[test] fn test_resolve_relative() { use std::path::MAIN_SEPARATOR; - assert_eq!(resolve_relative("/a/b", "c/d"), format!("/a{}c/d", MAIN_SEPARATOR)); - assert_eq!(resolve_relative("C:\\X", "c/d"), format!("C:\\X{}c/d", MAIN_SEPARATOR)); - assert_eq!(resolve_relative("/a/b", "http://example.com/x"), "http://example.com/x"); - assert_eq!(resolve_relative("http://a.b", "http://example.com/x"), "http://example.com/x"); + assert_eq!( + resolve_relative("/a/b", "c/d"), + format!("/a{}c/d", MAIN_SEPARATOR) + ); + assert_eq!( + resolve_relative("C:\\X", "c/d"), + format!("C:\\X{}c/d", MAIN_SEPARATOR) + ); + assert_eq!( + resolve_relative("/a/b", "http://example.com/x"), + "http://example.com/x" + ); + assert_eq!( + resolve_relative("http://a.b", "http://example.com/x"), + "http://example.com/x" + ); assert_eq!(resolve_relative("http://a.b", "c/d"), "http://a.b/c/d"); assert_eq!(resolve_relative("http://a.b/x", "c/d"), "http://a.b/c/d"); assert_eq!(resolve_relative("http://a.b/x/", "c/d"), "http://a.b/x/c/d"); -} \ No newline at end of file +}