diff --git a/apps/api/sharedLibs/html-transformer/src/lib.rs b/apps/api/sharedLibs/html-transformer/src/lib.rs index 5a0fa39e..702c69e4 100644 --- a/apps/api/sharedLibs/html-transformer/src/lib.rs +++ b/apps/api/sharedLibs/html-transformer/src/lib.rs @@ -5,8 +5,12 @@ use serde::Deserialize; use serde_json::Value; use url::Url; +/// Extracts links from HTML +/// +/// # Safety +/// Input options must be a C HTML string. Output will be a JSON string array. Output string must be freed with free_string. #[no_mangle] -pub extern "C" fn extract_links(html: *const libc::c_char) -> *mut i8 { +pub unsafe extern "C" fn extract_links(html: *const libc::c_char) -> *mut i8 { let html = unsafe { CStr::from_ptr(html) }.to_str().unwrap(); let document = parse_html().one(html); @@ -44,9 +48,12 @@ macro_rules! insert_meta_property { }; } - +/// Extracts metadata from HTML +/// +/// # Safety +/// Input options must be a C HTML string. Output will be a JSON object. Output string must be freed with free_string. #[no_mangle] -pub extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut i8 { +pub unsafe extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut i8 { let html = unsafe { CStr::from_ptr(html) }.to_str().unwrap(); let document = parse_html().one(html); @@ -209,12 +216,12 @@ struct ImageSource { fn _transform_html_inner(opts: TranformHTMLOptions) -> Result { let mut document = parse_html().one(opts.html); - if opts.include_tags.len() > 0 { + if !opts.include_tags.is_empty() { let new_document = parse_html().one("
"); let root = new_document.select_first("div")?; for x in opts.include_tags.iter() { - for tag in document.select(&x)? { + for tag in document.select(x)? { root.as_node().append(tag.as_node().clone()); } } @@ -244,16 +251,16 @@ fn _transform_html_inner(opts: TranformHTMLOptions) -> Result { for x in opts.exclude_tags.iter() { // TODO: implement weird version - while let Ok(x) = document.select_first(&x) { + while let Ok(x) = document.select_first(x) { x.as_node().detach(); } } if opts.only_main_content { for x in EXCLUDE_NON_MAIN_TAGS.iter() { - let x: Vec<_> = document.select(&format!("{}", x))?.collect(); + let x: Vec<_> = document.select(x)?.collect(); for tag in x { - if !FORCE_INCLUDE_MAIN_TAGS.iter().any(|x| tag.as_node().select(&x).is_ok_and(|mut x| x.next().is_some())) { + if !FORCE_INCLUDE_MAIN_TAGS.iter().any(|x| tag.as_node().select(x).is_ok_and(|mut x| x.next().is_some())) { tag.as_node().detach(); } } @@ -261,9 +268,9 @@ fn _transform_html_inner(opts: TranformHTMLOptions) -> Result { } for img in document.select("img[srcset]")? { - let mut sizes: Vec = img.attributes.borrow().get("srcset").ok_or(())?.to_string().split(",").filter_map(|x| { + let mut sizes: Vec = img.attributes.borrow().get("srcset").ok_or(())?.split(",").filter_map(|x| { let tok: Vec<&str> = x.trim().split(" ").collect(); - let tok_1 = if tok.len() > 1 && tok[1].len() > 0 { + let tok_1 = if tok.len() > 1 && !tok[1].is_empty() { tok[1] } else { "1x" @@ -315,9 +322,13 @@ fn _transform_html_inner(opts: TranformHTMLOptions) -> Result { Ok(document.to_string()) } +/// Transforms rawHtml to html (formerly removeUnwantedElements) +/// +/// # Safety +/// Input options must be a C JSON string. Output will be an HTML string. Output string must be freed with free_string. #[no_mangle] -pub extern "C" fn transform_html(opts: *const libc::c_char) -> *mut i8 { - let opts: TranformHTMLOptions = match unsafe { CStr::from_ptr(opts) }.to_str().map_err(|_| ()).and_then(|x| serde_json::de::from_str(&x).map_err(|_| ())) { +pub unsafe extern "C" fn transform_html(opts: *const libc::c_char) -> *mut i8 { + let opts: TranformHTMLOptions = match unsafe { CStr::from_ptr(opts) }.to_str().map_err(|_| ()).and_then(|x| serde_json::de::from_str(x).map_err(|_| ())) { Ok(x) => x, Err(_) => { return CString::new("RUSTFC:ERROR").unwrap().into_raw(); @@ -332,7 +343,11 @@ pub extern "C" fn transform_html(opts: *const libc::c_char) -> *mut i8 { CString::new(out).unwrap().into_raw() } +/// Frees a string allocated in Rust-land. +/// +/// # Safety +/// ptr must be a non-freed string pointer returned by Rust code. #[no_mangle] -pub extern "C" fn free_string(ptr: *mut i8) { +pub unsafe extern "C" fn free_string(ptr: *mut i8) { drop(unsafe { CString::from_raw(ptr) }) }