Fix excerpt generator getting confused by massive img srcs

This commit is contained in:
Shadowfacts 2022-01-15 19:31:04 -05:00
parent 1726a7c711
commit 6fbda7dc78
1 changed files with 13 additions and 3 deletions

View File

@@ -120,9 +120,19 @@ private func elementHandler(element: OpaquePointer!, userData: UnsafeMutableRawP
/// lol-html text-chunk callback that accumulates text for the paragraph currently being read.
///
/// Text is only collected while `UserData.isInParagraph` is set. If the collected text reaches
/// 1024 characters, the paragraph is abandoned: the buffer is cleared and `isInParagraph` is reset.
///
/// - Parameters:
///   - chunk: The lol-html text chunk passed to the handler.
///   - userData: Raw pointer that is rebound to `UserData`; must not be nil.
/// - Returns: Always `LOL_HTML_CONTINUE`.
private func textHandler(chunk: OpaquePointer!, userData: UnsafeMutableRawPointer!) -> lol_html_rewriter_directive_t {
    let userDataPtr = userData.assumingMemoryBound(to: UserData.self)

    // Text outside of a paragraph is ignored entirely.
    guard userDataPtr.pointee.isInParagraph else {
        return LOL_HTML_CONTINUE
    }

    let s = lol_html_text_chunk_content_get(chunk)
    // Copy the bytes into a native Swift string instead of wrapping the buffer without copying:
    // the accumulated text outlives this callback, so it must not alias memory owned by lol-html.
    // Decoding this way also cannot fail (malformed UTF-8 is repaired), so no force-unwrap is needed.
    let content = String(decoding: UnsafeRawBufferPointer(start: s.data, count: s.len), as: UTF8.self)
    userDataPtr.pointee.paragraphText += content

    if userDataPtr.pointee.paragraphText.count >= 1024 {
        // lol-html seems to get confused by img tags with hundreds of kilobytes of data in their src attributes
        // and returns that data as text even though it's a tag.
        // If the text is 1024 characters or more so far, we assume that's what's happened,
        // abandon this attempt, and try again at the next paragraph.
        userDataPtr.pointee.paragraphText = ""
        userDataPtr.pointee.isInParagraph = false
    }
    return LOL_HTML_CONTINUE
}