Fix excerpt generator getting confused by massive img srcs
This commit is contained in:
parent
1726a7c711
commit
6fbda7dc78
|
@ -120,9 +120,19 @@ private func elementHandler(element: OpaquePointer!, userData: UnsafeMutableRawP
|
|||
|
||||
/// lol-html text-chunk callback that accumulates the text of the paragraph currently being read.
///
/// While `isInParagraph` is set on the `UserData` behind `userData`, the content of each chunk is
/// appended to `paragraphText`. Once the accumulated text reaches 1024 characters the paragraph is
/// abandoned: `paragraphText` is cleared and `isInParagraph` is reset.
///
/// - Parameters:
///   - chunk: The text chunk passed to the handler by lol-html.
///   - userData: Raw pointer that is reinterpreted as a pointer to `UserData`.
/// - Returns: Always `LOL_HTML_CONTINUE`.
private func textHandler(chunk: OpaquePointer!, userData: UnsafeMutableRawPointer!) -> lol_html_rewriter_directive_t {
    // Paragraphs longer than this are treated as bogus and discarded (see below).
    let maxParagraphLength = 1024

    let userDataPtr = userData.assumingMemoryBound(to: UserData.self)
    guard userDataPtr.pointee.isInParagraph else {
        return LOL_HTML_CONTINUE
    }

    let s = lol_html_text_chunk_content_get(chunk)
    // Decode by copying: the resulting String never aliases the chunk's buffer, and malformed
    // UTF-8 is repaired with U+FFFD instead of crashing on a force-unwrap.
    let content = String(decoding: UnsafeRawBufferPointer(start: s.data, count: s.len), as: UTF8.self)
    userDataPtr.pointee.paragraphText += content

    if userDataPtr.pointee.paragraphText.count >= maxParagraphLength {
        // lol-html seems to get confused by img tags with hundreds of kilobytes of data in their
        // src attributes and returns that data as text even though it's a tag.
        // If the text is over 1024 characters so far, we assume that's what's happened,
        // abandon this attempt, and try again at the next paragraph.
        userDataPtr.pointee.paragraphText = ""
        userDataPtr.pointee.isInParagraph = false
    }

    return LOL_HTML_CONTINUE
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue