Fix excerpt generator getting confused by massive img srcs
This commit is contained in:
parent
1726a7c711
commit
6fbda7dc78
|
@ -120,9 +120,19 @@ private func elementHandler(element: OpaquePointer!, userData: UnsafeMutableRawP
|
||||||
|
|
||||||
/// lol-html text-chunk callback that accumulates the text of the paragraph currently being read.
///
/// - Parameters:
///   - chunk: The text chunk handed to the callback by lol-html.
///   - userData: Pointer to the `UserData` state shared between the rewriter callbacks.
/// - Returns: Always `LOL_HTML_CONTINUE`, so rewriting carries on.
private func textHandler(chunk: OpaquePointer!, userData: UnsafeMutableRawPointer!) -> lol_html_rewriter_directive_t {
    // Upper bound (in characters) on the text collected for one paragraph before it is discarded.
    let maxParagraphLength = 1024

    let state = userData.assumingMemoryBound(to: UserData.self)

    // Text outside of a paragraph is not collected.
    guard state.pointee.isInParagraph else {
        return LOL_HTML_CONTINUE
    }

    let chunkContent = lol_html_text_chunk_content_get(chunk)
    // Copy the bytes into a Swift-owned String instead of aliasing the chunk's buffer.
    // `String(decoding:as:)` cannot fail: malformed UTF-8 is replaced with U+FFFD rather than
    // crashing on a force-unwrap, and a nil/empty buffer simply yields "".
    let chunkBytes = UnsafeRawBufferPointer(start: chunkContent.data, count: chunkContent.len)
    state.pointee.paragraphText += String(decoding: chunkBytes, as: UTF8.self)

    if state.pointee.paragraphText.count >= maxParagraphLength {
        // lol-html seems to get confused by img tags with hundreds of kilobytes of data in their
        // src attributes and returns that data as text even though it's a tag.
        // If the text has reached the limit, we assume that's what has happened,
        // abandon this attempt, and try again at the next paragraph.
        state.pointee.paragraphText = ""
        state.pointee.isInParagraph = false
    }

    return LOL_HTML_CONTINUE
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue