/**
 * @file html_renderer.cpp
 * @brief HTML renderer implementation
 *
 * Parses HTML with Gumbo and flattens the resulting tree into plain text
 * (optionally decorated with ANSI escape sequences), collecting the links
 * found along the way.
 */

#include "renderer/html_renderer.hpp"

#include "core/browser_engine.hpp"
#include "core/url_parser.hpp"

#include <gumbo.h>

#include <cstddef>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace tut {

namespace {

/// Deleter that lets std::unique_ptr own a Gumbo parse tree.
struct GumboOutputDeleter {
    void operator()(GumboOutput* output) const noexcept {
        gumbo_destroy_output(&kGumboDefaultOptions, output);
    }
};

using GumboOutputPtr = std::unique_ptr<GumboOutput, GumboOutputDeleter>;

/**
 * @brief Parse @p html into an owned Gumbo tree.
 *
 * The explicit length is passed so that input containing an embedded NUL
 * byte is not cut short at that byte.
 *
 * @return The parse tree, or an empty pointer if Gumbo returned null.
 */
GumboOutputPtr parseHtml(const std::string& html) {
    return GumboOutputPtr(gumbo_parse_with_options(
        &kGumboDefaultOptions, html.data(), html.size()));
}

/// True for elements whose content is never rendered.
bool isInvisibleTag(GumboTag tag) {
    switch (tag) {
        case GUMBO_TAG_SCRIPT:
        case GUMBO_TAG_STYLE:
        case GUMBO_TAG_HEAD:
        case GUMBO_TAG_NOSCRIPT:
            return true;
        default:
            return false;
    }
}

/// True for elements that are followed by a line break in the output.
bool isBlockTag(GumboTag tag) {
    switch (tag) {
        case GUMBO_TAG_P:
        case GUMBO_TAG_DIV:
        case GUMBO_TAG_H1:
        case GUMBO_TAG_H2:
        case GUMBO_TAG_H3:
        case GUMBO_TAG_H4:
        case GUMBO_TAG_H5:
        case GUMBO_TAG_H6:
        case GUMBO_TAG_UL:
        case GUMBO_TAG_OL:
        case GUMBO_TAG_LI:
        case GUMBO_TAG_BR:
        case GUMBO_TAG_HR:
        case GUMBO_TAG_BLOCKQUOTE:
        case GUMBO_TAG_PRE:
        case GUMBO_TAG_TABLE:
        case GUMBO_TAG_TR:
            return true;
        default:
            return false;
    }
}

/// True for <h1> .. <h6>.
bool isHeadingTag(GumboTag tag) {
    return tag >= GUMBO_TAG_H1 && tag <= GUMBO_TAG_H6;
}

/**
 * @brief Append the text of @p node and of all its descendants to @p out.
 *
 * Only GUMBO_NODE_TEXT nodes contribute; every other non-element node type
 * is ignored.
 */
void appendDescendantText(const GumboNode* node, std::string& out) {
    if (node->type == GUMBO_NODE_TEXT) {
        out += node->v.text.text;
        return;
    }
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }
    const GumboVector& children = node->v.element.children;
    for (unsigned int i = 0; i < children.length; ++i) {
        appendDescendantText(static_cast<const GumboNode*>(children.data[i]), out);
    }
}

/**
 * @brief Check whether @p url starts with a URI scheme.
 *
 * Follows the RFC 3986 grammar: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 * followed by ':'. A "://" that appears later in the string (for example
 * inside a query parameter) therefore does not make the URL absolute,
 * while "mailto:" or "tel:" style URLs are recognised even though they
 * contain no "//".
 */
bool hasUrlScheme(const std::string& url) {
    const auto is_alpha = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    };
    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };

    if (url.empty() || !is_alpha(url.front())) {
        return false;
    }
    for (std::size_t i = 1; i < url.size(); ++i) {
        const char c = url[i];
        if (c == ':') {
            return true;
        }
        if (!is_alpha(c) && !is_digit(c) && c != '+' && c != '-' && c != '.') {
            return false;
        }
    }
    return false;
}

}  // namespace

/**
 * @brief Private implementation: holds the active options and the
 *        recursive tree walkers.
 */
class HtmlRenderer::Impl {
public:
    RenderOptions options_;

    /**
     * @brief Recursively render @p node and its children as text.
     *
     * @param node        Node to render.
     * @param output      Stream receiving the rendered text.
     * @param links       Receives one entry per <a href> encountered
     *                    (only when options_.show_links is set).
     * @param link_count  Running link number; incremented per recorded link
     *                    and printed as "[N]" in front of the link content.
     */
    void renderNode(GumboNode* node, std::ostringstream& output,
                    std::vector<LinkInfo>& links, int& link_count) {
        if (node->type == GUMBO_NODE_TEXT) {
            output << node->v.text.text;
            return;
        }
        if (node->type != GUMBO_NODE_ELEMENT) {
            return;
        }

        GumboElement& element = node->v.element;
        const GumboTag tag = element.tag;

        // Skip elements that are not visible.
        if (isInvisibleTag(tag)) {
            return;
        }

        const bool is_block = isBlockTag(tag);
        const bool is_heading = isHeadingTag(tag);

        // Headings start on their own line, in bold when colors are enabled.
        if (is_heading) {
            output << "\n";
            if (options_.use_colors) {
                output << "\033[1m";  // Bold
            }
        }

        // List items get a bullet.
        if (tag == GUMBO_TAG_LI) {
            output << "\n • ";
        }

        // Links: record the target and prefix the content with its number.
        // Tracks whether a color sequence was opened so the reset below is
        // only emitted for anchors that actually started one; an anchor
        // without href must not cancel an enclosing heading's bold.
        bool link_color_open = false;
        if (tag == GUMBO_TAG_A && options_.show_links) {
            GumboAttribute* href = gumbo_get_attribute(&element.attributes, "href");
            if (href) {
                ++link_count;

                LinkInfo link;
                link.url = href->value;

                // Gather the link text from all descendants so that markup
                // nested inside the anchor still yields a label.
                std::string link_text;
                appendDescendantText(node, link_text);
                link.text = link_text;
                links.push_back(std::move(link));

                if (options_.use_colors) {
                    output << "\033[4;34m";  // Underline blue
                    link_color_open = true;
                }
                output << "[" << link_count << "]";
            }
        }

        // Recurse into the children.
        for (unsigned int i = 0; i < element.children.length; ++i) {
            renderNode(static_cast<GumboNode*>(element.children.data[i]),
                       output, links, link_count);
        }

        // Close whatever formatting was opened above.
        if (is_heading) {
            if (options_.use_colors) {
                output << "\033[0m";  // Reset
            }
            output << "\n";
        }
        if (link_color_open) {
            output << "\033[0m";  // Reset
        }
        if (is_block) {
            output << "\n";
        }
    }

    /**
     * @brief Depth-first search for the first <title> with text content.
     *
     * A <title> counts only when its first child is a text node; otherwise
     * the search continues into the element's children.
     *
     * @return The title text, or an empty string when none is found.
     */
    std::string findTitle(GumboNode* node) {
        if (node->type != GUMBO_NODE_ELEMENT) {
            return "";
        }

        const GumboVector& children = node->v.element.children;

        if (node->v.element.tag == GUMBO_TAG_TITLE && children.length > 0) {
            GumboNode* first = static_cast<GumboNode*>(children.data[0]);
            if (first->type == GUMBO_NODE_TEXT) {
                return first->v.text.text;
            }
        }

        for (unsigned int i = 0; i < children.length; ++i) {
            std::string title = findTitle(static_cast<GumboNode*>(children.data[i]));
            if (!title.empty()) {
                return title;
            }
        }
        return "";
    }
};

HtmlRenderer::HtmlRenderer() : impl_(std::make_unique<Impl>()) {}

HtmlRenderer::~HtmlRenderer() = default;

/**
 * @brief Render @p html to text using @p options.
 *
 * The given options are also stored and become the value returned by
 * getOptions().
 *
 * @return Title, rendered text and collected links; a default-constructed
 *         result when parsing yields no tree.
 */
RenderResult HtmlRenderer::render(const std::string& html, const RenderOptions& options) {
    impl_->options_ = options;

    RenderResult result;

    // Owned by a smart pointer so the tree is released on every exit path,
    // including exceptions thrown while building the strings below.
    const GumboOutputPtr parsed = parseHtml(html);
    if (!parsed) {
        return result;
    }

    // Extract the title.
    result.title = impl_->findTitle(parsed->root);

    // Render the content.
    std::ostringstream text_output;
    int link_count = 0;
    impl_->renderNode(parsed->root, text_output, result.links, link_count);
    result.text = text_output.str();

    return result;
}

/**
 * @brief Return the document title of @p html, or an empty string.
 */
std::string HtmlRenderer::extractTitle(const std::string& html) {
    const GumboOutputPtr parsed = parseHtml(html);
    if (!parsed) {
        return "";
    }
    return impl_->findTitle(parsed->root);
}

/**
 * @brief Collect every <a href> link in @p html.
 *
 * @param html      Document to scan.
 * @param base_url  When non-empty, links without a URI scheme are resolved
 *                  against it via UrlParser::resolveRelative.
 * @return The links in document order.
 */
std::vector<LinkInfo> HtmlRenderer::extractLinks(const std::string& html,
                                                 const std::string& base_url) {
    std::vector<LinkInfo> links;

    const GumboOutputPtr parsed = parseHtml(html);
    if (!parsed) {
        return links;
    }

    // Walk the tree with a scratch Impl (default options plus show_links)
    // so the options stored on this renderer are left untouched.
    Impl scratch;
    scratch.options_.show_links = true;

    std::ostringstream discarded_text;
    int link_count = 0;
    scratch.renderNode(parsed->root, discarded_text, links, link_count);

    // Resolve relative URLs.
    if (!base_url.empty()) {
        UrlParser parser;
        for (auto& link : links) {
            if (!hasUrlScheme(link.url)) {
                link.url = parser.resolveRelative(base_url, link.url);
            }
        }
    }

    return links;
}

/// Store @p options as the renderer's current options.
void HtmlRenderer::setOptions(const RenderOptions& options) {
    impl_->options_ = options;
}

/// Return the options most recently stored by setOptions() or render().
const RenderOptions& HtmlRenderer::getOptions() const {
    return impl_->options_;
}

}  // namespace tut