/// <summary>
/// Converts HTML to plain text.
/// </summary>
/// <remarks>
/// Parse state (<c>_text</c>, <c>_html</c>, <c>_pos</c>) is stored in instance
/// fields and reset at the start of every <see cref="Convert"/> call.
/// </remarks>
class HtmlToText
{
    // Static data tables

    // Maps a lower-case tag name (closing tags carry a leading '/') to the
    // text written to the output when that tag is encountered.
    protected static Dictionary<string, string> _tags;

    // Lower-case names of tags whose whole inner content is skipped.
    protected static HashSet<string> _ignoreTags;

    // Instance variables
    protected TextBuilder _text;    // Output buffer for the current conversion
    protected string _html;         // Input being parsed
    protected int _pos;             // Current index into _html

    // Static constructor (one time only)
    static HtmlToText()
    {
        _tags = new Dictionary<string, string>
        {
            { "address", "\n" },
            { "blockquote", "\n" },
            { "div", "\n" },
            { "dl", "\n" },
            { "fieldset", "\n" },
            { "form", "\n" },
            { "h1", "\n" },
            { "/h1", "\n" },
            { "h2", "\n" },
            { "/h2", "\n" },
            { "h3", "\n" },
            { "/h3", "\n" },
            { "h4", "\n" },
            { "/h4", "\n" },
            { "h5", "\n" },
            { "/h5", "\n" },
            { "h6", "\n" },
            { "/h6", "\n" },
            { "p", "\n" },
            { "/p", "\n" },
            { "table", "\n" },
            { "/table", "\n" },
            { "ul", "\n" },
            { "/ul", "\n" },
            { "ol", "\n" },
            { "/ol", "\n" },
            { "/li", "\n" },
            { "br", "\n" },
            { "/td", "\t" },
            { "/tr", "\n" },
            { "/pre", "\n" }
        };

        _ignoreTags = new HashSet<string>
        {
            "script",
            "noscript",
            "style",
            "object"
        };
    }

    /// <summary>
    /// Converts the given HTML to plain text and returns the result.
    /// </summary>
    /// <param name="html">HTML to be converted. A null value is treated as
    /// an empty string.</param>
    /// <returns>Resulting plain text, with HTML entities decoded</returns>
    public string Convert(string html)
    {
        // Initialize state variables
        _text = new TextBuilder();
        _html = html ?? String.Empty;
        _pos = 0;

        // Process input
        while (!EndOfText)
        {
            char c = Peek();
            if (c == '<')
            {
                // HTML tag
                bool selfClosing;
                string tag = ParseTag(out selfClosing);

                // Handle special tag cases
                if (tag == "body")
                {
                    // Discard content before <body>
                    _text.Clear();
                }
                else if (tag == "/body")
                {
                    // Discard content after </body>
                    _pos = _html.Length;
                }
                else if (tag == "pre")
                {
                    // Enter preformatted mode
                    _text.Preformatted = true;
                    EatWhitespaceToNextLine();
                }
                else if (tag == "/pre")
                {
                    // Exit preformatted mode
                    _text.Preformatted = false;
                }

                // Emit the line break / tab associated with this tag, if any
                string value;
                if (_tags.TryGetValue(tag, out value))
                    _text.Write(value);

                // Skip everything up to the matching end tag
                if (_ignoreTags.Contains(tag))
                    EatInnerContent(tag);
            }
            else if (Char.IsWhiteSpace(c))
            {
                // Whitespace (treat all as space unless preformatted)
                _text.Write(_text.Preformatted ? c : ' ');
                MoveAhead();
            }
            else
            {
                // Other text
                _text.Write(c);
                MoveAhead();
            }
        }

        // Entities are decoded once, on the finished text
        return HttpUtility.HtmlDecode(_text.ToString());
    }

    // Eats all characters that are part of the current tag and returns the
    // lower-case tag name (with a leading '/' for closing tags). Returns an
    // empty string if the current character is not '<'. selfClosing is set
    // when an unquoted '/' appears after the tag name.
    protected string ParseTag(out bool selfClosing)
    {
        string tag = String.Empty;
        selfClosing = false;

        if (Peek() == '<')
        {
            MoveAhead();

            // Parse tag name
            EatWhitespace();
            int start = _pos;
            if (Peek() == '/')
                MoveAhead();
            while (!EndOfText && !Char.IsWhiteSpace(Peek()) &&
                Peek() != '/' && Peek() != '>')
                MoveAhead();

            // Invariant casing: tag names are compared against fixed ASCII
            // keys, so the result must not depend on the current culture.
            tag = _html.Substring(start, _pos - start).ToLowerInvariant();

            // Parse rest of tag
            while (!EndOfText && Peek() != '>')
            {
                if (Peek() == '"' || Peek() == '\'')
                {
                    EatQuotedValue();
                }
                else
                {
                    if (Peek() == '/')
                        selfClosing = true;
                    MoveAhead();
                }
            }

            // Step over the closing '>'
            MoveAhead();
        }
        return tag;
    }

    // Consumes everything up to and including the end tag that matches the
    // given (already consumed) opening tag. Nested elements with the same
    // name are balanced; any other tags inside the content are skipped.
    // If no matching end tag exists, the rest of the input is consumed.
    protected void EatInnerContent(string tag)
    {
        string endTag = "/" + tag;
        int depth = 1;  // The opening tag has already been consumed

        while (!EndOfText)
        {
            if (Peek() == '<')
            {
                // Consume a tag
                bool selfClosing;
                string inner = ParseTag(out selfClosing);

                if (inner == endTag)
                {
                    if (--depth == 0)
                        return;
                }
                else if (inner == tag && !selfClosing)
                {
                    // Nested element of the same kind needs its own end tag
                    depth++;
                }
            }
            else
            {
                MoveAhead();
            }
        }
    }

    // Returns true if the current position is at the end of
    // the string
    protected bool EndOfText
    {
        get { return (_pos >= _html.Length); }
    }

    // Safely returns the character at the current position
    // ((char)0 when positioned at the end of the input)
    protected char Peek()
    {
        return (_pos < _html.Length) ? _html[_pos] : (char)0;
    }

    // Safely advances the current position to the next character
    protected void MoveAhead()
    {
        _pos = Math.Min(_pos + 1, _html.Length);
    }

    // Moves the current position to the next non-whitespace
    // character.
    protected void EatWhitespace()
    {
        while (Char.IsWhiteSpace(Peek()))
            MoveAhead();
    }

    // Moves the current position to the next non-whitespace
    // character or the start of the next line, whichever
    // comes first
    protected void EatWhitespaceToNextLine()
    {
        while (Char.IsWhiteSpace(Peek()))
        {
            char c = Peek();
            MoveAhead();
            if (c == '\n')
                break;
        }
    }

    // Moves the current position past a quoted value. The value ends at the
    // matching quote character; a line break also terminates the search.
    protected void EatQuotedValue()
    {
        char c = Peek();
        if (c == '"' || c == '\'')
        {
            // Opening quote
            MoveAhead();

            // Find end of value
            _pos = _html.IndexOfAny(new char[] { c, '\r', '\n' }, _pos);
            if (_pos < 0)
                _pos = _html.Length;
            else
                MoveAhead();    // Closing quote
        }
    }

    /// <summary>
    /// A StringBuilder class that helps eliminate excess whitespace.
    /// </summary>
    protected class TextBuilder
    {
        private StringBuilder _text;        // Completed output
        private StringBuilder _currLine;    // Line being assembled (normal mode)
        private int _emptyLines;            // Consecutive empty lines seen
        private bool _preformatted;

        // Construction
        public TextBuilder()
        {
            _text = new StringBuilder();
            _currLine = new StringBuilder();
            _emptyLines = 0;
            _preformatted = false;
        }

        /// <summary>
        /// Normally, extra whitespace characters are discarded.
        /// If this property is set to true, they are passed
        /// through unchanged.
        /// </summary>
        public bool Preformatted
        {
            get { return _preformatted; }
            set
            {
                if (value)
                {
                    // Clear line buffer if changing to
                    // preformatted mode
                    if (_currLine.Length > 0)
                        FlushCurrLine();
                    _emptyLines = 0;
                }
                _preformatted = value;
            }
        }

        /// <summary>
        /// Clears all current text.
        /// </summary>
        public void Clear()
        {
            _text.Length = 0;
            _currLine.Length = 0;
            _emptyLines = 0;
        }

        /// <summary>
        /// Writes the given string to the output buffer.
        /// </summary>
        /// <param name="s">String to write, one character at a time</param>
        public void Write(string s)
        {
            foreach (char c in s)
                Write(c);
        }

        /// <summary>
        /// Writes the given character to the output buffer.
        /// </summary>
        /// <param name="c">Character to write</param>
        public void Write(char c)
        {
            if (_preformatted)
            {
                // Write preformatted character
                _text.Append(c);
                return;
            }

            if (c == '\r')
            {
                // Ignore carriage returns. We'll process
                // '\n' if it comes next
            }
            else if (c == '\n')
            {
                // Flush current line
                FlushCurrLine();
            }
            else if (Char.IsWhiteSpace(c))
            {
                // Write single space character (collapse runs of whitespace)
                int len = _currLine.Length;
                if (len == 0 || !Char.IsWhiteSpace(_currLine[len - 1]))
                    _currLine.Append(' ');
            }
            else
            {
                // Add character to current line
                _currLine.Append(c);
            }
        }

        // Appends the current line to the output buffer. At most one
        // consecutive empty line is kept, and none at the very start.
        protected void FlushCurrLine()
        {
            // Get current line; after Trim() a line holding only
            // whitespace is the empty string
            string line = _currLine.ToString().Trim();

            if (line.Length == 0)
            {
                // An empty line
                _emptyLines++;
                if (_emptyLines < 2 && _text.Length > 0)
                    _text.AppendLine(line);
            }
            else
            {
                // A non-empty line
                _emptyLines = 0;
                _text.AppendLine(line);
            }

            // Reset current line
            _currLine.Length = 0;
        }

        /// <summary>
        /// Returns the current output as a string.
        /// </summary>
        public override string ToString()
        {
            if (_currLine.Length > 0)
                FlushCurrLine();
            return _text.ToString();
        }
    }
}

C#是一种现代、面向对象的编程语言,由微软开发并作为.NET框架的一部分。1.C#支持面向对象编程(OOP),包括封装、继承和多态。2.C#中的异步编程通过async和await关键字实现,提高应用的响应性。3.使用LINQ可以简洁地处理数据集合。4.常见错误包括空引用异常和索引超出范围异常,调试技巧包括使用调试器和异常处理。5.性能优化包括使用StringBuilder和避免不必要的装箱和拆箱。

C#.NET应用的测试策略包括单元测试、集成测试和端到端测试。1.单元测试确保代码的最小单元独立工作,使用MSTest、NUnit或xUnit框架。2.集成测试验证多个单元组合的功能,常用模拟数据和外部服务。3.端到端测试模拟用户完整操作流程,通常使用Selenium进行自动化测试。

C#高级开发者面试需要掌握异步编程、LINQ、.NET框架内部工作原理等核心知识。1.异步编程通过async和await简化操作,提升应用响应性。2.LINQ以SQL风格操作数据,需注意性能。3. .NET框架的CLR管理内存,垃圾回收需谨慎使用。

C#.NET面试问题和答案包括基础知识、核心概念和高级用法。1)基础知识:C#是微软开发的面向对象语言,主要用于.NET框架。2)核心概念:委托和事件允许动态绑定方法,LINQ提供强大查询功能。3)高级用法:异步编程提高响应性,表达式树用于动态代码构建。

C#.NET是构建微服务的热门选择,因为其生态系统强大且支持丰富。1)使用ASP.NET Core创建RESTful API,处理订单创建和查询。2)利用gRPC实现微服务间的高效通信,定义和实现订单服务。3)通过Docker容器化微服务,简化部署和管理。

C#和.NET的安全最佳实践包括输入验证、输出编码、异常处理、以及身份验证和授权。1)使用正则表达式或内置方法验证输入,防止恶意数据进入系统。2)输出编码防止XSS攻击,使用HttpUtility.HtmlEncode方法。3)异常处理避免信息泄露,记录错误但不返回详细信息给用户。4)使用ASP.NET Identity和Claims-based授权保护应用免受未授权访问。

C 语言中冒号 (':') 的含义:条件语句:分隔条件表达式和语句块;循环语句:分隔初始化、条件和增量表达式;宏定义:分隔宏名和宏值;单行注释:表示从冒号到行尾的内容为注释;数组维数:指定数组的维数。


热AI工具

Undresser.AI Undress
人工智能驱动的应用程序,用于创建逼真的裸体照片

AI Clothes Remover
用于从照片中去除衣服的在线人工智能工具。

Undress AI Tool
免费脱衣服图片

Clothoff.io
AI脱衣机

AI Hentai Generator
免费生成ai无尽的。

热门文章

热工具

适用于 Eclipse 的 SAP NetWeaver 服务器适配器
将Eclipse与SAP NetWeaver应用服务器集成。

MinGW - 适用于 Windows 的极简 GNU
这个项目正在迁移到osdn.net/projects/mingw的过程中,你可以继续在那里关注我们。MinGW:GNU编译器集合(GCC)的本地Windows移植版本,可自由分发的导入库和用于构建本地Windows应用程序的头文件;包括对MSVC运行时的扩展,以支持C99功能。MinGW的所有软件都可以在64位Windows平台上运行。

SublimeText3 Mac版
神级代码编辑软件(SublimeText3)

VSCode Windows 64位 下载
微软推出的免费、功能强大的一款IDE编辑器

SublimeText3 英文版
推荐:为Win版本,支持代码提示!