diff --git a/src/HtmlAgilityPack.Net35/HtmlAgilityPack.Net35.csproj b/src/HtmlAgilityPack.Net35/HtmlAgilityPack.Net35.csproj index eb8ebf1d..a69d929b 100644 --- a/src/HtmlAgilityPack.Net35/HtmlAgilityPack.Net35.csproj +++ b/src/HtmlAgilityPack.Net35/HtmlAgilityPack.Net35.csproj @@ -18,7 +18,7 @@ full false bin\Debug\ - DEBUG;TRACE + DEBUG;TRACE;FX35 prompt 4 diff --git a/src/HtmlAgilityPack.Net40-client/HtmlAgilityPack.Net40-client.csproj b/src/HtmlAgilityPack.Net40-client/HtmlAgilityPack.Net40-client.csproj index 91a90c33..502fcdde 100644 --- a/src/HtmlAgilityPack.Net40-client/HtmlAgilityPack.Net40-client.csproj +++ b/src/HtmlAgilityPack.Net40-client/HtmlAgilityPack.Net40-client.csproj @@ -44,7 +44,7 @@ full false bin\Debug\ - DEBUG;TRACE + DEBUG;TRACE;FX40 prompt 4 bin\Debug\HtmlAgilityPack.XML diff --git a/src/HtmlAgilityPack.Net40/Properties/AssemblyInfo.cs b/src/HtmlAgilityPack.Net40/Properties/AssemblyInfo.cs index d57cd451..e8bf2777 100644 --- a/src/HtmlAgilityPack.Net40/Properties/AssemblyInfo.cs +++ b/src/HtmlAgilityPack.Net40/Properties/AssemblyInfo.cs @@ -18,6 +18,7 @@ #endif #endif [assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")] +[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("ZZZ Projects Inc.")] [assembly: 
AssemblyProduct("Html Agility Pack")] diff --git a/src/HtmlAgilityPack.Net45/Properties/AssemblyInfo.cs b/src/HtmlAgilityPack.Net45/Properties/AssemblyInfo.cs index d57cd451..e8bf2777 100644 --- a/src/HtmlAgilityPack.Net45/Properties/AssemblyInfo.cs +++ b/src/HtmlAgilityPack.Net45/Properties/AssemblyInfo.cs @@ -18,6 +18,7 @@ #endif #endif [assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")] +[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("ZZZ Projects Inc.")] [assembly: AssemblyProduct("Html Agility Pack")] diff --git a/src/HtmlAgilityPack.NetStandard2_0/AssemblyInfo.cs b/src/HtmlAgilityPack.NetStandard2_0/AssemblyInfo.cs index 2bacd57c..7bc08237 100644 --- a/src/HtmlAgilityPack.NetStandard2_0/AssemblyInfo.cs +++ b/src/HtmlAgilityPack.NetStandard2_0/AssemblyInfo.cs @@ -1,3 +1,7 @@ using System; +using System.Runtime.CompilerServices; -[assembly: CLSCompliant(true)] \ No newline at end of file +[assembly: CLSCompliant(true)] + +[assembly: InternalsVisibleTo("HtmlAgilityPack.Tests, 
PublicKey=002400000480000094000000060200000024000052534131000400000100010027dc71d8e0b968c7324238e18a4cee4a367f1bf50c9d7a52d91ed46c6a1a584b9142c1d4234c4011d25437c909924079660c434eebe6d2c46412f30520a276e7ca8d8fa7075bb8b9e1c7502ef0e50423b32d469ba750012823fde16989ab42d8428ca5fdd0b06b801788a17239b78e0f75900012a50c5038ab93abbe2ac0d6ee")] +[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")] diff --git a/src/HtmlAgilityPack.Shared/EncodingNotSupportedException.cs b/src/HtmlAgilityPack.Shared/EncodingNotSupportedException.cs new file mode 100644 index 00000000..6da00d49 --- /dev/null +++ b/src/HtmlAgilityPack.Shared/EncodingNotSupportedException.cs @@ -0,0 +1,39 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. 
+ +using System; +using System.Text; + +namespace HtmlAgilityPack +{ + public class EncodingNotSupportedException : Exception + { + #region Fields + + private readonly string _encoding; + + #endregion + + #region Constructors + + internal EncodingNotSupportedException(string encoding) : base("Encoding '" + encoding + "' is not supported.") // give callers a meaningful Message instead of the generic default + { + _encoding = encoding; + } + + #endregion + + #region Properties + + public string Encoding + { + get { return _encoding; } + } + + #endregion + } +} \ No newline at end of file diff --git a/src/HtmlAgilityPack.Shared/HtmlAgilityPack.Shared.projitems b/src/HtmlAgilityPack.Shared/HtmlAgilityPack.Shared.projitems index fa59426f..8b76dfdf 100644 --- a/src/HtmlAgilityPack.Shared/HtmlAgilityPack.Shared.projitems +++ b/src/HtmlAgilityPack.Shared/HtmlAgilityPack.Shared.projitems @@ -17,6 +17,7 @@ + @@ -57,5 +58,11 @@ + + + + + + \ No newline at end of file diff --git a/src/HtmlAgilityPack.Shared/HtmlWeb.cs b/src/HtmlAgilityPack.Shared/HtmlWeb.cs index 0132bee0..291e67ec 100644 --- a/src/HtmlAgilityPack.Shared/HtmlWeb.cs +++ b/src/HtmlAgilityPack.Shared/HtmlWeb.cs @@ -83,6 +83,10 @@ public partial class HtmlWeb #region Fields +#if !(NETSTANDARD1_3 || NETSTANDARD1_6) + private IHttpWebRequestFactory _requestFactory; +#endif + private bool _autoDetectEncoding = true; private bool _cacheOnly; @@ -928,6 +932,21 @@ public bool UsingCache #endregion + #region Constructors + +#if !(NETSTANDARD1_3 || NETSTANDARD1_6) + public HtmlWeb() : this(new HttpWebRequestFactory()) + { + } + + internal HtmlWeb(IHttpWebRequestFactory requestFactory) + { + _requestFactory = requestFactory; + } +#endif + + #endregion + #region Public Methods #if !(NETSTANDARD1_3 || NETSTANDARD1_6) @@ -1547,10 +1566,10 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc ICredentials creds) { string cachePath = null; - HttpWebRequest req; + IHttpWebRequest req; bool oldFile = false; - req = WebRequest.Create(uri) as HttpWebRequest; + req = _requestFactory.Create(uri); req.Method = method; req.UserAgent = 
UserAgent; if (CaptureRedirect) @@ -1617,7 +1636,7 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc if (PreRequest != null) { // allow our user to change the request at will - if (!PreRequest(req)) + if (!PreRequest(req.Request)) { return HttpStatusCode.ResetContent; } @@ -1632,16 +1651,16 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc // } } - HttpWebResponse resp; + IHttpWebResponse resp; try { - resp = req.GetResponse() as HttpWebResponse; + resp = req.GetResponse(); } catch (WebException we) { _requestDuration = Environment.TickCount - tc; - resp = (HttpWebResponse) we.Response; + resp = we.Response == null ? null : new HttpWebResponseWrapper((HttpWebResponse) we.Response); if (resp == null) { if (oldFile) @@ -1668,7 +1687,7 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc // allow our user to get some info from the response if (PostResponse != null) { - PostResponse(req, resp); + PostResponse(req.Request, resp.Response); } _requestDuration = Environment.TickCount - tc; @@ -1677,11 +1696,22 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc bool html = IsHtmlContent(resp.ContentType); bool isUnknown = string.IsNullOrEmpty(resp.ContentType); - Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding) - ? 
Encoding.GetEncoding(resp.ContentEncoding) - : null; - if (OverrideEncoding != null) - respenc = OverrideEncoding; + Encoding respenc = OverrideEncoding; + if (respenc == null && !string.IsNullOrEmpty(resp.ContentEncoding)) + { + try + { + respenc = Encoding.GetEncoding(resp.ContentEncoding); // keep the resolved encoding; discarding it left respenc null + } + catch (ArgumentException ex) + { + if (ex.ParamName == "name") + { + throw new EncodingNotSupportedException(resp.ContentEncoding); + } + throw; // rethrow without resetting the original stack trace + } + } if (CaptureRedirect) { @@ -2092,7 +2122,7 @@ private HtmlDocument LoadUrl(Uri uri, string method, IWebProxy proxy, ICredentia } #endif #if !(NETSTANDARD1_3 || NETSTANDARD1_6) - private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp) + private void SaveCacheHeaders(Uri requestUri, IHttpWebResponse resp) { // we cache the original headers aside the cached document. string file = GetCacheHeadersPath(requestUri); diff --git a/src/HtmlAgilityPack.Shared/HttpWebRequestFactory.cs b/src/HtmlAgilityPack.Shared/HttpWebRequestFactory.cs new file mode 100644 index 00000000..db6b4cae --- /dev/null +++ b/src/HtmlAgilityPack.Shared/HttpWebRequestFactory.cs @@ -0,0 +1,31 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. + +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; +using System.Net; + +namespace HtmlAgilityPack +{ + /// + /// Implement the initialization of HttpWebRequest. 
+ /// + internal class HttpWebRequestFactory : IHttpWebRequestFactory + { + public HttpWebRequestFactory() + { + } + + public IHttpWebRequest Create(Uri uri) + { + return new HttpWebRequestWrapper((HttpWebRequest)HttpWebRequest.Create(uri)); + } + } +} + +#endif diff --git a/src/HtmlAgilityPack.Shared/HttpWebRequestWrapper.cs b/src/HtmlAgilityPack.Shared/HttpWebRequestWrapper.cs new file mode 100644 index 00000000..cc8f5b82 --- /dev/null +++ b/src/HtmlAgilityPack.Shared/HttpWebRequestWrapper.cs @@ -0,0 +1,44 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. + +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; +using System.Net; + +namespace HtmlAgilityPack +{ + /// + /// Wraps HttpWebRequest. 
+ /// + internal class HttpWebRequestWrapper : IHttpWebRequest + { + HttpWebRequest _request; + public HttpWebRequest Request { get { return _request; } } + + public HttpWebRequestWrapper(HttpWebRequest request) + { + _request = request; + } + + public string Method { get { return _request.Method; } set { _request.Method = value; } } + public string UserAgent { get { return _request.UserAgent; } set { _request.UserAgent = value; } } + public bool AllowAutoRedirect { get { return _request.AllowAutoRedirect; } set { _request.AllowAutoRedirect = value; } } + public ICredentials Credentials { get { return _request.Credentials; } set { _request.Credentials = value; } } + public IWebProxy Proxy { get { return _request.Proxy; } set { _request.Proxy = value; } } + public Uri RequestUri { get { return _request.RequestUri; } } + public DateTime IfModifiedSince { get { return _request.IfModifiedSince; } set { _request.IfModifiedSince = value; } } + public CookieContainer CookieContainer { get { return _request.CookieContainer; } set { _request.CookieContainer = value; } } + + public IHttpWebResponse GetResponse() + { + return new HttpWebResponseWrapper((HttpWebResponse)_request.GetResponse()); + } + } +} + +#endif diff --git a/src/HtmlAgilityPack.Shared/HttpWebResponseWrapper.cs b/src/HtmlAgilityPack.Shared/HttpWebResponseWrapper.cs new file mode 100644 index 00000000..0354a55e --- /dev/null +++ b/src/HtmlAgilityPack.Shared/HttpWebResponseWrapper.cs @@ -0,0 +1,55 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. 
+ +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; +using System.IO; +using System.Net; + +namespace HtmlAgilityPack +{ + /// + /// Wraps HttpWebResponse. + /// + internal class HttpWebResponseWrapper : IHttpWebResponse + { + HttpWebResponse _response; + public HttpWebResponse Response { get { return _response; } } + + public HttpWebResponseWrapper(HttpWebResponse response) + { + _response = response; + } + + public Uri ResponseUri { get { return _response.ResponseUri; } } + public HttpStatusCode StatusCode { get { return _response.StatusCode; } } + public string ContentType { get { return _response.ContentType; } } + public string ContentEncoding { get { return _response.ContentEncoding; } } + public WebHeaderCollection Headers { get { return _response.Headers; } } + public DateTime LastModified { get { return _response.LastModified; } } + + public void Close() + { + _response.Close(); + } + + public void Dispose() + { + // WebResponse only exposes Dispose() publicly from .NET 4.5 on; going through + // IDisposable works on every target, so older frameworks no longer skip the release. + ((IDisposable)_response).Dispose(); + } + + public Stream GetResponseStream() + { + return _response.GetResponseStream(); + } + } +} + +#endif diff --git a/src/HtmlAgilityPack.Shared/IHttpWebRequest.cs b/src/HtmlAgilityPack.Shared/IHttpWebRequest.cs new file mode 100644 index 00000000..3de4a47f --- /dev/null +++ b/src/HtmlAgilityPack.Shared/IHttpWebRequest.cs @@ -0,0 +1,34 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. + +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; +using System.Net; + +namespace HtmlAgilityPack +{ + /// + /// Abstracts HttpWebRequest. 
+ /// + internal interface IHttpWebRequest + { + HttpWebRequest Request { get; } + string Method { get; set; } + string UserAgent { get; set; } + bool AllowAutoRedirect { get; set; } + ICredentials Credentials { get; set; } + IWebProxy Proxy { get; set; } + Uri RequestUri { get; } + DateTime IfModifiedSince { get; set; } + CookieContainer CookieContainer { get; set; } + + IHttpWebResponse GetResponse(); + } +} + +#endif diff --git a/src/HtmlAgilityPack.Shared/IHttpWebRequestFactory.cs b/src/HtmlAgilityPack.Shared/IHttpWebRequestFactory.cs new file mode 100644 index 00000000..6f0690df --- /dev/null +++ b/src/HtmlAgilityPack.Shared/IHttpWebRequestFactory.cs @@ -0,0 +1,23 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. + +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; + +namespace HtmlAgilityPack +{ + /// + /// Abstracts the initialization of HttpWebRequest. + /// + internal interface IHttpWebRequestFactory + { + IHttpWebRequest Create(Uri uri); + } +} + +#endif diff --git a/src/HtmlAgilityPack.Shared/IHttpWebResponse.cs b/src/HtmlAgilityPack.Shared/IHttpWebResponse.cs new file mode 100644 index 00000000..0ae0f2e2 --- /dev/null +++ b/src/HtmlAgilityPack.Shared/IHttpWebResponse.cs @@ -0,0 +1,34 @@ +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Website & Documentation: http://html-agility-pack.net +// Forum & Issues: https://github.com/zzzprojects/html-agility-pack +// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE +// More projects: http://www.zzzprojects.com/ +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. 
+ +#if !(NETSTANDARD1_3 || NETSTANDARD1_6 || METRO) + +using System; +using System.IO; +using System.Net; + +namespace HtmlAgilityPack +{ + /// + /// Abstracts HttpWebResponse. + /// + internal interface IHttpWebResponse : IDisposable + { + HttpWebResponse Response { get; } + Uri ResponseUri { get; } + HttpStatusCode StatusCode { get; } + string ContentType { get; } + string ContentEncoding { get; } + WebHeaderCollection Headers { get; } + DateTime LastModified { get; } + + Stream GetResponseStream(); + void Close(); + } +} + +#endif diff --git a/src/HtmlAgilityPack.sln b/src/HtmlAgilityPack.sln index d862f345..119a9b8f 100644 --- a/src/HtmlAgilityPack.sln +++ b/src/HtmlAgilityPack.sln @@ -17,6 +17,12 @@ Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "HtmlAgilityPack.Shared", "H EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HtmlAgilityPack.Tests.Net45", "Tests\HtmlAgilityPack.Tests.Net45\HtmlAgilityPack.Tests.Net45.csproj", "{F7D2CBBC-E23E-478E-865B-6BE445B55EC8}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tools", "Tools", "{6459B875-8AB1-4E99-B088-472CA496F9B5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HtmlAgilityPack.Tests", "Tests\HtmlAgilityPack.Tests\HtmlAgilityPack.Tests.csproj", "{23944379-89E1-4D96-9B26-E29EC516D0CF}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestCodeGen", "Tools\TestCodeGen\TestCodeGen.csproj", "{C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}" +EndProject Global GlobalSection(SharedMSBuildProjectFiles) = preSolution HtmlAgilityPack.Shared\HtmlAgilityPack.Shared.projitems*{1028002a-bbe2-4ff2-94b7-a8368a8f6887}*SharedItemsImports = 4 @@ -116,6 +122,46 @@ Global {F7D2CBBC-E23E-478E-865B-6BE445B55EC8}.Release|x64.Build.0 = Release|Any CPU {F7D2CBBC-E23E-478E-865B-6BE445B55EC8}.Release|x86.ActiveCfg = Release|Any CPU {F7D2CBBC-E23E-478E-865B-6BE445B55EC8}.Release|x86.Build.0 = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|ARM.ActiveCfg = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|ARM.Build.0 = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|x64.ActiveCfg = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|x64.Build.0 = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|x86.ActiveCfg = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Debug|x86.Build.0 = Debug|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|Any CPU.Build.0 = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|ARM.ActiveCfg = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|ARM.Build.0 = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|x64.ActiveCfg = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|x64.Build.0 = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|x86.ActiveCfg = Release|Any CPU + {23944379-89E1-4D96-9B26-E29EC516D0CF}.Release|x86.Build.0 = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|ARM.ActiveCfg = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|ARM.Build.0 = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + 
{C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|x64.ActiveCfg = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|x64.Build.0 = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|x86.ActiveCfg = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Debug|x86.Build.0 = Debug|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|Any CPU.Build.0 = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|ARM.ActiveCfg = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|ARM.Build.0 = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|x64.ActiveCfg = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|x64.Build.0 = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|x86.ActiveCfg = Release|Any CPU + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -126,6 +172,8 @@ Global {083D99CD-F083-4122-AA96-4E3A84F3EC01} = {4E506C9F-6514-4055-AAA8-DC89D85558FE} {5BA9A0D7-173F-4C4F-AFFA-3DD6DC2F9A79} = {4E506C9F-6514-4055-AAA8-DC89D85558FE} {F7D2CBBC-E23E-478E-865B-6BE445B55EC8} = {FBD7AB4B-A0C8-4EE0-B1A9-6F26223097E9} + {23944379-89E1-4D96-9B26-E29EC516D0CF} = {FBD7AB4B-A0C8-4EE0-B1A9-6F26223097E9} + {C9C8CEA1-6C48-4A48-913D-8DB1A847FEDC} = {6459B875-8AB1-4E99-B088-472CA496F9B5} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {E5870022-9E57-469B-A27E-E840C436B2EF} diff --git a/src/Tests/HtmlAgilityPack.Tests/AssemblyInfo.cs b/src/Tests/HtmlAgilityPack.Tests/AssemblyInfo.cs new file mode 100644 
index 00000000..bc466ecb --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/AssemblyInfo.cs @@ -0,0 +1,7 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +[assembly: ComVisible(false)] + +[assembly: Guid("d2276641-9ec1-4c80-a59b-4ba8e2a578aa")] \ No newline at end of file diff --git a/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.Tests.csproj b/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.Tests.csproj new file mode 100644 index 00000000..c39d5c4e --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.Tests.csproj @@ -0,0 +1,69 @@ + + + + net40;net45;netcoreapp2.1 + HtmlAgilityPack.Tests + HtmlAgilityPack.Tests + 1.0.0 + ZZZ Projects Inc. + ZZZ Projects Inc. + Html Agility Pack Tests + Copyright © ZZZ Projects Inc. 2014 - 2017 + true + HtmlAgilityPack.snk + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PreserveNewest + + + Always + + + PreserveNewest + + + PreserveNewest + + + diff --git a/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.snk b/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.snk new file mode 100644 index 00000000..c2d78f7b Binary files /dev/null and b/src/Tests/HtmlAgilityPack.Tests/HtmlAgilityPack.snk differ diff --git a/src/Tests/HtmlAgilityPack.Tests/HtmlDocumentTests.cs b/src/Tests/HtmlAgilityPack.Tests/HtmlDocumentTests.cs new file mode 100644 index 00000000..3b751232 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/HtmlDocumentTests.cs @@ -0,0 +1,1040 @@ +using System.IO; +using System.Linq; +using System.Net; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.Threading; + +namespace HtmlAgilityPack.Tests +{ + [TestFixture] + public class HtmlDocumentTests + { + private string _contentDirectory; + + + + [OneTimeSetUp] + public void Setup() + { + + + _contentDirectory = 
Path.Combine(Path.GetDirectoryName(typeof(HtmlDocumentTests).Assembly.Location).ToString(), "files"); + // _contentDirectory = Path.Combine(@"C:\Users\Jonathan\Desktop\Z\zzzproject\HtmlAgilityPack\HtmlAgilityPack.Tests\bin\Debug\files\"); + } + + private HtmlDocument GetMshomeDocument() + { + var doc = new HtmlDocument(); + doc.Load(Path.Combine(_contentDirectory, "mshome.htm")); + return doc; + } + + [Test] + public void HtmlAgilityPack_AttributeCollectionBug() + { + { + const string firstAttrName = "first"; + const string secondAttrName = "second"; + const string value = "value"; + + HtmlNode firstNode = HtmlNode.CreateNode("
"); + firstNode.Attributes.Add(firstAttrName, value); + + HtmlNode secondNode = HtmlNode.CreateNode("
"); + secondNode.Attributes.Add(secondAttrName, value); + + secondNode.Attributes[0] = firstNode.Attributes[0]; + + Assert.IsNotNull(secondNode.Attributes[0]); + Assert.AreEqual(firstAttrName, secondNode.Attributes[0].Name); + + Assert.IsNotNull(secondNode.Attributes[firstAttrName], $"'{firstAttrName}' should exist in the collection"); + Assert.AreEqual(firstAttrName, secondNode.Attributes[firstAttrName].Name); + + Assert.IsNull(secondNode.Attributes [secondAttrName], $"{secondAttrName} should not exist in the collection"); + } + + { + const string firstAttrName = "first"; + const string secondAttrName = "second"; + const string value = "value"; + + HtmlNode firstNode = HtmlNode.CreateNode("
"); + firstNode.Attributes.Add(firstAttrName, value); + + HtmlNode secondNode = HtmlNode.CreateNode("
"); + secondNode.Attributes.Add(secondAttrName, value); + var a = secondNode.Attributes[secondAttrName]; + secondNode.Attributes[secondAttrName] = firstNode.Attributes[firstAttrName]; + + Assert.IsNotNull(secondNode.Attributes[firstAttrName]); + Assert.AreEqual(firstAttrName, secondNode.Attributes[firstAttrName].Name); + + Assert.IsNotNull(secondNode.Attributes[0], $"'{firstAttrName}' should exist in the collection"); + Assert.AreEqual(firstAttrName, secondNode.Attributes[firstAttrName].Name); + + Assert.IsNull(secondNode.Attributes[secondAttrName], $"{secondAttrName} should not exist in the collection"); + } + } + + [Test] + public void TextInsideScriptTagShouldHaveCorrectStreamPosition() + { + { + var document = new HtmlDocument(); + document.LoadHtml(@"foo"); + var scraptText = document.DocumentNode.FirstChild.FirstChild; + Assert.AreEqual(8, scraptText.StreamPosition); + Assert.AreEqual(1, scraptText.Line); + Assert.AreEqual(9, scraptText.LinePosition); + } + { + var document = new HtmlDocument(); + document.LoadHtml(@""); + var scriptText = document.DocumentNode.FirstChild.FirstChild; + Assert.AreEqual(8, scriptText.StreamPosition); + Assert.AreEqual(1, scriptText.Line); + Assert.AreEqual(9, scriptText.LinePosition); + } + { + var document = new HtmlAgilityPack.HtmlDocument(); + document.LoadHtml(@" +foo"); + var scraptText = document.DocumentNode.LastChild.FirstChild; + // var aa = scraptText.FirstChild; + Assert.AreEqual(10, scraptText.StreamPosition); + Assert.AreEqual(2, scraptText.Line); + Assert.AreEqual(9, scraptText.LinePosition); + } + + + { + var document = new HtmlAgilityPack.HtmlDocument(); + document.LoadHtml(@" +"); + var scriptText = document.DocumentNode.LastChild.FirstChild; + Assert.AreEqual(10, scriptText.StreamPosition); + Assert.AreEqual(2, scriptText.Line); + Assert.AreEqual(9, scriptText.LinePosition); + } + } + + [Test] + public void CreateAttribute() + { + var doc = new HtmlDocument(); + var a = doc.CreateAttribute("href"); + 
Assert.AreEqual("href", a.Name); + } + + [Test] + public void CreateAttributeWithEncodedText() + { + var doc = new HtmlDocument(); + var a = doc.CreateAttribute("href", "http://something.com\"&<>"); + Assert.AreEqual("href", a.Name); + Assert.AreEqual("http://something.com\"&<>", a.Value); + } + + [Test] + public void CreateAttributeWithText() + { + var doc = new HtmlDocument(); + var a = doc.CreateAttribute("href", "http://something.com"); + Assert.AreEqual("href", a.Name); + Assert.AreEqual("http://something.com", a.Value); + } + + [Test] + public void testTEXTAREA() + { + { + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(@""); + HtmlNodeCollection divs = doc.DocumentNode.SelectNodes("//div"); + + Assert.IsNull(divs); + + HtmlNode ta = doc.DocumentNode.SelectSingleNode("//textarea"); + Assert.IsTrue(ta.InnerHtml.Contains("div")); + } + } + + //[Test] + //public void CreateComment() + //{ + // var doc = new HtmlDocument(); + // var a = doc.CreateComment(); + // Assert.AreEqual(HtmlNode.HtmlNodeTypeNameComment, a.Name); + // Assert.AreEqual(a.NodeType, HtmlNodeType.Comment); + //} + + //[Test] + //public void CreateCommentWithText() + //{ + // var doc = new HtmlDocument(); + // var a = doc.CreateComment("something"); + // Assert.AreEqual(HtmlNode.HtmlNodeTypeNameComment, a.Name); + // Assert.AreEqual("something", a.InnerText); + // Assert.AreEqual(a.NodeType, HtmlNodeType.Comment); + //} + + [Test] + public void CreateElement() + { + var doc = new HtmlDocument(); + var a = doc.CreateElement("a"); + Assert.AreEqual("a", a.Name); + Assert.AreEqual(a.NodeType, HtmlNodeType.Element); + } + + //[Test] + //public void CreateTextNode() + //{ + // var doc = new HtmlDocument(); + // var a = doc.CreateTextNode(); + // Assert.AreEqual(HtmlNode.HtmlNodeTypeNameText, a.Name); + // Assert.AreEqual(a.NodeType, HtmlNodeType.Text); + //} + + [Test] + public void CreateTextNodeWithText() + { + var doc = new HtmlDocument(); + var a = doc.CreateTextNode("something"); + 
Assert.AreEqual("something", a.InnerText); + Assert.AreEqual(a.NodeType, HtmlNodeType.Text); + } + + [Test] + public void HtmlEncode() + { + var result = HtmlDocument.HtmlEncode("http://something.com\"&<>"); + Assert.AreEqual("http://something.com"&<>", result); + } + + [Test] + public void TestParse() + { + var doc = GetMshomeDocument(); + Assert.IsTrue(doc.DocumentNode.Descendants().Count() > 0); + } + + //[Test] + //public void TestLimitDepthParse() + //{ + // HtmlAgilityPack.HtmlDocument.MaxDepthLevel = 10; + // var doc = GetMshomeDocument(); + // try + // { + // Assert.IsTrue(doc.DocumentNode.Descendants().Count() > 0); + // } + // catch (ArgumentException e) + // { + // Assert.IsTrue(e.Message == HtmlAgilityPack.HtmlNode.DepthLevelExceptionMessage); + // } + // HtmlAgilityPack.HtmlDocument.MaxDepthLevel = int.MaxValue; + //} + + [Test] + public void TestParseSaveParse() + { + var doc = GetMshomeDocument(); + var doc1desc = + doc.DocumentNode.Descendants().Where(x => !string.IsNullOrWhiteSpace(x.InnerText)).ToList(); + doc.Save(Path.Combine(_contentDirectory, "testsaveparse.html")); + + var doc2 = new HtmlDocument(); + doc2.Load(Path.Combine(_contentDirectory, "testsaveparse.html")); + var doc2desc = + doc2.DocumentNode.Descendants().Where(x => !string.IsNullOrWhiteSpace(x.InnerText)).ToList(); + Assert.AreEqual(doc1desc.Count, doc2desc.Count); + //for(var i=0; i< doc1desc.Count;i++) + //{ + // try + // { + // Assert.AreEqual(doc1desc[i].Name, doc2desc[i].Name); + // }catch(Exception e) + // { + // throw; + // } + //} + } + + [Test] + public void TestRemoveUpdatesPreviousSibling() + { + var doc = GetMshomeDocument(); + var docDesc = doc.DocumentNode.Descendants().ToList(); + var toRemove = docDesc[1200]; + var toRemovePrevSibling = toRemove.PreviousSibling; + var toRemoveNextSibling = toRemove.NextSibling; + toRemove.Remove(); + Assert.AreSame(toRemovePrevSibling, toRemoveNextSibling.PreviousSibling); + } + + [Test] + public void TestReplaceUpdatesSiblings() + 
{ + var doc = GetMshomeDocument(); + var docDesc = doc.DocumentNode.Descendants().ToList(); + var toReplace = docDesc[1200]; + var toReplacePrevSibling = toReplace.PreviousSibling; + var toReplaceNextSibling = toReplace.NextSibling; + var newNode = doc.CreateElement("tr"); + toReplace.ParentNode.ReplaceChild(newNode, toReplace); + Assert.AreSame(toReplacePrevSibling, newNode.PreviousSibling); + Assert.AreSame(toReplaceNextSibling, newNode.NextSibling); + } + + [Test] + public void TestInsertUpdateSiblings() + { + var doc = GetMshomeDocument(); + var newNode = doc.CreateElement("td"); + var toReplace = doc.DocumentNode.ChildNodes[2]; + var toReplacePrevSibling = toReplace.PreviousSibling; + var toReplaceNextSibling = toReplace.NextSibling; + doc.DocumentNode.ChildNodes.Insert(2, newNode); + Assert.AreSame(newNode.NextSibling, toReplace); + Assert.AreSame(newNode.PreviousSibling, toReplacePrevSibling); + Assert.AreSame(toReplaceNextSibling, toReplace.NextSibling); + } + + [Test] + public void TestCopyFromNode() + { + var html = @"
"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var divNode = htmlDoc.DocumentNode.SelectSingleNode("//div"); + + var newNode = HtmlAgilityPack.HtmlNode.CreateNode(""); + newNode.CopyFrom(divNode); + + var attribute1 = divNode.Attributes[0]; + + var attribute2 = newNode.Attributes[0]; + + Assert.AreEqual(divNode.Attributes.Count, newNode.Attributes.Count); + Assert.AreEqual(attribute1.Value, attribute2.Value); + Assert.AreEqual(attribute1.QuoteType, attribute2.QuoteType); + } + + [Test] + public void TestCommentNode() + { + var html = + @" + + + + + +

This is bold headddding

+

This is underlinyed paragraph

+

This is italic heading

+ + "; + + var htmlDoc = new HtmlAgilityPack.HtmlDocument(); + htmlDoc.LoadHtml(html); + + var h1 = htmlDoc.DocumentNode.SelectNodes("//h1"); + var comments = htmlDoc.DocumentNode.SelectNodes("//comment()"); + + Assert.AreEqual(h1.Count, 1); + Assert.AreEqual(comments.Count, 4); + } + + [Test] + public void TestCloneNode() + { + var html = @"
"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var divNode = htmlDoc.DocumentNode.SelectSingleNode("//div"); + + var newNode = divNode.Clone(); + + var attribute1 = divNode.Attributes[0]; + + var attribute2 = newNode.Attributes[0]; + + Assert.AreEqual(divNode.Attributes.Count, newNode.Attributes.Count); + Assert.AreEqual(attribute1.Value, attribute2.Value); + Assert.AreEqual(attribute1.QuoteType, attribute2.QuoteType); + } + + [Test] + public void TestEmptyTag_Single() + { + var html = ""; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(html); + + Assert.AreEqual(@"", doc.DocumentNode.OuterHtml); + } + + [Test] + public void TestEmptyTag_Many() + { + { + var html = ""; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(html); + + Assert.AreEqual(@"", doc.DocumentNode.OuterHtml); + } + + { + var html = ""; + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(html); + + Assert.AreEqual(@"", doc.DocumentNode.OuterHtml); + } + } + + [Test] + public void TestAddClass() + { + var html = @"

This is new heading

"; + + string output = "

This is new heading

"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var h1Node = htmlDoc.DocumentNode.SelectSingleNode("//h1"); + + h1Node.AddClass("input"); + + Assert.AreEqual(h1Node.OuterHtml, output); + } + + [Test] + public void TestRemoveClass() + { + var output = @"

This is new heading

"; + + string html = "

This is new heading

"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var h1Node = htmlDoc.DocumentNode.SelectSingleNode("//h1"); + + h1Node.RemoveClass("input"); + + Assert.AreEqual(h1Node.OuterHtml, output); + } + + [Test] + public void TestReplaceClass() + { + var output = "

This is new heading

"; + + string html = "

This is new heading

"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var h1Node = htmlDoc.DocumentNode.SelectSingleNode("//h1"); + + h1Node.ReplaceClass("important", "input"); + + Assert.AreEqual(h1Node.OuterHtml, output); + } + + [Test] + + public void TestDetectEncoding() + { + string html = Path.Combine(_contentDirectory, "test.html"); + + var htmlDoc = new HtmlDocument(); + + var encoding = htmlDoc.DetectEncoding(html); + + Assert.AreEqual(System.Text.Encoding.UTF8, encoding); + } + + [Test] + public void TestLoadWithCache() + { + var dir = Path.Combine(_contentDirectory, "cache"); + Directory.CreateDirectory(dir); + + var web = new HtmlAgilityPack.HtmlWeb() + { + CachePath = dir, + UsingCache = true + }; + + var url = "http://html-agility-pack.net/"; + var docCache = web.Load(url); + + var docLoad = new HtmlAgilityPack.HtmlWeb().Load(url); + Assert.AreEqual(docLoad.DocumentNode.OuterHtml, docCache.DocumentNode.OuterHtml); + } + + [Test] + public void OuterHtmlHasBeenCalled_RemoveCalled_SubsequentOuterHtmlCallsAreBroken() + { + var doc = new HtmlDocument(); + doc.LoadHtml("
SOme text here
some boldedtext
"); + var resultList = doc.DocumentNode.SelectNodes("//div"); + Assert.AreEqual(2, resultList.Count); + resultList.First().Remove(); + Assert.AreEqual("
some boldedtext
", doc.DocumentNode.OuterHtml); + var resultList2 = doc.DocumentNode.SelectNodes("//div"); + Assert.AreEqual(1, resultList2.Count); + resultList2.First().Remove(); + //
some boldedtext
should have been removed + Assert.AreEqual("", doc.DocumentNode.OuterHtml); + } + + [Test] + public void TestAttributeDeEntitizeValue() + { + var html = + "

This is underlined paragraph

"; + + string output = "\"Hello\""; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var body = htmlDoc.DocumentNode.SelectSingleNode("//body"); + + var val = body.Attributes["data-foo"].DeEntitizeValue; + + Assert.AreEqual(output, val); + } + + [Test] + public void TestAttributeDeEntitizeValue2() + { + var html = + "

This is underlined paragraph

"; + + string output = "\"\"Hello\"\""; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var body = htmlDoc.DocumentNode.SelectSingleNode("//body"); + + var val = body.Attributes["data-foo"].DeEntitizeValue; + + Assert.AreEqual(output, val); + } + + [Test] + public void TestAttributeDeEntitizeValue3() + { + var doc = new HtmlDocument(); + var a = doc.CreateAttribute("href", "\"bad_href\""); + Assert.AreEqual("href", a.Name); + Assert.AreEqual("\"bad_href\"", a.DeEntitizeValue); + } + + [Test] + public void TestAttributeDeEntitizeValue4() + { + var html = "

This is underlined paragraph

"; + + string output = "\"Hello\""; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var body = htmlDoc.DocumentNode.SelectSingleNode("//body"); + + var val = body.Attributes["data-foo"].DeEntitizeValue; + + Assert.AreEqual(output, val); + } + + [Test] + public void TestAttributeValue() + { + var html = @"

This is underlined paragraph

"; + + string output = "http://example.com/path?productId=9788762505032&title=something"; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + var body = htmlDoc.DocumentNode.SelectSingleNode("//body"); + + var val = body.Attributes["data-foo"].Value; + + Assert.AreEqual(output, val); + } + + [Test] + + public void TestSingleNodesEmptyCollection() + { + var html = + @" + + +

This is bold heading

+

This is underlined paragraph

+

This is italic heading

+

This is new heading

+ + "; + + var output = 0; + + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + htmlDoc.OptionEmptyCollection = true; + + var divNodes = htmlDoc.DocumentNode.SelectNodes("//div"); + + Assert.AreEqual(output, divNodes.Count); + } + + [Test] + public void TestCreateNode() + { + string output = "

text

"; + HtmlNode node1 = HtmlNode.CreateNode(@" +

text

+ "); + + HtmlNode node2 = HtmlNode.CreateNode(@" + +

text

+ + "); + + HtmlNode node3 = HtmlNode.CreateNode(@"

text

"); + + Assert.AreEqual(output, node1.OuterHtml); + Assert.AreEqual(output, node2.OuterHtml); + Assert.AreEqual(output, node3.OuterHtml); + + try + { + HtmlNode node4 = HtmlNode.CreateNode("

a

b

"); + } + catch (Exception e) + { + Assert.AreEqual("Multiple node elments can't be created.", e.Message); + } + + HtmlNode node5 = HtmlNode.CreateNode(@"/r/n"); + Assert.AreEqual("/r/n", node5.OuterHtml); + } + + [Test] + public void TestParseListInList() + { + var html = "
    1. x
"; + + var doc = new HtmlDocument(); + doc.OptionFixNestedTags = true; + doc.Load(new StringReader(html)); + + Assert.AreEqual(doc.DocumentNode.OuterHtml, html); + } + + [Test] + public void TestLoadWithUri() + { + //string adress = "http://www.filmweb.pl/film/Piraci+z+Karaib%C3%B3w%3A+Zemsta+Salazara-2017-606542"; + //Uri uri = new Uri(adress, true); + //var web = new HtmlWeb(); + //HtmlAgilityPack.HtmlDocument document = web.Load(uri); + //Assert.AreNotEqual(string.Empty, document.DocumentNode.OuterHtml); + } + + [Test] + public void TestFormTag() + { + var html = @"
"; + var document = new HtmlDocument(); + document.LoadHtml(html); + var result = document.DocumentNode.Descendants().Select(dn => new {dn.NodeType, dn.Name, dn.OuterHtml}).ToArray(); + Assert.AreEqual(html, document.DocumentNode.OuterHtml); + Assert.AreEqual(1, result.Count()); + } + + [Test] + public void TestNumericTag() + { + var html = @"
<1
"; + var document = new HtmlDocument(); + document.LoadHtml(html); + var result = document.DocumentNode.Descendants().Select(dn => new {dn.NodeType, dn.Name, dn.OuterHtml}).ToArray(); + Assert.AreEqual(html, document.DocumentNode.OuterHtml); + } + + [Test] + public void ImplicitTag() + { + { + var html = @"
a
b
c
a
b
c"; + var document = new HtmlDocument(); + document.LoadHtml(html); + var result = document.DocumentNode.Descendants().Select(dn => new {dn.NodeType, dn.Name, dn.OuterHtml}).ToArray(); + + // TODO: Fix issue with last "dd" + Assert.AreEqual(html + "
", document.DocumentNode.OuterHtml); + } + + + { + var html = @"
a
b
c
a
b
c
"; + var document = new HtmlDocument(); + document.LoadHtml(html); + var result = document.DocumentNode.Descendants().Select(dn => new {dn.NodeType, dn.Name, dn.OuterHtml}).ToArray(); + + Assert.AreEqual(html, document.DocumentNode.OuterHtml); + } + } + + [Test] + public void DeEntitize() + { + var html = @"mouse's house"; + + Assert.AreEqual("mouse's house", HtmlEntity.DeEntitize("mouse's house")); + } + + [Test] + public void InnerText_Comment() + { + var document = new HtmlDocument(); + document.LoadHtml("

Expected value

"); + + Assert.AreEqual("Expected value", document.DocumentNode.FirstChild.InnerText); + } + + [Test] + public void TestFixNestedAnchors() + { + var inHtml = " Here's a great link! Here's another one!Here's some unrelated text."; + var expectedHtml = " Here's a great link! Here's another one!Here's some unrelated text."; + + var doc = new HtmlDocument(); + doc.LoadHtml(inHtml); + + Assert.AreEqual(expectedHtml, doc.DocumentNode.OuterHtml); + } + + [Test] + public void TestHandleNestedAnchors() + { + var inHtml = ""; + var expectedHtml = ""; + var doc = new HtmlDocument(); + doc.LoadHtml(inHtml); + Assert.AreEqual(expectedHtml, doc.DocumentNode.OuterHtml); + } + + [Test] + public void TestInnerText() + { + var inHtml = @" + + + InnerText bug Demo + + + +
+ This demonstration should show that the HAP currently parses div tags incorrectly, parsing carriage returns, new lines and tabular indents as text. +
+ +"; + var expectedHtml = "InnerText bug DemoThis demonstration should show that the HAP currently parses div tags incorrectly, parsing carriage returns, new lines and tabular indents as text."; + + var doc = new HtmlDocument() {BackwardCompatibility = false}; + doc.LoadHtml(inHtml); + + Assert.AreEqual(expectedHtml, doc.DocumentNode.InnerText); + } + + [Test] + public void TestOptionTag() + { + var html = ""; + + string output = ""; + var document = new HtmlDocument(); + document.LoadHtml(html); + Assert.AreEqual(output, document.DocumentNode.OuterHtml); + } + + [Test] + public void VerifyChildDivParent() + { + var doc = new HtmlDocument(); + doc.LoadHtml(""); + + var div = HtmlNode.CreateNode("
"); + var div2 = HtmlNode.CreateNode("
"); + + doc.DocumentNode.ChildNodes.Add(div); + div.ChildNodes.Add(div2); + + Assert.AreEqual(div.Name, div2.ParentNode.Name); + + } + + + [Test] + public void ChildIsRemovedFromParent() + { + var doc = new HtmlDocument(); + doc.LoadHtml(""); + + var div = HtmlNode.CreateNode("
"); + var div2 = HtmlNode.CreateNode("
"); + + div.ChildNodes.Add(div2); + doc.DocumentNode.ChildNodes.Add(div); + + + div.FirstChild.Remove(); + + Assert.AreEqual(0, div.ChildNodes.Count); + + } + + + [Test] + public void GetEncapsulatedData() + { + HtmlWeb stackoverflowSite = new HtmlWeb(); + HtmlDocument htmlDocument = stackoverflowSite.Load("https://stackoverflow.com/"); + StackOverflowPage stackOverflowPage = htmlDocument.DocumentNode.GetEncapsulatedData(); + IEnumerable filtered = stackOverflowPage.Questions.OrderByDescending(new Func(x => x.Statistics.Votes)); + + Assert.IsTrue(filtered.Count() > 5); + Assert.IsTrue(filtered.ElementAt(0).Statistics.Votes > 0); + + } + + [Test] + public void CompareLowerCulture() + { + // Parses the same fixture under en-US and then tr-TR; expects identical h2 markup from both and no parse errors under tr-TR. + string html = File.ReadAllText(Path.Combine(_contentDirectory, "regression.html")); + HtmlNode node1 = null; + // Test 1 + CultureInfo cul1 = CultureInfo.CreateSpecificCulture("en-US"); + Thread.CurrentThread.CurrentCulture = cul1; + HtmlAgilityPack.HtmlDocument doc1 = new HtmlAgilityPack.HtmlDocument(); + doc1.LoadHtml(html); + + node1 = doc1.DocumentNode.SelectSingleNode("//div[@id='mainContents']/h2"); + + CultureInfo cul2 = CultureInfo.CreateSpecificCulture("tr-TR"); + Thread.CurrentThread.CurrentCulture = cul2; + HtmlAgilityPack.HtmlDocument doc2 = new HtmlAgilityPack.HtmlDocument(); + doc2.LoadHtml(html); + var s = doc2.DocumentNode.OuterHtml; + + HtmlNode node2 = doc2.DocumentNode.SelectSingleNode("//div[@id='mainContents']/h2"); + // Compare unconditionally: a guard that only asserts when both values are already equal lets a culture-dependent mismatch pass silently. + // NOTE(review): CurrentCulture is still tr-TR when this test returns - confirm later tests on this thread do not depend on it. + + Assert.AreEqual(node1?.InnerHtml, node2?.InnerHtml); + Assert.AreEqual(0, doc2.DocumentNode.OwnerDocument.ParseErrors.Count()); + } + + + [Test] + public void OverFlowNotEndTag() + { + + string html = File.ReadAllText(Path.Combine(_contentDirectory, "overflow.html")); + HtmlNode node1 = null; + // Test 1 + + HtmlAgilityPack.HtmlDocument doc1 = new HtmlAgilityPack.HtmlDocument(); + doc1.LoadHtml(html); + + Assert.AreEqual(15, doc1.DocumentNode.ChildNodes[4].ChildNodes.Count); + + Assert.AreEqual(0, 
doc1.DocumentNode.OwnerDocument.ParseErrors.Count()); + } + + [Test] + public void SanitizeXmlElementNameWithColon() + { + var input = @" + +"; + var htmlDoc = new HtmlAgilityPack.HtmlDocument(); + htmlDoc.LoadHtml(input); + htmlDoc.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8; + htmlDoc.OptionOutputAsXml = true; + htmlDoc.OptionOutputOriginalCase = true; + var xmlDoc = htmlDoc.DocumentNode.WriteTo(); + + var expected = @"" + + @" + <_value3a_element> +"; + + Assert.AreEqual(expected, xmlDoc); + } + + [Test] + public void DoesNotSanitizeXmlElementNameWithColonWhenConfiguredToPreserveXmlNamespaces() + { + var input = @" + +"; + var htmlDoc = new HtmlAgilityPack.HtmlDocument(); + htmlDoc.LoadHtml(input); + htmlDoc.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8; + htmlDoc.OptionOutputAsXml = true; + htmlDoc.OptionOutputOriginalCase = true; + htmlDoc.OptionPreserveXmlNamespaces = true; + var xmlDoc = htmlDoc.DocumentNode.WriteTo(); + + var expected = @"" + + @" + +"; + + Assert.AreEqual(expected, xmlDoc); + } + + [Test] + public void HasClass_WhereClassWithWhitespacePassed_ShouldReturnTrue() + { + var input = @""; + var htmlDoc = new HtmlDocument(); + + htmlDoc.LoadHtml(input); + + var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); + Assert.True(aTag.HasClass("disabled")); + } + + [Test] + public void GetClasses_WhereClassWithWhitespacePassed_ShouldNotBeEmpty() + { + var input = @""; + var htmlDoc = new HtmlDocument(); + + htmlDoc.LoadHtml(input); + + var aTag = htmlDoc.DocumentNode.SelectSingleNode("//a"); + Assert.IsNotEmpty(aTag.GetClasses()); + } + + [HasXPath] + public class StackOverflowPage + { + [XPath("//*[@id='question-mini-list']/div/div")] + public IEnumerable Questions { get; set; } + + [XPath("//*[@id='hot-network-questions']/ul//li")] + public IEnumerable GetHotNetworkQuestions { get; set; } + + } + + [HasXPath] + [DebuggerDisplay("StackOverflowQuestion : {Question.QuestionTitle}")] + public class StackOverflowQuestion + { + 
[XPath("/div[@class='cp']")] + public StatisticsBox Statistics { get; set; } + + + [XPath("/div[@class='summary']")] + public QuestionBox Question { get; set; } + + + [XPath("/div[@class='summary']/div[@class='started']")] + public UserBox User { get; set; } + + } + + [HasXPath] + [DebuggerDisplay("Votes={Votes} , Answers={Answers} , Views={Views}")] + public class StatisticsBox + { + [XPath("/div[1]/div/span")] + public int Votes { get; set; } + + [XPath("/div[2]/div/span")] + public int Answers { get; set; } + + [XPath("/div[3]/div/span")] + public string Views { get; set; } + + + } + + [HasXPath] + [DebuggerDisplay("QuestionTitle={QuestionTitle}")] + public class QuestionBox + { + [XPath("/h3/a")] + public string QuestionTitle { get; set; } + + [XPath("/h3/a", "href")] + public string QuestionLink { get; set; } + + [XPath("/div[starts-with(@class,'tags')]//a")] + public IEnumerable Tags { get; set; } + } + + [HasXPath] + [DebuggerDisplay("UserID={UserID} , ReputationScore={ReputationScore}")] + public class UserBox + { + [XPath("/a[1]/span", "title")] + public DateTime ExactTime { get; set; } + + [XPath("/a[1]/span")] + public string RelativeTime { get; set; } + + [XPath("/a[2]")] + public string UserID { get; set; } + + [XPath("a[2]", "href")] + public string UserLink { get; set; } + + [XPath("/span[@class='reputation-score']")] + public string ReputationScore { get; set; } + } + + [HasXPath] + [DebuggerDisplay("Question Title={QuestionTitle}")] + public class HotNetworkQuestion + { + [XPath("/div", "title")] + public string QuestionCategory { get; set; } + + [XPath("/a")] + public string QuestionTitle { get; set; } + + [XPath("/a", "href")] + public string QuestionLink { get; set; } + } + } +} \ No newline at end of file diff --git a/src/Tests/HtmlAgilityPack.Tests/HtmlNode.Tests.cs b/src/Tests/HtmlAgilityPack.Tests/HtmlNode.Tests.cs new file mode 100644 index 00000000..f48faea8 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/HtmlNode.Tests.cs @@ -0,0 
+1,104 @@ +using System; +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace HtmlAgilityPack.Tests +{ + [TestFixture] + public class HtmlNode2 + { + //[Test(Description = + // "Attributes should maintain their original character casing if OptionOutputOriginalCase is true")] + //public void EnsureAttributeOriginalCaseIsPreserved() + //{ + // var html = "
"; + // var doc = new HtmlDocument + // { + // OptionOutputOriginalCase = true + // }; + // doc.LoadHtml(html); + // var div = doc.DocumentNode.Descendants("div").FirstOrDefault(); + // var writer = new StringWriter(); + // div.WriteAttributes(writer, false); + // var result = writer.GetStringBuilder().ToString(); + // Assert.AreEqual(" AttributeIsThis=\"val\"", result); + //} + + [Test] + public void ReadNotCloseTag() + { + var document = new HtmlDocument(); + document.LoadHtml("
  • item
"); + var span = document.DocumentNode.SelectSingleNode("//span"); + if (span == null) throw new Exception("Failed to find span element"); + var OuterHtml = span.OuterHtml; + var InnerHtml = span.InnerHtml; + var InnerText = span.InnerText; + + Assert.IsNotNull(OuterHtml); + Assert.IsNotNull(InnerHtml); + Assert.IsNotNull(InnerText); + } + + + [Test] + public void checkAttributForTextComment() + { + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(@"
some text
"); + var div = doc.GetElementbyId("foo"); + int count = 0; + Exception exception = null; + foreach (var textNode in div.ChildNodes) + { + try + { + textNode.Id = "1"; + count++; + } + catch (Exception e) + { + exception = e; + + } + } + + Assert.AreEqual(count, 1); + Assert.IsNotNull(exception); + } + + + [Test] + public void Prepend_CheckOrder() + { + HtmlNode source = HtmlNode.CreateNode(@" +
    +
  • Alpha
  • +
  • Bravo
  • +
  • Charlie
  • +
+"); + HtmlNode destination = HtmlNode.CreateNode(@" +
    +
  • Delta
  • +
  • Echo
  • +
  • Foxtrot
  • +
+"); + + destination.PrependChildren(source.ChildNodes); + + Assert.AreEqual(destination.WriteTo() + , @"
    +
  • Alpha
  • +
  • Bravo
  • +
  • Charlie
  • + +
  • Delta
  • +
  • Echo
  • +
  • Foxtrot
  • +
"); + } + } +} \ No newline at end of file diff --git a/src/Tests/HtmlAgilityPack.Tests/HtmlWebTests.cs b/src/Tests/HtmlAgilityPack.Tests/HtmlWebTests.cs new file mode 100644 index 00000000..adc0f722 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/HtmlWebTests.cs @@ -0,0 +1,229 @@ +using System; +using System.IO; +using System.Linq; +using System.Net; +using System.Reflection; +using System.Text; +using Moq; +using NUnit.Framework; + +namespace HtmlAgilityPack.Tests +{ + [TestFixture] + class HtmlWebTests + { + private string _contentDir; + + [OneTimeSetUp] + public void Setup() + { + _contentDir = Path.Combine( + Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), + "files"); + } + + [Test] + public void TestLoad() + { + var factoryMock = new Mock(); + factoryMock.Setup(x => x.Create(It.IsAny())) + .Returns(u => + { + var reqMock = new Mock(); + reqMock.Setup(x => x.Request).Returns(null as HttpWebRequest); + reqMock.Setup(x => x.GetResponse()).Returns(() => + { + var resMock = new Mock(); + resMock.Setup(x => x.ResponseUri).Returns(u); + resMock.Setup(x => x.StatusCode).Returns(HttpStatusCode.OK); + resMock.Setup(x => x.ContentType).Returns("text/html; charset=utf-8"); + resMock.Setup(x => x.ContentEncoding).Returns(""); + resMock.Setup(x => x.Headers).Returns(() => + { + var headers = new WebHeaderCollection(); + headers.Add("Cache-Control", "no-store, no-transform, no-cache"); + headers.Add("Pragma", "no-cache"); + headers.Add("X-Activity-Id", "ec46db78-0cbf-40c4-97a2-a2420cd8e1ca"); + headers.Add("MS-CV", "wjoFJw0sEE+jo45O.0"); + headers.Add("X-AppVersion", "1.0.7257.410"); + headers.Add("X-Az", "{did:92e7dc58ca2143cfb2c818b047cc5cd1, rid: OneDeployContainer, sn: marketingsites-prod-odeastasia, dt: 2018-05-03T20:14:23.4188992Z, bt: 2019-11-14T08:13:40.0000000Z}"); + headers.Add("ms-operation-id", "1be30902be694b4c86c5a401f0bb91fb"); + headers.Add("P3P", "CP=\"CAO CONi OTR OUR DEM ONL\""); + headers.Add("X-UA-Compatible", "IE=Edge;chrome=1"); + 
headers.Add("X-Content-Type-Options", "nosniff"); + headers.Add("X-Frame-Options", "SAMEORIGIN"); + headers.Add("Access-Control-Allow-Methods", "HEAD,GET,POST,PATCH,PUT,OPTIONS"); + headers.Add("X-XSS-Protection", "1; mode=block"); + headers.Add("X-EdgeConnect-MidMile-RTT", "41"); + headers.Add("X-EdgeConnect-Origin-MEX-Latency", "282"); + headers.Add("Date", "Sat, 16 Nov 2019 20:09:49 GMT"); + headers.Add("Transfer-Encoding", "chunked"); + headers.Add("Connection", "keep-alive, Transfer-Encoding"); + headers.Add("Set-Cookie", "isFirstSession=1; path=/; secure; HttpOnly, MUID=054E212C88D965E415702F3A89116462; domain=.microsoft.com; expires=Tue, 16-Nov-2021 20:09:48 GMT; path=/;SameSite=None; secure, X-FD-FEATURES=ids=1690t1%2csfwaab%2catperf680t2%2c1786t1a%2c969t1%2c882c%2c936ca%2ctasmigration010%2ccartemberpl&imp=ec46db78-0cbf-40c4-97a2-a2420cd8e1ca; expires=Mon, 16-Nov-2020 20:09:48 GMT; path=/; secure; HttpOnly, X-FD-Time=1; expires=Sat, 16-Nov-2019 20:14:48 GMT; path=/; secure; HttpOnly, akacd_OneRF=1581710989~rv=77~id=b1ce48559f2a627f15dc2e792b7d9740; path=/; Expires=Fri, 14 Feb 2020 20:09:49 GMT, akacd_OneRF=1581710989~rv=77~id=b1ce48559f2a627f15dc2e792b7d9740; path=/; Expires=Fri, 14 Feb 2020 20:09:49 GMT"); + headers.Add("TLS_version", "tls1.2"); + headers.Add("Strict-Transport-Security", "max-age=31536000"); + headers.Add("X-RTag", "RT"); + headers.Add("Content-Type", "text/html; charset=utf-8"); + headers.Add("Expires", "-1"); + return headers; + }); + resMock.Setup(x => x.GetResponseStream()) + .Returns(() => new FileStream( + Path.Combine(_contentDir, "mshome.htm"), + FileMode.Open, + FileAccess.Read)); + resMock.Setup(x => x.LastModified).Returns(DateTime.UtcNow); + return resMock.Object; + }); + return reqMock.Object; + }); + + var htmlWeb = new HtmlWeb(factoryMock.Object); + var doc = htmlWeb.Load(new Uri("https://www.microsoft.com/")); + Assert.IsTrue(doc.DocumentNode.Descendants().Count() > 0); + } + + [Test] + public void 
TestLoadThrowsEncodingNotSupportedException() + { + var factoryMock = new Mock(); + factoryMock.Setup(x => x.Create(It.IsAny())) + .Returns(u => + { + var reqMock = new Mock(); + reqMock.Setup(x => x.Request).Returns(null as HttpWebRequest); + reqMock.Setup(x => x.GetResponse()).Returns(() => + { + var resMock = new Mock(); + resMock.Setup(x => x.ResponseUri).Returns(u); + resMock.Setup(x => x.StatusCode).Returns(HttpStatusCode.OK); + resMock.Setup(x => x.ContentType).Returns("text/html; charset=UTF-8"); + resMock.Setup(x => x.ContentEncoding).Returns("identity"); + resMock.Setup(x => x.Headers).Returns(() => + { + var headers = new WebHeaderCollection(); + headers.Add("Transfer-Encoding", "chunked"); + headers.Add("Connection", "keep-alive"); + headers.Add("X-Cache-Hits", "4"); + headers.Add("Cache-Control", "no-store, must-revalidate, no-cache, max-age=0"); + headers.Add("X-Chef", "Gennaro"); + headers.Add("Pragma", "public"); + headers.Add("X-Cache-Keep", "3600.000"); + headers.Add("Vary", "Accept-Encoding"); + headers.Add("X-Country-Code", "NZ"); + headers.Add("X-Cache-Status", "MISS"); + headers.Add("X-Backend", "i_04400f0cd28e027e7_10_170_21_65"); + headers.Add("Date", "Fri, 15 Nov 2019 07:44:42 GMT"); + headers.Add("X-Cache-TTL-Remaining", "595.653"); + headers.Add("Server", "ZENEDGE"); + headers.Add("X-Zen-Fury", "efadaddb4a91c6f51faaa477a91c047faa84f52d"); + headers.Add("X-Cache", "FoodCache"); + headers.Add("Accept-Ranges", "bytes"); + headers.Add("Age", "3004"); + headers.Add("X-Cache-Age", "3004.347"); + headers.Add("X-Cdn", "Served-By-Zenedge"); + headers.Add("Content-Type", "text/html; charset=UTF-8"); + headers.Add("Last-Modified", "Thu, 01 Jan 1970 00:00:00 GMT"); + headers.Add("Expires", "Fri, 15 Nov 2019 08:44:41 GMT"); + headers.Add("Content-Encoding", "identity"); + return headers; + }); + resMock.Setup(x => x.GetResponseStream()).Returns( + () => new MemoryStream()); + resMock.Setup(x => x.LastModified).Returns(DateTime.UtcNow); + return 
resMock.Object; + }); + return reqMock.Object; + }); + + var htmlWeb = new HtmlWeb(factoryMock.Object); + Exception exception = null; + try + { + htmlWeb.Load(new Uri("https://www.jamieoliver.com/recipes/chicken-recipes/chicken-tofu-noodle-soup/")); + } + catch (Exception e) + { + exception = e; + } + Assert.That(exception, Is.InstanceOf(typeof(EncodingNotSupportedException))); + } + + [Test] + public void TestLoadOverridingEncoding() + { + var factoryMock = new Mock(); + factoryMock.Setup(x => x.Create(It.IsAny())) + .Returns(u => + { + var reqMock = new Mock(); + reqMock.Setup(x => x.Request).Returns(null as HttpWebRequest); + reqMock.Setup(x => x.GetResponse()).Returns(() => + { + var resMock = new Mock(); + resMock.Setup(x => x.ResponseUri).Returns(u); + resMock.Setup(x => x.StatusCode).Returns(HttpStatusCode.OK); + resMock.Setup(x => x.ContentType).Returns("text/html; charset=UTF-8"); + resMock.Setup(x => x.ContentEncoding).Returns("identity"); + resMock.Setup(x => x.Headers).Returns(() => + { + var headers = new WebHeaderCollection(); + headers.Add("Transfer-Encoding", "chunked"); + headers.Add("Connection", "keep-alive"); + headers.Add("X-Cache-Hits", "4"); + headers.Add("Cache-Control", "no-store, must-revalidate, no-cache, max-age=0"); + headers.Add("X-Chef", "Gennaro"); + headers.Add("Pragma", "public"); + headers.Add("X-Cache-Keep", "3600.000"); + headers.Add("Vary", "Accept-Encoding"); + headers.Add("X-Country-Code", "NZ"); + headers.Add("X-Cache-Status", "MISS"); + headers.Add("X-Backend", "i_04400f0cd28e027e7_10_170_21_65"); + headers.Add("Date", "Fri, 15 Nov 2019 07:44:42 GMT"); + headers.Add("X-Cache-TTL-Remaining", "595.653"); + headers.Add("Server", "ZENEDGE"); + headers.Add("X-Zen-Fury", "efadaddb4a91c6f51faaa477a91c047faa84f52d"); + headers.Add("X-Cache", "FoodCache"); + headers.Add("Accept-Ranges", "bytes"); + headers.Add("Age", "3004"); + headers.Add("X-Cache-Age", "3004.347"); + headers.Add("X-Cdn", "Served-By-Zenedge"); + 
headers.Add("Content-Type", "text/html; charset=UTF-8"); + headers.Add("Last-Modified", "Thu, 01 Jan 1970 00:00:00 GMT"); + headers.Add("Expires", "Fri, 15 Nov 2019 08:44:41 GMT"); + headers.Add("Content-Encoding", "identity"); + return headers; + }); + resMock.Setup(x => x.GetResponseStream()).Returns( + () => new MemoryStream()); + resMock.Setup(x => x.LastModified).Returns(DateTime.UtcNow); + return resMock.Object; + }); + return reqMock.Object; + }); + + var url = new Uri("https://www.jamieoliver.com/recipes/chicken-recipes/chicken-tofu-noodle-soup/"); + var htmlWeb = new HtmlWeb(factoryMock.Object); + HtmlDocument doc = null; + try + { + doc = htmlWeb.Load(url); + Assert.Fail("Load should fail when receiving a response with 'Content-Encoding: identity' in the headers."); + } + catch (EncodingNotSupportedException) + { + try + { + htmlWeb.OverrideEncoding = Encoding.UTF8; + doc = htmlWeb.Load(url); + } + catch (Exception e) + { + Assert.Fail("Load failed even though the invalid encoding was adjusted by overriding it."); + } + } + Assert.That(doc, Is.Not.Null); + } + } +} + diff --git a/src/Tests/HtmlAgilityPack.Tests/files/mshome.htm b/src/Tests/HtmlAgilityPack.Tests/files/mshome.htm new file mode 100644 index 00000000..a7115f57 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/files/mshome.htm @@ -0,0 +1,582 @@ + + + + +Microsoft Corporation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Microsoft Home + +  +  +All Products  +  +| +  +  +Support  +  +| +  +  +Search  +  +| +  +  +microsoft.com Home  +  + +
 
+  +  +Microsoft Home  +  +| +  +  +MSN Home  +  +| +  +  +Subscribe  +  +| +  +  +Manage Your Profile +
+
+ + + + + + + + + +
+ + + + + + + + +
Search for
+ + +
Advanced Search
+ + +
+ + + + + + + + + + + + + + + + + + +
Product Families
+
+ + +
+ + + + + + + + + + + + + + + + +
Resources
+
+ + +
+ + + + + + + + + + + + + + + + +
Information For
+
+ + +
+ + + + + + + + + + +
About Microsoft
+
+ + +
+ + + + +
Worldwide
+
+
+ + + + + + +
+ + +
Shoot for the stars
+ + + + + + + + + + + + + + + + + +
home & entertainment + + + + + + + + + + + + + +
home & entertainment
Download new transition elements for Movie Maker 2
Windows Movie Maker 2 for beginners and experts
Free stationery templates from Office
+
technical resources + + + + + + + + + + +
technical resources
Use C# and XML to display news feeds on your PC
Become an Exchange expert with a little help from your peers
Help improve security in a Windows 2000 Server environment
+
business agility + + + + + + + + + + + + + +
business agility
New for small businesses: Security Update e-mail alert
Download a calendar to track employee summer vacations
Discover Windows Server 2003
+
+ + + + + + + + + + + + + + + + + +

+ + + + + + + + + + + + +
Get to know .NET from A to Z

Download the .NET Alerts SDK

Write mobile games using the .NET Compact Framework

+
+ + + + + + + + + + + + + +
Download the Digital Photography Fun Pack

The Windows XP Expert Zone Community is for everyone

Download MOM Management Packs

+
+ + + + + + + + + + + + + +
Translate your Word documents into different languages

Top 10 reasons to use Visio 2003

Use animation to add life to PowerPoint presentations

+
+
More .NET ... +More Windows ... +More Office ... +

+ + +
Last Updated: Friday, May 30, 2003 - 3:05 p.m. Pacific Time
+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
today's news
AOL Time Warner, Microsoft agree to collaborate

Microsoft announces Real-Time Communications Server 2003

Additional benefits coming for customers with Software Assurance

Do amazing things with our free Windows XP DVD (U.S. only)


More News ...
+
+ + + + + + + + + + + + + + + + + + + + + + +
downloads
PowerToys Fun Pack

DirectX 9.0a

Windows Server 2003 Evaluation Kit


More Downloads ...
+
+ + + + + + + + + + + + + + + + + + + + + +
support
Support for Office XP, 2000, and 97

Product Support Centers (FAQs)

365 Windows XP tips, just one click away


More Support ...
+
+
+ + + +
+Contact Us +|Microsoft This Week! Newsletter +|Legal + +
+ + ©2003 Microsoft Corporation. All rights reserved. + Terms of Use | +Privacy Statement | +Accessibility + +
+ + +.NET +Windows +Office + + + + diff --git a/src/Tests/HtmlAgilityPack.Tests/files/overflow.html b/src/Tests/HtmlAgilityPack.Tests/files/overflow.html new file mode 100644 index 00000000..29cf0db1 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/files/overflow.html @@ -0,0 +1,94 @@ +User list - XCC Forum + + + + + + + + + + + + + + + + +
X +
zzz
X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X + X + X + X + X +
X + X +X
X + X + X +
X + X + X + X + X + X +
diff --git a/src/Tests/HtmlAgilityPack.Tests/files/regression.html b/src/Tests/HtmlAgilityPack.Tests/files/regression.html new file mode 100644 index 00000000..2a198396 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/files/regression.html @@ -0,0 +1,175 @@ + + + +Steam Community :: Steam Web API Key + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+
+

Register Steam Web API Key

+
+
+ + +
+ +
+ +

Your Steam Web API Key

+

Key: Redacted

+

Domain Name: Redacted

+
+ + +
+ + +
+ + +
+
+
+
+
+
+ +
+ + + + + + + +
+ + diff --git a/src/Tests/HtmlAgilityPack.Tests/files/test.html b/src/Tests/HtmlAgilityPack.Tests/files/test.html new file mode 100644 index 00000000..d4924105 --- /dev/null +++ b/src/Tests/HtmlAgilityPack.Tests/files/test.html @@ -0,0 +1,12 @@ + + + + + + +

This is bold heading

+

This is underlined paragraph

+

This is italic heading

+

This is new heading

+ + diff --git a/src/Tools/TestCodeGen/Generator/HtmlWebTestGenerator.cs b/src/Tools/TestCodeGen/Generator/HtmlWebTestGenerator.cs new file mode 100644 index 00000000..ba1fd2b0 --- /dev/null +++ b/src/Tools/TestCodeGen/Generator/HtmlWebTestGenerator.cs @@ -0,0 +1,224 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.RegularExpressions; +using HtmlAgilityPack; + +namespace TestCodeGen.Generator +{ + public class HtmlWebTestGenerator : TestCodeGenerator + { + public override string MenuItemName => "Generate a test for HtmlWeb class mocking the web request."; + + #region Inner Classes + + class DummyException : Exception + { + } + + #endregion + + #region Parameter Loaders + + class UrlParamLoader : ParamLoader + { + public override string Message + => "Enter a URL"; + + public UrlParamLoader(TestCodeGenerator generator) : base(generator) { } + + public override void SetUp(string input) + { + try + { + ((HtmlWebTestGenerator)Generator).Url = new Uri(input); + } + catch (UriFormatException e) + { + throw new Exception("The entered URL was invalid.", e); + } + } + } + + class OutputContentParamLoader : ParamLoader + { + public override string Message + => "Does the test need response content? 
[Yes|No] (default: Yes)"; + + public OutputContentParamLoader(TestCodeGenerator generator) : base(generator) { } + + public override void SetUp(string input) + { + switch (input.Trim().ToLower()) + { + case "no": + case "n": + ((HtmlWebTestGenerator)Generator).OutputContent = false; + return; + } + ((HtmlWebTestGenerator)Generator).OutputContent = true; + } + } + + #endregion + + public Uri Url { get; private set; } + public bool OutputContent { get; private set; } + + public HtmlWebTestGenerator() + { + _paramLoaders.Add(new OutputContentParamLoader(this)); + _paramLoaders.Add(new UrlParamLoader(this)); + } + + public override IEnumerable Generate() + { + var code4StatusCode = ""; + var code4ContentType = ""; + var code4ContentEncoding = ""; + var code4Headers = ""; + var code4Content = ""; + string content = null; + + var htmlWeb = new HtmlWeb(); + htmlWeb.PostResponse += (HttpWebRequest req, HttpWebResponse res) => + { + // generate code for mocking IHttpWebResponse.StatusCode property + code4StatusCode = $@"{Indent(6)}resMock.Setup(x => x.StatusCode).Returns({typeof(HttpStatusCode).Name}.{res.StatusCode.ToString()});"; + + // generate code for mocking IHttpWebResponse.ContentType property + code4ContentType = $@"{Indent(6)}resMock.Setup(x => x.ContentType).Returns(""{res.ContentType}"");"; + + // generate code for mocking IHttpWebResponse.ContentEncoding property + code4ContentEncoding = $@"{Indent(6)}resMock.Setup(x => x.ContentEncoding).Returns(""{res.ContentEncoding}"");"; + + // generate code for mocking IHttpWebResponse.Headers property + var headers = res.Headers; + code4Headers = headers.AllKeys + .Select(k => $@"{Indent(7)}headers.Add(""{k}"", ""{headers[k].Replace("\"", "\\\"")}"");") + .DefaultIfEmpty() + .Aggregate((a, b) => $"{a}{Environment.NewLine}{b}"); + + code4Headers = $@"{Indent(6)}resMock.Setup(x => x.Headers).Returns(() => +{Indent(6)}{{ +{Indent(7)}var headers = new WebHeaderCollection(); +{code4Headers} +{Indent(7)}return headers; 
+{Indent(6)}}});"; + + // generate code for mocking IHttpWebResponse.GetResponseStream() method + if (OutputContent) + { + using (var s = res.GetResponseStream()) + { + Encoding encoding; + try + { + encoding = Encoding.GetEncoding(res.ContentEncoding); + } + catch (ArgumentException) + { + encoding = Encoding.UTF8; + } + var r = new StreamReader(s, encoding); + content = r.ReadToEnd(); + } + + code4Content = $@"{Indent(6)}resMock.Setup(x => x.GetResponseStream()) +{Indent(7)}.Returns(() => new FileStream( +{Indent(8)}Path.Combine(_contentDir, ""{TestName}.html""), +{Indent(8)}FileMode.Open, +{Indent(8)}FileAccess.Read));"; + } + else + { + code4Content = $@"{Indent(6)}resMock.Setup(x => x.GetResponseStream()).Returns( +{Indent(7)}() => new MemoryStream());"; + } + + throw new DummyException(); + }; + + try + { + htmlWeb.Load(Url); + } + catch (DummyException) + { + } + + var regexNewLine = new Regex("(\n\r?)|(\r\n)"); + yield return new TestCode + { + File = new FileInfo(Path.Combine(OutDir.ToString(), TestName + ".cs")), + Content = regexNewLine.Replace($@"using System; +using System.IO; +using System.Net; +using System.Reflection; +using Moq; +using NUnit.Framework; + +namespace HtmlAgilityPack.Tests +{{ +{Indent(1)}[TestFixture] +{Indent(1)}class {TestName} +{Indent(1)}{{ +{Indent(2)}private string _contentDir; + +{Indent(2)}[OneTimeSetUp] +{Indent(2)}public void Setup() +{Indent(2)}{{ +{Indent(3)}_contentDir = Path.Combine( +{Indent(4)}Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), +{Indent(4)}""files""); +{Indent(2)}}} + +{Indent(2)}[Test] +{Indent(2)}public void Test() +{Indent(2)}{{ +{Indent(3)}var factoryMock = new Mock(); +{Indent(3)}factoryMock.Setup(x => x.Create(It.IsAny())) +{Indent(4)}.Returns(u => +{Indent(4)}{{ +{Indent(5)}var reqMock = new Mock(); +{Indent(5)}reqMock.Setup(x => x.Request).Returns(null as HttpWebRequest); +{Indent(5)}reqMock.Setup(x => x.GetResponse()).Returns(() => +{Indent(5)}{{ +{Indent(6)}var resMock = new 
Mock(); +{Indent(6)}resMock.Setup(x => x.ResponseUri).Returns(u); +{code4StatusCode} +{code4ContentType} +{code4ContentEncoding} +{code4Headers} +{code4Content} +{Indent(6)}resMock.Setup(x => x.LastModified).Returns(DateTime.UtcNow); +{Indent(6)}return resMock.Object; +{Indent(5)}}}); +{Indent(5)}return reqMock.Object; +{Indent(4)}}}); + +{Indent(3)}var htmlWeb = new HtmlWeb(factoryMock.Object); +{Indent(3)}var doc = htmlWeb.Load(new Uri(""{Url.OriginalString}"")); +{Indent(2)}}} +{Indent(1)}}} +}} +", Environment.NewLine), + }; + + if (OutputContent) + yield return new TestCode + { + File = new FileInfo(Path.Combine(OutDir.ToString(), "files", TestName + ".html")), + Content = content, + }; + } + + public string Indent(int count) + { + return new string(' ', 4 * count); + } + } +} diff --git a/src/Tools/TestCodeGen/Program.cs b/src/Tools/TestCodeGen/Program.cs new file mode 100644 index 00000000..ea1b9827 --- /dev/null +++ b/src/Tools/TestCodeGen/Program.cs @@ -0,0 +1,79 @@ +using System; +using System.IO; +using System.Linq; +using TestCodeGen.Generator; + +namespace TestCodeGen +{ + class Program + { + static void Main(string[] args) + { + DirectoryInfo outDir = null; + Action paramAction = null; + foreach (var arg in args) + { + switch (arg) + { + case "-t": + paramAction = a => + { + outDir = new DirectoryInfo(a); + }; + break; + default: + paramAction?.Invoke(arg); + paramAction = null; + break; + } + } + + outDir = outDir ?? new DirectoryInfo(Directory.GetCurrentDirectory()); + + var generators = new TestCodeGenerator[] + { + new HtmlWebTestGenerator { OutDir = outDir }, + }; + + Console.WriteLine("Html Agile Pack Test Code Generator"); + Console.WriteLine(""); + for (var i = 0; i < generators.Length; i++) + { + Console.WriteLine($"{i}: {generators[i].MenuItemName}"); + } + Console.WriteLine(""); + Console.Write("Which do you want? 
Select a menu item by number : "); + var itemIndex = Console.ReadLine(); + if (!int.TryParse(itemIndex, out var index) + || index >= generators.Length) + { + Console.WriteLine("[ERROR] Your choice was invalid."); + return; + } + + try + { + foreach (var paramLoader in generators[index].ParamLoaders) + { + Console.Write($"{paramLoader.Message} : "); + var input = Console.ReadLine(); + paramLoader.SetUp(input); + } + } + catch (Exception e) + { + Console.WriteLine($"[ERROR] {e.Message}"); + return; + } + + var files = generators[index].GenerateAndSaveToFile().ToList(); + + Console.WriteLine("generating test code..."); + Console.WriteLine($"Test code was successfully saved to:"); + foreach (var file in files) + { + Console.WriteLine(" " + file.FullName); + } + } + } +} diff --git a/src/Tools/TestCodeGen/TestCode.cs b/src/Tools/TestCodeGen/TestCode.cs new file mode 100644 index 00000000..0196ff07 --- /dev/null +++ b/src/Tools/TestCodeGen/TestCode.cs @@ -0,0 +1,10 @@ +using System.IO; + +namespace TestCodeGen +{ + public class TestCode + { + public FileInfo File { get; set; } + public string Content { get; set; } + } +} \ No newline at end of file diff --git a/src/Tools/TestCodeGen/TestCodeGen.csproj b/src/Tools/TestCodeGen/TestCodeGen.csproj new file mode 100644 index 00000000..af3ef696 --- /dev/null +++ b/src/Tools/TestCodeGen/TestCodeGen.csproj @@ -0,0 +1,15 @@ + + + + Exe + netcoreapp3.0 + + + + + + + + + + diff --git a/src/Tools/TestCodeGen/TestCodeGenerator.cs b/src/Tools/TestCodeGen/TestCodeGenerator.cs new file mode 100644 index 00000000..f6ad2a58 --- /dev/null +++ b/src/Tools/TestCodeGen/TestCodeGenerator.cs @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.IO; + +namespace TestCodeGen +{ + public abstract class TestCodeGenerator + { + #region Inner Classes + + public abstract class ParamLoader + { + protected TestCodeGenerator Generator { get; private set; } + + public ParamLoader(TestCodeGenerator generator) + { + Generator = 
generator; + } + public abstract string Message { get; } + public abstract void SetUp(string input); + } + + #endregion + + #region Parameter Loaders + + class OutDirParamLoader : ParamLoader + { + public override string Message + => $"Where do you save the test code? (default: {Generator.OutDir.FullName})"; + + public OutDirParamLoader(TestCodeGenerator generator) : base(generator) { } + + public override void SetUp(string input) + { + if (!string.IsNullOrEmpty(input)) + Generator.OutDir = new DirectoryInfo(input); + } + } + + class TestNameParamLoader : ParamLoader + { + public override string Message + => "Enter test name"; + + public TestNameParamLoader(TestCodeGenerator generator) : base(generator) { } + + public override void SetUp(string input) + { + if (string.IsNullOrEmpty(input)) + throw new ArgumentException("Cannot proceed without a test name."); + Generator.TestName = input; + } + } + + #endregion + + public abstract string MenuItemName { get; } + protected List _paramLoaders { get; } + public IReadOnlyList ParamLoaders => _paramLoaders.AsReadOnly(); + public DirectoryInfo OutDir { get; set; } + public string TestName { get; private set; } + + public TestCodeGenerator() + { + _paramLoaders = new List(); + _paramLoaders.Add(new OutDirParamLoader(this)); + _paramLoaders.Add(new TestNameParamLoader(this)); + } + public abstract IEnumerable Generate(); + + public virtual IEnumerable GenerateAndSaveToFile() + { + var testCodes = Generate(); + + foreach (var testCode in testCodes) + { + if (!testCode.File.Directory.Exists) + testCode.File.Directory.Create(); + using (var s = new FileStream(testCode.File.ToString(), FileMode.Create, FileAccess.Write)) + using (var w = new StreamWriter(s)) + { + w.WriteLine(testCode.Content); + } + yield return testCode.File; + } + } + } +}