C#爬蟲助手快速發送Post和Get請求幫助類
C#爬蟲助手幫助類:快速發送Post和Get請求幫助類,Http連接幫助類,
HttpHelper幫助類,可快速提取網頁Byte數據得到圖片文件實體
HttpHelper幫助類源碼下載:
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;
using System.Linq;
using System.Net.Cache;

namespace JsonsUtil.Helper
{
    /// <summary>
    /// HTTP helper: builds an <see cref="HttpWebRequest"/> from an <see cref="HttpItem"/>,
    /// sends it, and returns the decoded response as an <see cref="HttpResult"/>.
    /// </summary>
    /// <remarks>
    /// The current request and response are kept in instance fields, so a single
    /// instance must not be used for overlapping calls.
    /// </remarks>
    public class HttpHelper
    {
        #region Fields

        // Encoding used to decode the response body; null means "detect from the response".
        private Encoding encoding = Encoding.Default;

        // Encoding used to turn string / file post data into bytes.
        private Encoding postencoding = Encoding.Default;

        // Request currently being prepared / sent.
        private HttpWebRequest request = null;

        // Response currently being read.
        private HttpWebResponse response = null;

        #endregion

        #region Public

        /// <summary>
        /// Sends the request described by <paramref name="item"/> and returns the response data.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        /// <returns>
        /// The populated result. Failures are not thrown: the exception message is
        /// returned in <see cref="HttpResult.Html"/> instead.
        /// </returns>
        public HttpResult GetHtml(HttpItem item)
        {
            HttpResult result = new HttpResult();

            try
            {
                // Build and configure the request (also writes any post data).
                SetRequest(item);
            }
            catch (Exception ex)
            {
                // Configuration failed; report it through the result instead of throwing.
                // NOTE(review): the status literal below looks mis-encoded in the source;
                // it is kept verbatim because it is runtime text.
                return new HttpResult()
                {
                    Cookie = string.Empty,
                    Header = null,
                    Html = ex.Message,
                    StatusDescription = "配置參數(shù)時(shí)出錯(cuò):" + ex.Message
                };
            }

            try
            {
                try
                {
                    using (response = (HttpWebResponse)request.GetResponse())
                    {
                        GetData(item, result);
                    }
                }
                catch (WebException ex)
                {
                    if (ex.Response != null)
                    {
                        // Error status codes still carry a response; read it like a normal one.
                        using (response = (HttpWebResponse)ex.Response)
                        {
                            GetData(item, result);
                        }
                    }
                    else
                    {
                        result.Html = ex.Message;
                    }
                }
            }
            catch (Exception ex)
            {
                // The outer try also covers failures while reading an error response,
                // which would otherwise escape from the WebException handler.
                result.Html = ex.Message;
            }

            if (item.IsToLower)
            {
                result.Html = result.Html.ToLower();
            }

            return result;
        }

        #endregion

        #region GetData

        /// <summary>
        /// Copies status, headers, cookies and the decoded body of the current
        /// response into <paramref name="result"/>.
        /// </summary>
        /// <param name="item">Request parameters (result type, encoding).</param>
        /// <param name="result">Result object to fill.</param>
        private void GetData(HttpItem item, HttpResult result)
        {
            result.StatusCode = response.StatusCode;
            result.StatusDescription = response.StatusDescription;
            result.Header = response.Headers;
            // Final URI of the response (after any automatic redirects).
            result.ResponseUri = response.ResponseUri.ToString();

            if (response.Cookies != null)
            {
                result.CookieCollection = response.Cookies;
            }

            if (response.Headers["set-cookie"] != null)
            {
                result.Cookie = response.Headers["set-cookie"];
            }

            if (response.ContentEncoding != null)
            {
                result.WebRes = response.ContentEncoding + response.Headers["Accept-Encoding"];
            }

            byte[] responseBytes = GetByte();

            // Short-circuit && so Length is never read on a null array.
            if (responseBytes != null && responseBytes.Length > 0)
            {
                SetEncoding(item, result, responseBytes);
                result.Html = encoding.GetString(responseBytes);
            }
            else
            {
                // Empty body.
                result.Html = string.Empty;
            }
        }

        /// <summary>
        /// Stores the raw bytes when requested and, if no encoding was supplied,
        /// detects one from the page's meta charset or the response charset.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        /// <param name="result">Result object; receives <see cref="HttpResult.ResultByte"/>.</param>
        /// <param name="ResponseByte">Raw (already decompressed) response body.</param>
        private void SetEncoding(HttpItem item, HttpResult result, byte[] ResponseByte)
        {
            if (item.ResultType == ResultType.Byte)
            {
                result.ResultByte = ResponseByte;
            }

            if (encoding != null)
            {
                // Caller supplied an explicit encoding; nothing to detect.
                return;
            }

            // Decode once with the system default just to be able to search for the meta tag.
            string probe = Encoding.Default.GetString(ResponseByte);

            Match meta = Regex.Match(probe, "<meta[^<]*charset=([^<]*)[\"']", RegexOptions.IgnoreCase);
            string charset = meta.Groups[1].Value.ToLower().Trim();
            if (charset.Length < 2)
            {
                // Retry without requiring a closing quote after the charset value.
                meta = Regex.Match(probe, "<meta[^<]*charset=([^<]*)", RegexOptions.IgnoreCase);
                charset = meta.Groups[1].Value.ToLower().Trim();
            }

            if (charset.Length > 2)
            {
                string name = charset
                    .Replace("/>", string.Empty)
                    .Replace("\"", string.Empty)
                    .Replace("'", string.Empty)
                    .Replace(";", string.Empty)
                    // Pages declaring iso-8859-1 are decoded as gbk here (original behavior, kept).
                    .Replace("iso-8859-1", "gbk")
                    .Trim();
                try
                {
                    encoding = Encoding.GetEncoding(name);
                }
                catch (ArgumentException)
                {
                    encoding = GetResponseCharsetEncoding();
                }
                catch (NotSupportedException)
                {
                    encoding = GetResponseCharsetEncoding();
                }
            }
            else
            {
                encoding = GetResponseCharsetEncoding();
            }
        }

        /// <summary>
        /// Returns the encoding named by the response's character set, or UTF-8 when
        /// the character set is missing or not a known encoding name.
        /// </summary>
        private Encoding GetResponseCharsetEncoding()
        {
            if (string.IsNullOrEmpty(response.CharacterSet))
            {
                return Encoding.UTF8;
            }

            try
            {
                return Encoding.GetEncoding(response.CharacterSet);
            }
            catch (ArgumentException)
            {
                // Unknown charset name: fall back instead of failing the whole request.
                return Encoding.UTF8;
            }
        }

        /// <summary>
        /// Reads the whole response body into a byte array, decompressing gzip content.
        /// </summary>
        /// <returns>The response body bytes.</returns>
        private byte[] GetByte()
        {
            using (MemoryStream buffer = new MemoryStream())
            using (Stream body = response.GetResponseStream())
            {
                bool isGzip = response.ContentEncoding != null
                    && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

                if (isGzip)
                {
                    using (GZipStream unzip = new GZipStream(body, CompressionMode.Decompress))
                    {
                        unzip.CopyTo(buffer, 10240);
                    }
                }
                else
                {
                    body.CopyTo(buffer, 10240);
                }

                return buffer.ToArray();
            }
        }

        #endregion

        #region SetRequest

        /// <summary>
        /// Creates the request for <paramref name="item"/> and applies every setting
        /// (headers, proxy, timeouts, cookies, post data, ...).
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetRequest(HttpItem item)
        {
            // Creates the request object and attaches client certificates.
            SetCer(item);

            // Custom headers.
            if (item.Header != null && item.Header.Count > 0)
            {
                foreach (string key in item.Header.AllKeys)
                {
                    request.Headers.Add(key, item.Header[key]);
                }
            }

            SetProxy(item);

            if (item.ProtocolVersion != null)
            {
                request.ProtocolVersion = item.ProtocolVersion;
            }

            request.ServicePoint.Expect100Continue = item.Expect100Continue;
            request.Method = item.Method;
            request.Timeout = item.Timeout;
            request.KeepAlive = item.KeepAlive;
            request.ReadWriteTimeout = item.ReadWriteTimeout;

            if (!string.IsNullOrWhiteSpace(item.Host))
            {
                request.Host = item.Host;
            }

            if (item.IfModifiedSince.HasValue)
            {
                request.IfModifiedSince = item.IfModifiedSince.Value;
            }

            request.Accept = item.Accept;
            request.ContentType = item.ContentType;
            request.UserAgent = item.UserAgent;

            // Response decoding; null enables detection in SetEncoding.
            encoding = item.Encoding;

            request.Credentials = item.ICredentials;
            SetCookie(item);
            request.Referer = item.Referer;
            request.AllowAutoRedirect = item.Allowautoredirect;

            if (item.MaximumAutomaticRedirections > 0)
            {
                request.MaximumAutomaticRedirections = item.MaximumAutomaticRedirections;
            }

            // Must come after Method / ContentType are set: it writes the request body.
            SetPostData(item);

            if (item.Connectionlimit > 0)
            {
                request.ServicePoint.ConnectionLimit = item.Connectionlimit;
            }
        }

        /// <summary>
        /// Creates the request for <see cref="HttpItem.URL"/> and attaches client certificates.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetCer(HttpItem item)
        {
            bool hasCertificateFile = !string.IsNullOrWhiteSpace(item.CerPath);

            if (hasCertificateFile)
            {
                // Must be installed before the connection is created.
                // NOTE(security): this assigns a process-wide callback that accepts every
                // server certificate (see CheckValidationResult). Kept for compatibility.
                ServicePointManager.ServerCertificateValidationCallback =
                    new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
            }

            request = (HttpWebRequest)WebRequest.Create(item.URL);
            SetCerList(item);

            if (hasCertificateFile)
            {
                request.ClientCertificates.Add(new X509Certificate(item.CerPath));
            }
        }

        /// <summary>
        /// Adds every certificate in <see cref="HttpItem.ClentCertificates"/> to the request.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetCerList(HttpItem item)
        {
            if (item.ClentCertificates == null || item.ClentCertificates.Count == 0)
            {
                return;
            }

            foreach (X509Certificate certificate in item.ClentCertificates)
            {
                request.ClientCertificates.Add(certificate);
            }
        }

        /// <summary>
        /// Applies the cookie header string and, when requested, a cookie container.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetCookie(HttpItem item)
        {
            if (!string.IsNullOrEmpty(item.Cookie))
            {
                request.Headers[HttpRequestHeader.Cookie] = item.Cookie;
            }

            if (item.ResultCookieType == ResultCookieType.CookieCollection)
            {
                request.CookieContainer = new CookieContainer();
                if (item.CookieCollection != null && item.CookieCollection.Count > 0)
                {
                    request.CookieContainer.Add(item.CookieCollection);
                }
            }
        }

        /// <summary>
        /// Writes the request body for any method whose name does not contain "get".
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetPostData(HttpItem item)
        {
            // Same test as before ("method name contains get"), without culture-sensitive lowering.
            if (request.Method.Trim().IndexOf("get", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return;
            }

            // Reset on every call so an encoding from a previous request never leaks into this one.
            postencoding = item.PostEncoding ?? Encoding.Default;

            byte[] buffer = null;

            if (item.PostDataType == PostDataType.Byte && item.PostdataByte != null && item.PostdataByte.Length > 0)
            {
                // Raw bytes supplied by the caller.
                buffer = item.PostdataByte;
            }
            else if (item.PostDataType == PostDataType.FilePath && !string.IsNullOrWhiteSpace(item.Postdata))
            {
                // Postdata is a file path; the file is read as text in the post encoding.
                using (StreamReader reader = new StreamReader(item.Postdata, postencoding))
                {
                    buffer = postencoding.GetBytes(reader.ReadToEnd());
                }
            }
            else if (!string.IsNullOrWhiteSpace(item.Postdata))
            {
                // Plain string body.
                buffer = postencoding.GetBytes(item.Postdata);
            }

            if (buffer != null)
            {
                request.ContentLength = buffer.Length;
                using (Stream body = request.GetRequestStream())
                {
                    body.Write(buffer, 0, buffer.Length);
                }
            }
        }

        /// <summary>
        /// Configures the proxy: an explicit "host[:port]" from <see cref="HttpItem.ProxyIp"/>,
        /// the untouched default proxy when ProxyIp contains "ieproxy", or
        /// <see cref="HttpItem.WebProxy"/> otherwise.
        /// </summary>
        /// <param name="item">Request parameters.</param>
        private void SetProxy(HttpItem item)
        {
            bool hasProxyIp = !string.IsNullOrWhiteSpace(item.ProxyIp);
            bool isIeProxy = hasProxyIp
                && item.ProxyIp.IndexOf("ieproxy", StringComparison.OrdinalIgnoreCase) >= 0;

            if (isIeProxy)
            {
                // Leave request.Proxy alone so the request keeps its default proxy.
                return;
            }

            if (!hasProxyIp)
            {
                request.Proxy = item.WebProxy;
                return;
            }

            WebProxy proxy;
            if (item.ProxyIp.Contains(":"))
            {
                string[] parts = item.ProxyIp.Split(':');
                proxy = new WebProxy(parts[0].Trim(), Convert.ToInt32(parts[1].Trim()));
            }
            else
            {
                proxy = new WebProxy(item.ProxyIp, false);
            }

            proxy.Credentials = new NetworkCredential(item.ProxyUserName, item.ProxyPwd);
            request.Proxy = proxy;
        }

        #endregion

        #region Certificate validation

        /// <summary>
        /// Server certificate validation callback that accepts every certificate.
        /// </summary>
        /// <param name="sender">Callback sender.</param>
        /// <param name="certificate">Server certificate.</param>
        /// <param name="chain">Certificate chain.</param>
        /// <param name="errors">Policy errors found by the framework (ignored).</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            // NOTE(security): ignores all policy errors, i.e. disables TLS server validation.
            return true;
        }

        #endregion
    }

    #region Public types

    /// <summary>
    /// Parameters of a single HTTP request.
    /// </summary>
    public class HttpItem
    {
        /// <summary>
        /// Request URL (required).
        /// </summary>
        public string URL { get; set; }

        string _Method = "GET";
        /// <summary>
        /// HTTP method; defaults to GET. For other methods set the post data as well.
        /// </summary>
        public string Method
        {
            get { return _Method; }
            set { _Method = value; }
        }

        int _Timeout = 100000;
        /// <summary>
        /// Request timeout in milliseconds; defaults to 100000.
        /// </summary>
        public int Timeout
        {
            get { return _Timeout; }
            set { _Timeout = value; }
        }

        int _ReadWriteTimeout = 30000;
        /// <summary>
        /// Stream read/write timeout in milliseconds; defaults to 30000.
        /// </summary>
        public int ReadWriteTimeout
        {
            get { return _ReadWriteTimeout; }
            set { _ReadWriteTimeout = value; }
        }

        /// <summary>
        /// Host header value; only applied when not blank.
        /// </summary>
        public string Host { get; set; }

        bool _KeepAlive = true;
        /// <summary>
        /// Whether to keep a persistent connection; defaults to true.
        /// </summary>
        public bool KeepAlive
        {
            get { return _KeepAlive; }
            set { _KeepAlive = value; }
        }

        string _Accept = "text/html, application/xhtml+xml, */*";
        /// <summary>
        /// Accept header; defaults to "text/html, application/xhtml+xml, */*".
        /// </summary>
        public string Accept
        {
            get { return _Accept; }
            set { _Accept = value; }
        }

        string _ContentType = "text/html";
        /// <summary>
        /// Content-Type header of the request; defaults to "text/html".
        /// </summary>
        public string ContentType
        {
            get { return _ContentType; }
            set { _ContentType = value; }
        }

        string _UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        /// <summary>
        /// User-Agent header; defaults to an MSIE 9 user agent string.
        /// </summary>
        public string UserAgent
        {
            get { return _UserAgent; }
            set { _UserAgent = value; }
        }

        /// <summary>
        /// Encoding used to decode the response; null (the default) enables detection.
        /// </summary>
        public Encoding Encoding { get; set; }

        private PostDataType _PostDataType = PostDataType.String;
        /// <summary>
        /// Kind of post data to send; defaults to <see cref="JsonsUtil.Helper.PostDataType.String"/>.
        /// </summary>
        public PostDataType PostDataType
        {
            get { return _PostDataType; }
            set { _PostDataType = value; }
        }

        /// <summary>
        /// String post data, or the file path when the post data type is FilePath.
        /// </summary>
        public string Postdata { get; set; }

        /// <summary>
        /// Byte post data, used when the post data type is Byte.
        /// </summary>
        public byte[] PostdataByte { get; set; }

        /// <summary>
        /// Cookies to send when the cookie result type is CookieCollection.
        /// </summary>
        public CookieCollection CookieCollection { get; set; }

        /// <summary>
        /// Cookie header string to send.
        /// </summary>
        public string Cookie { get; set; }

        /// <summary>
        /// Referer header.
        /// </summary>
        public string Referer { get; set; }

        /// <summary>
        /// Path of a client certificate file. Setting it also switches on the
        /// accept-all server certificate callback.
        /// </summary>
        public string CerPath { get; set; }

        /// <summary>
        /// Proxy object used when <see cref="ProxyIp"/> is blank; null means no proxy.
        /// </summary>
        public WebProxy WebProxy { get; set; }

        private bool isToLower = false;
        /// <summary>
        /// Whether to lower-case the returned text; defaults to false.
        /// </summary>
        public bool IsToLower
        {
            get { return isToLower; }
            set { isToLower = value; }
        }

        private bool allowautoredirect = false;
        /// <summary>
        /// Whether redirects are followed automatically; defaults to false.
        /// </summary>
        public bool Allowautoredirect
        {
            get { return allowautoredirect; }
            set { allowautoredirect = value; }
        }

        private int connectionlimit = 1024;
        /// <summary>
        /// Connection limit for the request's service point; defaults to 1024.
        /// </summary>
        public int Connectionlimit
        {
            get { return connectionlimit; }
            set { connectionlimit = value; }
        }

        /// <summary>
        /// Proxy user name.
        /// </summary>
        public string ProxyUserName { get; set; }

        /// <summary>
        /// Proxy password.
        /// </summary>
        public string ProxyPwd { get; set; }

        /// <summary>
        /// Proxy address as "host" or "host:port"; a value containing "ieproxy"
        /// leaves the request's default proxy in place.
        /// </summary>
        public string ProxyIp { get; set; }

        private ResultType resulttype = ResultType.String;
        /// <summary>
        /// Whether raw bytes are returned in addition to text; defaults to String.
        /// </summary>
        public ResultType ResultType
        {
            get { return resulttype; }
            set { resulttype = value; }
        }

        private WebHeaderCollection header = new WebHeaderCollection();
        /// <summary>
        /// Additional request headers.
        /// </summary>
        public WebHeaderCollection Header
        {
            get { return header; }
            set { header = value; }
        }

        /// <summary>
        /// HTTP version of the request; only applied when not null.
        /// </summary>
        public Version ProtocolVersion { get; set; }

        private bool _expect100continue = true;
        /// <summary>
        /// Whether 100-Continue behavior is used; defaults to true.
        /// </summary>
        public bool Expect100Continue
        {
            get { return _expect100continue; }
            set { _expect100continue = value; }
        }

        /// <summary>
        /// Client certificates to attach to the request.
        /// </summary>
        public X509CertificateCollection ClentCertificates { get; set; }

        /// <summary>
        /// Encoding of string / file post data; null means the system default encoding.
        /// </summary>
        public Encoding PostEncoding { get; set; }

        private ResultCookieType _ResultCookieType = ResultCookieType.String;
        /// <summary>
        /// Cookie handling mode; defaults to String.
        /// </summary>
        public ResultCookieType ResultCookieType
        {
            get { return _ResultCookieType; }
            set { _ResultCookieType = value; }
        }

        private ICredentials _ICredentials = CredentialCache.DefaultCredentials;
        /// <summary>
        /// Credentials of the request; defaults to <see cref="CredentialCache.DefaultCredentials"/>.
        /// </summary>
        public ICredentials ICredentials
        {
            get { return _ICredentials; }
            set { _ICredentials = value; }
        }

        /// <summary>
        /// Maximum number of redirects to follow; only applied when greater than zero.
        /// </summary>
        public int MaximumAutomaticRedirections { get; set; }

        private DateTime? _IfModifiedSince = null;
        /// <summary>
        /// If-Modified-Since header value; null (the default) leaves the header unset.
        /// </summary>
        public DateTime? IfModifiedSince
        {
            get { return _IfModifiedSince; }
            set { _IfModifiedSince = value; }
        }
    }

    /// <summary>
    /// Result of an HTTP request made through <see cref="HttpHelper"/>.
    /// </summary>
    public class HttpResult
    {
        /// <summary>
        /// Content-Encoding of the response concatenated with its Accept-Encoding header.
        /// </summary>
        public string WebRes { get; set; }

        /// <summary>
        /// Value of the response's Set-Cookie header.
        /// </summary>
        public string Cookie { get; set; }

        /// <summary>
        /// Cookies of the response.
        /// </summary>
        public CookieCollection CookieCollection { get; set; }

        private string _html = string.Empty;
        /// <summary>
        /// Decoded response text, or the error message when the request failed.
        /// </summary>
        public string Html
        {
            get { return _html; }
            set { _html = value; }
        }

        /// <summary>
        /// Raw response bytes; only set when the request's result type is Byte.
        /// </summary>
        public byte[] ResultByte { get; set; }

        /// <summary>
        /// Response headers.
        /// </summary>
        public WebHeaderCollection Header { get; set; }

        /// <summary>
        /// Status description of the response.
        /// </summary>
        public string StatusDescription { get; set; }

        /// <summary>
        /// Status code of the response; only set when a response was received.
        /// </summary>
        public HttpStatusCode StatusCode { get; set; }

        /// <summary>
        /// URI that produced the response.
        /// </summary>
        public string ResponseUri { get; set; }

        /// <summary>
        /// Redirect target from the Location header as an absolute URL, or an empty
        /// string when there is no usable Location header. The URL's letter case is
        /// preserved, since paths and queries can be case-sensitive.
        /// </summary>
        public string RedirectUrl
        {
            get
            {
                if (Header == null || Header.Count == 0)
                {
                    return string.Empty;
                }

                // Header lookup by name is case-insensitive.
                string location = Header["Location"];
                if (string.IsNullOrWhiteSpace(location))
                {
                    return string.Empty;
                }

                bool isAbsolute = location.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || location.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (isAbsolute)
                {
                    return location;
                }

                // Relative redirect: resolve against the URI that produced the response.
                Uri baseUri;
                Uri resolved;
                if (Uri.TryCreate(ResponseUri, UriKind.Absolute, out baseUri)
                    && Uri.TryCreate(baseUri, location, out resolved))
                {
                    return resolved.AbsoluteUri;
                }

                return string.Empty;
            }
        }
    }

    /// <summary>
    /// What the result should contain.
    /// </summary>
    public enum ResultType
    {
        /// <summary>
        /// Text only: just Html is filled.
        /// </summary>
        String,
        /// <summary>
        /// Text and bytes: both ResultByte and Html are filled.
        /// </summary>
        Byte
    }

    /// <summary>
    /// Format of the post data; the default is String.
    /// </summary>
    public enum PostDataType
    {
        /// <summary>
        /// Postdata is the string body.
        /// </summary>
        String,
        /// <summary>
        /// PostdataByte is the body; no post encoding is needed.
        /// </summary>
        Byte,
        /// <summary>
        /// Postdata is the path of a file whose content is sent; it is read using the post encoding.
        /// </summary>
        FilePath
    }

    /// <summary>
    /// How cookies are exchanged.
    /// </summary>
    public enum ResultCookieType
    {
        /// <summary>
        /// Cookie header string only.
        /// </summary>
        String,
        /// <summary>
        /// Use a cookie container as well as the cookie header string.
        /// </summary>
        CookieCollection
    }

    #endregion
}
原文鏈接:C#爬蟲助手快速發送Post和Get請求幫助類