-
Notifications
You must be signed in to change notification settings - Fork 0
/
WebRequestRobot.cs
166 lines (149 loc) · 7.58 KB
/
WebRequestRobot.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Net;
using System.Runtime.Serialization;
using System.ServiceModel;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace sharp_net {
public class WebRequestRobot {
// Browser-like User-Agent header value; WeiboShortUrl and AnalysisVideoUrl assign it to their requests.
const string userAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36";
/// <summary>
/// Downloads the resource at <paramref name="uri"/> and writes it to <paramref name="filePath"/>,
/// creating the target directory when it does not exist yet.
/// </summary>
/// <param name="uri">HTTP(S) address of the image to fetch.</param>
/// <param name="filePath">Destination file; an existing file is replaced.</param>
/// <remarks>
/// Nothing is written unless the response status is OK, Moved or Redirect. Exceptions raised by the
/// request (for example <see cref="WebException"/>) or by the file system are not caught here.
/// </remarks>
public void DownloadImage(string uri, string filePath) {
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
    // Dispose the response so the underlying connection is released even when we bail out early.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
        // Only the status code is checked. A Content-Type check
        // (response.ContentType.StartsWith("image", StringComparison.OrdinalIgnoreCase))
        // used to be here but is disabled; the original note attributes that to a Chouti bug, see
        // http://img1.chouti.com/group5/M03/74/6C/wKgCEVJXEiDoGigtAA7ki98-vN4272.jpg
        // NOTE(review): presumably that host serves images with a non-image content type - confirm.
        bool found = response.StatusCode == HttpStatusCode.OK ||
                     response.StatusCode == HttpStatusCode.Moved ||
                     response.StatusCode == HttpStatusCode.Redirect;
        if (!found) {
            return;
        }

        // Path.GetDirectoryName is used instead of string replacement: replacing the file name
        // inside the full path also mangles directory segments that contain the same text.
        string directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory)) {
            // No-op when the directory already exists.
            Directory.CreateDirectory(directory);
        }

        using (Stream inputStream = response.GetResponseStream())
        // File.Create truncates an existing file, so no stale bytes survive past the new content.
        using (Stream outputStream = File.Create(filePath)) {
            inputStream.CopyTo(outputStream);
        }
    }
}
/// <summary>
/// Requests <paramref name="url"/> without following redirects and returns the redirect target.
/// </summary>
/// <param name="url">Address to probe.</param>
/// <returns>
/// The value of the Location header when the server answers with 302 (Redirect) or
/// 301 (MovedPermanently); otherwise an empty string. An empty string is also returned when
/// <paramref name="url"/> is not an HTTP(S) address or the redirect carries no Location header.
/// </returns>
/// <remarks>Exceptions raised by the request itself are not caught here.</remarks>
public string Get302Location(string url) {
    HttpWebRequest req = WebRequest.Create(url) as HttpWebRequest;
    if (req == null) {
        // Not an HTTP request (e.g. file://): there is no redirect to report.
        return string.Empty;
    }

    // Keep the 3xx response visible instead of letting the framework follow it.
    req.AllowAutoRedirect = false;

    using (HttpWebResponse response = (HttpWebResponse)req.GetResponse()) {
        if (response.StatusCode == HttpStatusCode.Redirect ||
            response.StatusCode == HttpStatusCode.MovedPermanently) {
            return response.Headers["Location"] ?? string.Empty;
        }
    }
    return string.Empty;
}
/// <summary>
/// Gives <paramref name="request"/> a fresh <see cref="CookieContainer"/> and fills it with the
/// cookies listed in the "WeiboCookie" app setting, each scoped to path "/" and domain ".weibo.com".
/// </summary>
/// <param name="request">Request that receives the cookie container.</param>
/// <remarks>
/// The setting is parsed as a ';'-separated list of name=value pairs. When the setting is missing
/// or blank the container is left empty. Empty entries and entries without '=' are skipped.
/// </remarks>
private void InjectWeiboCookie(HttpWebRequest request) {
    request.CookieContainer = new CookieContainer();

    string cookieRaw = ConfigurationManager.AppSettings["WeiboCookie"];
    if (string.IsNullOrWhiteSpace(cookieRaw)) {
        return;
    }

    // RemoveEmptyEntries tolerates a trailing ';' in the configured value.
    string[] cookiestrs = cookieRaw.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string cookiestr in cookiestrs) {
        // Split on the first '=' only: a cookie value may itself contain '=' characters.
        string[] cookiekv = cookiestr.Trim().Split(new[] { '=' }, 2);
        if (cookiekv.Length < 2) {
            continue;
        }

        string name = cookiekv[0].Trim();
        if (name.Length == 0) {
            continue;
        }

        var cookie = new Cookie(name, cookiekv[1], "/", ".weibo.com");
        request.CookieContainer.Add(cookie);
    }
}
/// <summary>
/// Asks the Weibo endpoint http://weibo.com/aj/mblog/video for the short URL of <paramref name="url"/>.
/// </summary>
/// <param name="url">Address to shorten; it is URL-escaped before being placed in the query string.</param>
/// <returns>
/// The <c>data.url</c> field of the JSON answer. When the body starts with "&lt;!DOCTYPE" (an HTML page
/// instead of JSON) a fixed message beginning with "sorry" is returned. An empty string is returned when
/// the status is not OK, the payload carries no data, or any exception occurs while talking to the server.
/// </returns>
public async Task<string> WeiboShortUrl(string url) {
    // Escape the target so that '&', '#', '?' etc. inside it cannot break our own query string.
    string escapedUrl = Uri.EscapeDataString(url ?? string.Empty);
    var wsuRequest = WebRequest.Create("http://weibo.com/aj/mblog/video?_wv=5&url=" + escapedUrl) as HttpWebRequest;
    if (wsuRequest == null) {
        return string.Empty; // not an HTTP request
    }

    wsuRequest.UserAgent = userAgent;
    InjectWeiboCookie(wsuRequest);

    try {
        using (var wsuResponse = (HttpWebResponse)await wsuRequest.GetResponseAsync().ConfigureAwait(false)) {
            if (wsuResponse.StatusCode != HttpStatusCode.OK) {
                return string.Empty;
            }

            string responseData;
            using (var reader = new StreamReader(wsuResponse.GetResponseStream())) {
                responseData = await reader.ReadToEndAsync().ConfigureAwait(false);
            }

            // An HTML document instead of JSON is reported with a fixed message;
            // AnalysisVideoUrl recognises it by its "sorry" prefix.
            if (responseData.StartsWith("<!DOCTYPE", StringComparison.Ordinal)) {
                return "sorry,系统出现错误,请稍后再试。";
            }

            var wsuObj = JsonConvert.DeserializeObject<WeiboShartUrlResponse>(responseData);
            if (wsuObj == null || wsuObj.data == null) {
                return string.Empty;
            }
            return wsuObj.data.url ?? string.Empty;
        }
    } catch (Exception) {
        // Deliberate best effort: any network or parsing failure means "no short URL".
        return string.Empty;
    }
}
//http://www.cnblogs.com/e241138/archive/2012/12/16/2820054.html
/// <summary>
/// Resolves <paramref name="url"/> to a Weibo short URL via <c>WeiboShortUrl</c> and then queries
/// http://api.weibo.com/widget/show.jsonp for it.
/// </summary>
/// <param name="url">Address of the video page.</param>
/// <returns>
/// The <c>result</c> field of the JSON answer. If <c>WeiboShortUrl</c> returned a message starting
/// with "sorry", that message is passed through unchanged. An empty string is returned when no short
/// URL was obtained, the status is not OK, the payload is empty or not valid JSON, or the request
/// fails with a <see cref="WebException"/>.
/// </returns>
public async Task<string> AnalysisVideoUrl(string url) {
    string weiboShortUrl = await WeiboShortUrl(url).ConfigureAwait(false);
    if (string.IsNullOrEmpty(weiboShortUrl)) {
        return string.Empty;
    }
    if (weiboShortUrl.StartsWith("sorry", StringComparison.Ordinal)) {
        // Error message produced by WeiboShortUrl; hand it to the caller as is.
        return weiboShortUrl;
    }

    // The short URL is escaped because it is embedded as a query-string value.
    string videoUrl = string.Format(
        "http://api.weibo.com/widget/show.jsonp?vers=3&lang=zh-cn&short_url={0}&template_name=embed&source=2292547934",
        Uri.EscapeDataString(weiboShortUrl));
    var videoRequest = WebRequest.Create(videoUrl) as HttpWebRequest;
    if (videoRequest == null) {
        return string.Empty; // not an HTTP request
    }

    videoRequest.UserAgent = userAgent;
    InjectWeiboCookie(videoRequest);

    try {
        using (var videoResponse = (HttpWebResponse)await videoRequest.GetResponseAsync().ConfigureAwait(false)) {
            if (videoResponse.StatusCode != HttpStatusCode.OK) {
                return string.Empty;
            }

            string responseData;
            using (var reader = new StreamReader(videoResponse.GetResponseStream())) {
                responseData = await reader.ReadToEndAsync().ConfigureAwait(false);
            }

            var videoObj = JsonConvert.DeserializeObject<WeiboVideoResponse>(responseData);
            if (videoObj == null) {
                return string.Empty;
            }
            return videoObj.result ?? string.Empty;
        }
    } catch (WebException) {
        return string.Empty;
    } catch (JsonException) {
        // A body that is not valid JSON is treated like any other failed lookup.
        return string.Empty;
    }
}
/// <summary>
/// Matches the first <c>&lt;title ...&gt;</c> element of an HTML document; group 1 is its text.
/// The lazy quantifier stops at the nearest <c>&lt;/title&gt;</c>, so later title elements
/// (for example inside inline SVG) are not swallowed.
/// </summary>
private static readonly Regex titleRegex = new Regex(
    @"<title\b[^>]*>([\s\S]*?)</title>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns the trimmed text of its title element.
/// </summary>
/// <param name="url">Address of the page.</param>
/// <returns>
/// The page title, or an empty string when the address is not HTTP(S), the response is not
/// <c>text/html</c>, no title element is found, or the request fails with a <see cref="WebException"/>.
/// </returns>
/// <remarks>
/// The body is decoded with the "gb2312" encoding when the Content-Type mentions GBK or GB2312
/// (case-insensitively); otherwise <see cref="StreamReader"/>'s default decoding is used.
/// </remarks>
public async Task<string> GetWebPageTitle(string url) {
    var request = WebRequest.Create(url) as HttpWebRequest;
    // If the request wasn't an HTTP request (like a file), ignore it.
    if (request == null) {
        return string.Empty;
    }

    // Use the user's credentials.
    // NOTE(review): this applies to whatever URL the caller passes in - confirm that is intended.
    request.UseDefaultCredentials = true;

    try {
        using (var response = (HttpWebResponse)await request.GetResponseAsync().ConfigureAwait(false)) {
            // The header indexer is case-insensitive, unlike a string search through AllKeys.
            string contentType = response.Headers["Content-Type"];
            if (contentType == null ||
                !contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase)) {
                return string.Empty;
            }

            bool isGbk =
                contentType.IndexOf("GBK", StringComparison.OrdinalIgnoreCase) >= 0 ||
                contentType.IndexOf("GB2312", StringComparison.OrdinalIgnoreCase) >= 0;

            string responseData;
            Stream body = response.GetResponseStream();
            using (var reader = isGbk
                    ? new StreamReader(body, Encoding.GetEncoding("gb2312"))
                    : new StreamReader(body)) {
                responseData = await reader.ReadToEndAsync().ConfigureAwait(false);
            }

            // Groups[1] is empty when nothing matched, which yields an empty title.
            return titleRegex.Match(responseData).Groups[1].Value.Trim();
        }
    } catch (WebException) {
        // Obtaining the response failed: report "no title" instead of propagating.
        return string.Empty;
    }
}
// Deserialization target for the JSON body that WeiboShortUrl receives.
// Property names are lower-case, presumably to mirror the JSON keys - verify against the payload.
private class WeiboShartUrlResponse {
// Not read by the code visible in this file.
public string code { get; set; }
// Not read by the code visible in this file.
public string msg { get; set; }
// Payload object; WeiboShortUrl returns its url member.
public WeiboShartUrlData data { get; set; }
}
// Type of the data member of WeiboShartUrlResponse.
private class WeiboShartUrlData{
// Value that WeiboShortUrl returns to its caller as the short URL.
public string url { get; set; }
// Not read by the code visible in this file.
public string title { get; set; }
}
// Deserialization target for the JSON body that AnalysisVideoUrl receives.
private class WeiboVideoResponse {
// Value that AnalysisVideoUrl returns to its caller.
public string result { get; set; }
}
}
}