Skip to content

Commit

Permalink
Get and replace cookie
Browse files Browse the repository at this point in the history
  • Loading branch information
NaiboWang committed Jul 5, 2023
1 parent cbf48d4 commit a0dbaea
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 11 deletions.
Binary file modified ElectronJS/EasySpider_en.crx
Binary file not shown.
Binary file modified ElectronJS/EasySpider_zh.crx
Binary file not shown.
12 changes: 11 additions & 1 deletion ElectronJS/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,17 @@ async function beginInvoke(msg, ws) {
try{
flowchart_window.openDevTools();
} catch {

console.log("open devtools error");
}
} else if (msg.type == 7) {
// 获得当前页面Cookies
try{
let cookies = await driver.manage().getCookies();
console.log("Cookies: ", cookies);
let cookiesText = cookies.map(cookie => `${cookie.name}=${cookie.value}`).join('\n');
socket_flowchart.send(JSON.stringify({"type": "GetCookies", "message": cookiesText}));
} catch {
console.log("Cannot get Cookies");
}
}
}
Expand Down
17 changes: 17 additions & 0 deletions ElectronJS/src/taskGrid/FlowChart_CN.html
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,23 @@ <h4 class="modal-title">等价XPath</h4>
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollCount']" type="number" required></input>
<label>滚动后等待时间(秒):</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollWaitTime']" type="number" required></input>

<p style="margin-top: 10px">
<a class="btn btn-primary" data-toggle="collapse" href="#collapseOpenPage" role="button" aria-expanded="false" aria-controls="collapseExample">
点此展开/折叠高级操作
</a>
</p>
<div :class="{collapse: true, 'show': nowNode['parameters']['cookies'].length!=0}" id="collapseOpenPage">
<div>
<label>加载后设置Cookies: </label>
<p style="margin-bottom: 20px;color:white"><a class="btn btn-primary" @click="getCookies">
点击获取当前页面Cookie
</a></p>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='key=value形式,每行一个键值对' v-model='nowNode["parameters"]["cookies"]' id="pageCookies" style="font-size: 14px!important;"></textarea>
</div>
</div>

</div>

<div class="elements" v-if="nodeType==2">
Expand Down
13 changes: 11 additions & 2 deletions ElectronJS/src/taskGrid/FlowChart_CN.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ let app = new Vue({
},
},
methods: {
getCookies: function() { //获取cookies
let command = new WebSocket("ws://localhost:"+getUrlParam("wsport"))
command.onopen = function() {
let message = {
type: 7, //消息类型,0代表连接操作
};
this.send(JSON.stringify(message));
};
},
changeXPaths: function (XPaths){
let result = "";
for (let i = 0; i < XPaths.length; i++) {
Expand Down Expand Up @@ -588,7 +597,7 @@ document.onkeydown = function(e) {
location.reload();
} else if (currKey == 123) {
console.log("打开devtools")
let command = new WebSocket("ws://localhost:8084")
let command = new WebSocket("ws://localhost:"+getUrlParam("wsport"))
command.onopen = function() {
let message = {
type: 6, //消息类型,0代表连接操作
Expand All @@ -604,4 +613,4 @@ function inputDelete(e) {
e.stopPropagation(); //输入框按delete应该正常运行
//Electron中如果有alert或者confirm,执行后会卡死输入框,所以最好不要用
}
}
}
2 changes: 1 addition & 1 deletion ElectronJS/src/taskGrid/logic.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function handleAddElement(msg) {
addElement(1, msg);
} else if (msg["type"] == "singleClick") {
addElement(2, msg);
} else if (msg["type"] == "InputText") {
} else if (msg["type"] == "inputText") {
addElement(4, msg);
} else if (msg["type"] == "changeOption"){
addElement(6, msg);
Expand Down
19 changes: 15 additions & 4 deletions ElectronJS/src/taskGrid/logic_CN.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ function handleAddElement(msg) {
addElement(1, msg);
} else if (msg["type"] == "singleClick") {
addElement(2, msg);
} else if (msg["type"] == "InputText") {
} else if (msg["type"] == "inputText") {
addElement(4, msg);
} else if (msg["type"] == "changeOption"){
addElement(6, msg);
Expand Down Expand Up @@ -96,6 +96,14 @@ function handleAddElement(msg) {
addElement(8, msg);
addElement(3, msg);
notifyParameterNum(msg["parameters"].length); //通知浏览器端参数的个数变化
} else if(msg["type"] == "GetCookies"){
for(let node of nodeList){
if(node["option"] == 1){
node["parameters"]["cookies"] = msg["message"];
$("#pageCookies").val(msg["message"]);
break;
}
}
}
}

Expand Down Expand Up @@ -156,6 +164,7 @@ function addParameters(t) {
t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部
t["parameters"]["scrollCount"] = 1; //滚动次数
t["parameters"]["scrollWaitTime"] = 1; //滚动后等待时间
t["parameters"]["cookies"] = ""; //cookies
} else if (t.option == 2) { //点击元素
t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部
t["parameters"]["scrollCount"] = 1; //滚动次数
Expand Down Expand Up @@ -254,8 +263,7 @@ function modifyParameters(t, para) {
}
}

//点击确定按钮时的处理
$("#confirm").mousedown(function() {
function updateUI() {
refresh(false);
app.$data.nowArrow["num"]++; //改变元素的值,通知画图,重新对锚点画图
let tnodes = document.getElementsByClassName("clk");
Expand All @@ -268,7 +276,10 @@ $("#confirm").mousedown(function() {
break;
}
}
});
}

//点击确定按钮时的处理
$("#confirm").mousedown(updateUI);

//获取url中的参数
function getUrlParam(name) {
Expand Down
1 change: 1 addition & 0 deletions ElectronJS/tasks/141.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":141,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/5/2023, 11:13:03 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"ipLoc-djd=53283-53456-0-0; areaId=53283; mba_sid=16885699655799419528717331774.0; __jdc=122270672; 3AB9D23F7A4B3C9B=3EHIRO46HKXUNNSA7AFBSLZLZ6ICUQG3NUT5VTWFZFUBTRI5ZUXV6XM5CYB5VWCYC6YLWOOIUQAUILWMLGTQWCRMBA; __jdb=122270672.1.16885699655751091362768|1.1688569965; mba_muid=16885699655751091362768; __jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688569965576; __jda=122270672.16885699655751091362768.1688569965.1688569965.1688569965.1"}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"],"exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"wm0dfnp2tjeljpuyon0","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
1 change: 1 addition & 0 deletions ElectronJS/tasks/142.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":142,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/6/2023, 3:38:35 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"test=123\nipLoc-djd=53283-53456-0-0\nareaId=53283\nmba_sid=16885856346417163685425076773.0\n__jdc=122270672\n__jdb=122270672.1.16885856346381587112207|1.1688585634\nmba_muid=16885856346381587112207\n__jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688585634639\n__jda=122270672.16885856346381587112207.1688585634.1688585634.1688585634.1"}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"p2h2i1dva8ljq4aje2","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true,
"args": ["--id", "[1]", "--read_type", "remote", "--headless", "0"]
"args": ["--id", "[3]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
}
Expand Down
14 changes: 13 additions & 1 deletion ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ def preprocess(self):
iframe = node["parameters"]["iframe"]
except:
node["parameters"]["iframe"] = False
if node["option"] == 1: # 打开网页操作
try:
cookies = node["parameters"]["cookies"]
except:
node["parameters"]["cookies"] = ""
if node["option"] == 3: # 提取数据操作
paras = node["parameters"]["paras"]
for para in paras:
Expand Down Expand Up @@ -705,6 +710,14 @@ def openPage(self, para, loopValue):
self.browser.set_page_load_timeout(maxWaitTime) # 加载页面最大超时时间
self.browser.set_script_timeout(maxWaitTime)
self.browser.get(url)
if para["cookies"] != "":
self.browser.delete_all_cookies() # 清除所有已有cookie
cookies = para["cookies"].split('\n')
for cookie in cookies:
name, value = cookie.split('=', 1)
cookie_dict = {'name': name, 'value': value}
# 加载 cookie
self.browser.add_cookie(cookie_dict)
self.Log('Loading page: ' + url)
self.recordLog('Loading page: ' + url)
except TimeoutException:
Expand Down Expand Up @@ -1001,7 +1014,6 @@ def get_content(self, p, element):
return content

# 提取数据事件

def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
pageHTML = etree.HTML(self.browser.page_source)
if loopElement != "": # 只在数据在循环中提取时才需要获取循环元素
Expand Down
1 change: 1 addition & 0 deletions ExecuteStage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def isnull(s):
return len(s) != 0



class Time:
def __init__(self, type1=""):
self.t = int(round(time.time() * 1000))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ global.ws.onopen = function() {

export function input(value) {
let message = {
"type": "InputText",
"type": "inputText",
"history": history.length, //记录history的长度
"tabIndex": -1,
"xpath": readXPath(global.nodeList[0]["node"], 0),
Expand Down

0 comments on commit a0dbaea

Please sign in to comment.