From d6406e9e3d2d6b4133753ebda565499538861acc Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 12 Nov 2019 12:07:27 -0800 Subject: [PATCH 1/2] proxy mode: replay improvements for content not captured via proxy mode - if preflight OPTIONS request, respond directly (don't attempt OPTIONS capture lookup) - if preflight CORS request, ensure response has appropriate CORS headers, even if not captured - wombat: update to latest wombat with updated Date() fixed timezone in proxy mode --- pywb/apps/rewriterapp.py | 20 ++++++++++++++++++++ pywb/apps/wbrequestresponse.py | 8 ++++---- pywb/static/wombatProxyMode.js | 2 +- tests/test_proxy.py | 14 ++++++++++++++ wombat | 2 +- 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 155e4f710..9032588a8 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -326,6 +326,10 @@ def render_content(self, wb_url, kwargs, environ): 'pywb.static_prefix', '/static/') is_proxy = ('wsgiprox.proxy_host' in environ) + # if OPTIONS in proxy mode, just generate the proxy response + if is_proxy and self.is_preflight(environ): + return WbResponse.options_response(environ) + environ['pywb.host_prefix'] = host_prefix if self.use_js_obj_proxy: @@ -551,6 +555,9 @@ def render_content(self, wb_url, kwargs, environ): response = WbResponse(status_headers, gen) + if is_proxy and environ.get('HTTP_ORIGIN'): + response.add_access_control_headers(environ) + return response def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy): @@ -817,6 +824,19 @@ def is_ajax(self, environ): return False + def is_preflight(self, environ): + if environ.get('REQUEST_METHOD') != 'OPTIONS': + return False + + if not environ.get('HTTP_ORIGIN'): + return False + + if not environ.get('HTTP_ACCESS_CONTROL_REQUEST_METHOD') and not environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'): + return False + + return True + + def get_base_url(self, wb_url, kwargs): type_ = 
kwargs.get('type') return self.paths[type_].format(**kwargs) diff --git a/pywb/apps/wbrequestresponse.py b/pywb/apps/wbrequestresponse.py index b06045822..b0f0f8d99 100644 --- a/pywb/apps/wbrequestresponse.py +++ b/pywb/apps/wbrequestresponse.py @@ -186,14 +186,14 @@ def add_access_control_headers(self, env=None): allowed_methods = allowed_methods + ', ' + r_method acr_headers = env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS') if acr_headers is not None: - self.status_headers.add_header('Access-Control-Allow-Headers', acr_headers) + self.status_headers.replace_header('Access-Control-Allow-Headers', acr_headers) allowed_origin = env.get('HTTP_ORIGIN', env.get('HTTP_REFERER', allowed_origin)) if allowed_origin is None: allowed_origin = '*' self.status_headers.replace_header('Access-Control-Allow-Origin', allowed_origin) - self.status_headers.add_header('Access-Control-Allow-Methods', allowed_methods) - self.status_headers.add_header('Access-Control-Allow-Credentials', 'true') - self.status_headers.add_header('Access-Control-Max-Age', '1800') + self.status_headers.replace_header('Access-Control-Allow-Methods', allowed_methods) + self.status_headers.replace_header('Access-Control-Allow-Credentials', 'true') + self.status_headers.replace_header('Access-Control-Max-Age', '1800') return self def __repr__(self): diff --git a/pywb/static/wombatProxyMode.js b/pywb/static/wombatProxyMode.js index c6171acdb..462ed7d9a 100644 --- a/pywb/static/wombatProxyMode.js +++ b/pywb/static/wombatProxyMode.js @@ -16,4 +16,4 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with pywb. If not, see <http://www.gnu.org/licenses/>. 
*/ -(function(){function autobind(clazz){for(var prop,propValue,proto=clazz.__proto__||clazz.constructor.prototype||clazz.prototype,clazzProps=Object.getOwnPropertyNames(proto),len=clazzProps.length,i=0;i source[srcset], picture > source[data-srcset], picture > source[data-src], video > source[srcset], video > source[data-srcset], video > source[data-src], audio > source[srcset], audio > source[data-srcset], audio > source[data-src]",autobind(this),this._init(config,true)):new AutoFetcherProxyMode(wombat,config)}function WombatLite($wbwindow,wbinfo){return this instanceof WombatLite?void(this.wb_info=wbinfo,this.$wbwindow=$wbwindow,this.wb_info.top_host=this.wb_info.top_host||"*",this.wb_info.wombat_opts=this.wb_info.wombat_opts||{},this.WBAutoFetchWorker=null,this.historyCB=null):new WombatLite($wbwindow,wbinfo)}AutoFetcherProxyMode.prototype._init=function(config,first){var afwpm=this,wombat=this.wombat;if(document.readyState==="complete")return this.styleTag=document.createElement("style"),this.styleTag.id="$wrStyleParser$",this.styleTag.disabled=true,document.head.appendChild(this.styleTag),void(config.isTop?fetch(config.workerURL).then(function(res){res.text().then(function(text){var blob=new Blob([text],{type:"text/javascript"});afwpm.worker=new wombat.$wbwindow.Worker(URL.createObjectURL(blob),{type:"classic",credentials:"include"}),afwpm.startChecking()}).catch(error=>{console.error("Could not create the backing worker for AutoFetchWorkerProxyMode"),console.error(error)})}):(this.worker={postMessage:function(msg){msg.wb_type||(msg={wb_type:"aaworker",msg:msg}),wombat.$wbwindow.top.postMessage(msg,"*")},terminate:function(){}},this.startChecking()));if(first)var i=setInterval(function(){document.readyState==="complete"&&(afwpm._init(config),clearInterval(i))},1e3)},AutoFetcherProxyMode.prototype.startChecking=function(){for(;this.worker&&this.msgQ.length;)this.postMessage(this.msgQ.shift());this.extractFromLocalDoc(),this.mutationObz=new 
MutationObserver(this.mutationCB),this.mutationObz.observe(document.documentElement,{characterData:false,characterDataOldValue:false,attributes:true,attributeOldValue:true,subtree:true,childList:true,attributeFilter:["src","srcset"]})},AutoFetcherProxyMode.prototype.terminate=function(){this.worker&&this.worker.terminate()},AutoFetcherProxyMode.prototype.justFetch=function(urls){this.postMessage({type:"fetch-all",values:urls})},AutoFetcherProxyMode.prototype.fetchAsPage=function(url,title){if(url){var headers={"X-Wombat-History-Page":url};if(title){var encodedTitle=encodeURIComponent(title.trim());title&&(headers["X-Wombat-History-Title"]=encodedTitle)}var fetchData={url:url,options:{headers:headers,cache:"no-store"}};this.justFetch([fetchData])}},AutoFetcherProxyMode.prototype.postMessage=function(msg){this.worker?this.worker.postMessage(msg):this.msgQ.push(msg)},AutoFetcherProxyMode.prototype.handleMutatedStyleElem=function(elem,accum,text){var checkNode,baseURI=document.baseURI;if(text){if(!elem.parentNode||elem.parentNode.localName!=="style")return;checkNode=elem.parentNode}else checkNode=elem;try{var extractedMedia=this.extractMediaRules(checkNode.sheet,baseURI);if(extractedMedia.length)return void(accum.media=accum.media.concat(extractedMedia))}catch(e){}!text&&checkNode.href&&accum.deferred.push(this.fetchCSSAndExtract(checkNode.href))},AutoFetcherProxyMode.prototype.handleMutatedElem=function(elem,accum){var baseURI=document.baseURI;if(elem.nodeType===Node.TEXT_NODE)return this.handleMutatedStyleElem(elem,accum,true);switch(elem.localName){case"img":case"video":case"audio":case"source":return this.handleDomElement(elem,baseURI,accum);case"style":return this.handleMutatedStyleElem(elem,accum);case"link":if(elem.rel==="stylesheet"||elem.rel==="preload"&&elem.as==="style")return this.handleMutatedStyleElem(elem,accum);}return this.extractSrcSrcsetFrom(elem,baseURI,accum)},AutoFetcherProxyMode.prototype.mutationCB=function(mutationList,observer){for(var 
accum={type:"values",srcset:[],src:[],media:[],deferred:[]},i=0;i source[srcset], picture > source[data-srcset], picture > source[data-src], video > source[srcset], video > source[data-srcset], video > source[data-src], audio > source[srcset], audio > source[data-srcset], audio > source[data-src]",autobind(this),this._init(config,true)):new AutoFetcherProxyMode(wombat,config)}function WombatLite($wbwindow,wbinfo){return this instanceof WombatLite?void(this.wb_info=wbinfo,this.$wbwindow=$wbwindow,this.wb_info.top_host=this.wb_info.top_host||"*",this.wb_info.wombat_opts=this.wb_info.wombat_opts||{},this.WBAutoFetchWorker=null,this.historyCB=null):new WombatLite($wbwindow,wbinfo)}AutoFetcherProxyMode.prototype._init=function(config,first){var afwpm=this,wombat=this.wombat;if(document.readyState==="complete")return this.styleTag=document.createElement("style"),this.styleTag.id="$wrStyleParser$",this.styleTag.disabled=true,document.head.appendChild(this.styleTag),void(config.isTop?fetch(config.workerURL).then(function(res){res.text().then(function(text){var blob=new Blob([text],{type:"text/javascript"});afwpm.worker=new wombat.$wbwindow.Worker(URL.createObjectURL(blob),{type:"classic",credentials:"include"}),afwpm.startChecking()}).catch(error=>{console.error("Could not create the backing worker for AutoFetchWorkerProxyMode"),console.error(error)})}):(this.worker={postMessage:function(msg){msg.wb_type||(msg={wb_type:"aaworker",msg:msg}),wombat.$wbwindow.top.postMessage(msg,"*")},terminate:function(){}},this.startChecking()));if(first)var i=setInterval(function(){document.readyState==="complete"&&(afwpm._init(config),clearInterval(i))},1e3)},AutoFetcherProxyMode.prototype.startChecking=function(){for(;this.worker&&this.msgQ.length;)this.postMessage(this.msgQ.shift());this.extractFromLocalDoc(),this.mutationObz=new 
MutationObserver(this.mutationCB),this.mutationObz.observe(document.documentElement,{characterData:false,characterDataOldValue:false,attributes:true,attributeOldValue:true,subtree:true,childList:true,attributeFilter:["src","srcset"]})},AutoFetcherProxyMode.prototype.terminate=function(){this.worker&&this.worker.terminate()},AutoFetcherProxyMode.prototype.justFetch=function(urls){this.postMessage({type:"fetch-all",values:urls})},AutoFetcherProxyMode.prototype.fetchAsPage=function(url,title){if(url){var headers={"X-Wombat-History-Page":url};if(title){var encodedTitle=encodeURIComponent(title.trim());title&&(headers["X-Wombat-History-Title"]=encodedTitle)}var fetchData={url:url,options:{headers:headers,cache:"no-store"}};this.justFetch([fetchData])}},AutoFetcherProxyMode.prototype.postMessage=function(msg){this.worker?this.worker.postMessage(msg):this.msgQ.push(msg)},AutoFetcherProxyMode.prototype.handleMutatedStyleElem=function(elem,accum,text){var checkNode,baseURI=document.baseURI;if(text){if(!elem.parentNode||elem.parentNode.localName!=="style")return;checkNode=elem.parentNode}else checkNode=elem;try{var extractedMedia=this.extractMediaRules(checkNode.sheet,baseURI);if(extractedMedia.length)return void(accum.media=accum.media.concat(extractedMedia))}catch(e){}!text&&checkNode.href&&accum.deferred.push(this.fetchCSSAndExtract(checkNode.href))},AutoFetcherProxyMode.prototype.handleMutatedElem=function(elem,accum){var baseURI=document.baseURI;if(elem.nodeType===Node.TEXT_NODE)return this.handleMutatedStyleElem(elem,accum,true);switch(elem.localName){case"img":case"video":case"audio":case"source":return this.handleDomElement(elem,baseURI,accum);case"style":return this.handleMutatedStyleElem(elem,accum);case"link":if(elem.rel==="stylesheet"||elem.rel==="preload"&&elem.as==="style")return this.handleMutatedStyleElem(elem,accum);}return this.extractSrcSrcsetFrom(elem,baseURI,accum)},AutoFetcherProxyMode.prototype.mutationCB=function(mutationList,observer){for(var 
accum={type:"values",srcset:[],src:[],media:[],deferred:[]},i=0;i Date: Tue, 12 Nov 2019 12:39:55 -0800 Subject: [PATCH 2/2] bump version to 2.4.0rc3 --- pywb/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pywb/version.py b/pywb/version.py index 9cd4096bb..cce7cd754 100644 --- a/pywb/version.py +++ b/pywb/version.py @@ -1,4 +1,4 @@ -__version__ = '2.4.0rc0' +__version__ = '2.4.0rc3' if __name__ == '__main__': print(__version__)