From f0795567f2bce3bd6d9c3bfea4a8171b8e78923b Mon Sep 17 00:00:00 2001 From: ddio Date: Tue, 26 Oct 2021 12:53:45 +0800 Subject: [PATCH 1/4] feat: support min_monthly_price & switch to pipenv, #89 --- Pipfile | 13 +- Pipfile.lock | 376 ++++++++++++------ .../migrations/0008_support_price_range.py | 23 ++ backend/rental/models.py | 1 + requirements.txt | 40 -- 5 files changed, 277 insertions(+), 176 deletions(-) create mode 100644 backend/rental/migrations/0008_support_price_range.py delete mode 100644 requirements.txt diff --git a/Pipfile b/Pipfile index 96321a4f..710522f7 100644 --- a/Pipfile +++ b/Pipfile @@ -4,13 +4,16 @@ url = "https://pypi.org/simple" verify_ssl = true [dev-packages] +pylint-django = "*" +pylint = "*" +pylint-plugin-utils = "*" [packages] asn1crypto = "==0.24.0" astroid = "==2.0" attrs = "==17.4.0" beautifulsoup4 = "==4.6.3" -cffi = "==1.11.5" +cffi = "==1.13.2" constantly = "==15.1.0" cryptography = "==2.3" cssselect = "==1.0.3" @@ -27,24 +30,20 @@ parsel = "==1.4.0" pyasn1 = "==0.4.2" pyasn1-modules = "==0.2.1" pycparser = "==2.18" -pylint = "==2.0.0" -pylint-django = "==2.0.2" -pylint-plugin-utils = "==0.4" pytz = "==2018.5" queuelib = "==1.5.0" raven = "==6.9.0" service-identity = "==17.0.0" six = "==1.11.0" -typed-ast = "==1.1.0" +typed-ast = "==1.4.2" w3lib = "==1.19.0" wrapt = "==1.10.11" Automat = "==0.6.0" PyDispatcher = "==2.0.5" pyOpenSSL = "==17.5.0" -Scrapy = "==1.5.0" Twisted = "==17.9.0" "zope.interface" = "==4.4.3" -psycopg2-binary = "==2.7.5" +psycopg2-binary = "*" [requires] python_version = "3" diff --git a/Pipfile.lock b/Pipfile.lock index 820f77bb..8c4b5cce 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "806556625c1177fd8cb6440681a2961bd8ef7b6b259c0693848d83baa0b6e4f3" + "sha256": "dec808d6c7c83e9036c025b3ae3ffd59b3ee83cbc677bc5f81362360c70db0e5" }, "pipfile-spec": 6, "requires": { @@ -59,41 +59,42 @@ }, "cffi": { "hashes": [ - "sha256:151b7eefd035c56b2b2e1eb9963c90c6302dc15fbd8c1c0a83a163ff2c7d7743", - "sha256:1553d1e99f035ace1c0544050622b7bc963374a00c467edafac50ad7bd276aef", - "sha256:1b0493c091a1898f1136e3f4f991a784437fac3673780ff9de3bcf46c80b6b50", - "sha256:2ba8a45822b7aee805ab49abfe7eec16b90587f7f26df20c71dd89e45a97076f", - "sha256:3bb6bd7266598f318063e584378b8e27c67de998a43362e8fce664c54ee52d30", - "sha256:3c85641778460581c42924384f5e68076d724ceac0f267d66c757f7535069c93", - "sha256:3eb6434197633b7748cea30bf0ba9f66727cdce45117a712b29a443943733257", - "sha256:495c5c2d43bf6cebe0178eb3e88f9c4aa48d8934aa6e3cddb865c058da76756b", - "sha256:4c91af6e967c2015729d3e69c2e51d92f9898c330d6a851bf8f121236f3defd3", - "sha256:57b2533356cb2d8fac1555815929f7f5f14d68ac77b085d2326b571310f34f6e", - "sha256:770f3782b31f50b68627e22f91cb182c48c47c02eb405fd689472aa7b7aa16dc", - "sha256:79f9b6f7c46ae1f8ded75f68cf8ad50e5729ed4d590c74840471fc2823457d04", - "sha256:7a33145e04d44ce95bcd71e522b478d282ad0eafaf34fe1ec5bbd73e662f22b6", - "sha256:857959354ae3a6fa3da6651b966d13b0a8bed6bbc87a0de7b38a549db1d2a359", - "sha256:87f37fe5130574ff76c17cab61e7d2538a16f843bb7bca8ebbc4b12de3078596", - "sha256:95d5251e4b5ca00061f9d9f3d6fe537247e145a8524ae9fd30a2f8fbce993b5b", - "sha256:9d1d3e63a4afdc29bd76ce6aa9d58c771cd1599fbba8cf5057e7860b203710dd", - "sha256:a36c5c154f9d42ec176e6e620cb0dd275744aa1d804786a71ac37dc3661a5e95", - "sha256:a6a5cb8809091ec9ac03edde9304b3ad82ad4466333432b16d78ef40e0cce0d5", - "sha256:ae5e35a2c189d397b91034642cb0eab0e346f776ec2eb44a49a459e6615d6e2e", - "sha256:b0f7d4a3df8f06cf49f9f121bead236e328074de6449866515cea4907bbc63d6", - "sha256:b75110fb114fa366b29a027d0c9be3709579602ae111ff61674d28c93606acca", - "sha256:ba5e697569f84b13640c9e193170e89c13c6244c24400fc57e88724ef610cd31", - "sha256:be2a9b390f77fd7676d80bc3cdc4f8edb940d8c198ed2d8c0be1319018c778e1", - "sha256:ca1bd81f40adc59011f58159e4aa6445fc585a32bb8ac9badf7a2c1aa23822f2", - "sha256:d5d8555d9bfc3f02385c1c37e9f998e2011f0db4f90e250e5bc0c0a85a813085", - "sha256:e55e22ac0a30023426564b1059b035973ec82186ddddbac867078435801c7801", - "sha256:e90f17980e6ab0f3c2f3730e56d1fe9bcba1891eeea58966e89d352492cc74f4", - "sha256:ecbb7b01409e9b782df5ded849c178a0aa7c906cf8c5a67368047daab282b184", - "sha256:ed01918d545a38998bfa5902c7c00e0fee90e957ce036a4000a88e3fe2264917", - "sha256:edabd457cd23a02965166026fd9bfd196f4324fe6032e866d0f3bd0301cd486f", - "sha256:fdf1c1dc5bafc32bc5d08b054f94d659422b05aba244d6be4ddc1c72d9aa70fb" - ], - "index": "pypi", - "version": "==1.11.5" + "sha256:0b49274afc941c626b605fb59b59c3485c17dc776dc3cc7cc14aca74cc19cc42", + "sha256:0e3ea92942cb1168e38c05c1d56b0527ce31f1a370f6117f1d490b8dcd6b3a04", + "sha256:135f69aecbf4517d5b3d6429207b2dff49c876be724ac0c8bf8e1ea99df3d7e5", + "sha256:19db0cdd6e516f13329cba4903368bff9bb5a9331d3410b1b448daaadc495e54", + "sha256:2781e9ad0e9d47173c0093321bb5435a9dfae0ed6a762aabafa13108f5f7b2ba", + "sha256:291f7c42e21d72144bb1c1b2e825ec60f46d0a7468f5346841860454c7aa8f57", + "sha256:2c5e309ec482556397cb21ede0350c5e82f0eb2621de04b2633588d118da4396", + "sha256:2e9c80a8c3344a92cb04661115898a9129c074f7ab82011ef4b612f645939f12", + "sha256:32a262e2b90ffcfdd97c7a5e24a6012a43c61f1f5a57789ad80af1d26c6acd97", + "sha256:3c9fff570f13480b201e9ab69453108f6d98244a7f495e91b6c654a47486ba43", + "sha256:415bdc7ca8c1c634a6d7163d43fb0ea885a07e9618a64bda407e04b04333b7db", + "sha256:42194f54c11abc8583417a7cf4eaff544ce0de8187abaf5d29029c91b1725ad3", + "sha256:4424e42199e86b21fc4db83bd76909a6fc2a2aefb352cb5414833c030f6ed71b", + "sha256:4a43c91840bda5f55249413037b7a9b79c90b1184ed504883b72c4df70778579", + "sha256:599a1e8ff057ac530c9ad1778293c665cb81a791421f46922d80a86473c13346", + "sha256:5c4fae4e9cdd18c82ba3a134be256e98dc0596af1e7285a3d2602c97dcfa5159", + "sha256:5ecfa867dea6fabe2a58f03ac9186ea64da1386af2159196da51c4904e11d652", + "sha256:62f2578358d3a92e4ab2d830cd1c2049c9c0d0e6d3c58322993cc341bdeac22e", + "sha256:6471a82d5abea994e38d2c2abc77164b4f7fbaaf80261cb98394d5793f11b12a", + "sha256:6d4f18483d040e18546108eb13b1dfa1000a089bcf8529e30346116ea6240506", + "sha256:71a608532ab3bd26223c8d841dde43f3516aa5d2bf37b50ac410bb5e99053e8f", + "sha256:74a1d8c85fb6ff0b30fbfa8ad0ac23cd601a138f7509dc617ebc65ef305bb98d", + "sha256:7b93a885bb13073afb0aa73ad82059a4c41f4b7d8eb8368980448b52d4c7dc2c", + "sha256:7d4751da932caaec419d514eaa4215eaf14b612cff66398dd51129ac22680b20", + "sha256:7f627141a26b551bdebbc4855c1157feeef18241b4b8366ed22a5c7d672ef858", + "sha256:8169cf44dd8f9071b2b9248c35fc35e8677451c52f795daa2bb4643f32a540bc", + "sha256:aa00d66c0fab27373ae44ae26a66a9e43ff2a678bf63a9c7c1a9a4d61172827a", + "sha256:ccb032fda0873254380aa2bfad2582aedc2959186cce61e3a17abc1a55ff89c3", + "sha256:d754f39e0d1603b5b24a7f8484b22d2904fa551fe865fd0d4c3332f078d20d4e", + "sha256:d75c461e20e29afc0aee7172a0950157c704ff0dd51613506bd7d82b718e7410", + "sha256:dcd65317dd15bc0451f3e01c80da2216a31916bdcffd6221ca1202d96584aa25", + "sha256:e570d3ab32e2c2861c4ebe6ffcad6a8abf9347432a37608fe1fbd157b3f0036b", + "sha256:fd43a88e045cf992ed09fa724b5315b790525f2676883a6ea64e3263bae6549d" + ], + "index": "pypi", + "version": "==1.13.2" }, "constantly": { "hashes": [ @@ -272,52 +273,76 @@ }, "psycopg2-binary": { "hashes": [ - "sha256:04afb59bbbd2eab3148e6816beddc74348078b8c02a1113ea7f7822f5be4afe3", - "sha256:098b18f4d8857a8f9b206d1dc54db56c2255d5d26458917e7bcad61ebfe4338f", - "sha256:0bf855d4a7083e20ead961fda4923887094eaeace0ab2d76eb4aa300f4bbf5bd", - "sha256:197dda3ffd02057820be83fe4d84529ea70bf39a9a4daee1d20ffc74eb3d042e", - "sha256:278ef63afb4b3d842b4609f2c05ffbfb76795cf6a184deeb8707cd5ed3c981a5", - "sha256:3cbf8c4fc8f22f0817220891cf405831559f4d4c12c4f73913730a2ea6c47a47", - "sha256:4305aed922c4d9d6163ab3a41d80b5a1cfab54917467da8168552c42cad84d32", - "sha256:47ee296f704fb8b2a616dec691cdcfd5fa0f11943955e88faa98cbd1dc3b3e3d", - "sha256:4a0e38cb30457e70580903367161173d4a7d1381eb2f2cfe4e69b7806623f484", - "sha256:4d6c294c6638a71cafb82a37f182f24321f1163b08b5d5ca076e11fe838a3086", - "sha256:4f3233c366500730f839f92833194fd8f9a5c4529c8cd8040aa162c3740de8e5", - "sha256:5221f5a3f4ca2ddf0d58e8b8a32ca50948be9a43351fda797eb4e72d7a7aa34d", - "sha256:5c6ca0b507540a11eaf9e77dee4f07c131c2ec80ca0cffa146671bf690bc1c02", - "sha256:789bd89d71d704db2b3d5e67d6d518b158985d791d3b2dec5ab85457cfc9677b", - "sha256:7b94d29239efeaa6a967f3b5971bd0518d2a24edd1511edbf4a2c8b815220d07", - "sha256:89bc65ef3301c74cf32db25334421ea6adbe8f65601ea45dcaaf095abed910bb", - "sha256:89d6d3a549f405c20c9ae4dc94d7ed2de2fa77427a470674490a622070732e62", - "sha256:97521704ac7127d7d8ba22877da3c7bf4a40366587d238ec679ff38e33177498", - "sha256:a395b62d5f44ff6f633231abe568e2203b8fabf9797cd6386aa92497df912d9a", - "sha256:a6d32c37f714c3f34158f3fa659f3a8f2658d5f53c4297d45579b9677cc4d852", - "sha256:a89ee5c26f72f2d0d74b991ce49e42ddeb4ac0dc2d8c06a0f2770a1ab48f4fe0", - "sha256:b4c8b0ef3608e59317bfc501df84a61e48b5445d45f24d0391a24802de5f2d84", - "sha256:b5fcf07140219a1f71e18486b8dc28e2e1b76a441c19374805c617aa6d9a9d55", - "sha256:b86f527f00956ecebad6ab3bb30e3a75fedf1160a8716978dd8ce7adddedd86f", - "sha256:be4c4aa22ba22f70de36c98b06480e2f1697972d49eb20d525f400d204a6d272", - "sha256:c2ac7aa1a144d4e0e613ac7286dae85671e99fe7a1353954d4905629c36b811c", - "sha256:de26ef4787b5e778e8223913a3e50368b44e7480f83c76df1f51d23bd21cea16", - "sha256:e70ebcfc5372dc7b699c0110454fc4263967f30c55454397e5769eb72c0eb0ce", - "sha256:eadbd32b6bc48b67b0457fccc94c86f7ccc8178ab839f684eb285bb592dc143e", - "sha256:ecbc6dfff6db06b8b72ae8a2f25ff20fbdcb83cb543811a08f7cb555042aa729" - ], - "index": "pypi", - "version": "==2.7.5" + "sha256:0b7dae87f0b729922e06f85f667de7bf16455d411971b2043bbd9577af9d1975", + "sha256:0f2e04bd2a2ab54fa44ee67fe2d002bb90cee1c0f1cc0ebc3148af7b02034cbd", + "sha256:123c3fb684e9abfc47218d3784c7b4c47c8587951ea4dd5bc38b6636ac57f616", + "sha256:1473c0215b0613dd938db54a653f68251a45a78b05f6fc21af4326f40e8360a2", + "sha256:14db1752acdd2187d99cb2ca0a1a6dfe57fc65c3281e0f20e597aac8d2a5bd90", + "sha256:1e3a362790edc0a365385b1ac4cc0acc429a0c0d662d829a50b6ce743ae61b5a", + "sha256:1e85b74cbbb3056e3656f1cc4781294df03383127a8114cbc6531e8b8367bf1e", + "sha256:20f1ab44d8c352074e2d7ca67dc00843067788791be373e67a0911998787ce7d", + "sha256:24b0b6688b9f31a911f2361fe818492650795c9e5d3a1bc647acbd7440142a4f", + "sha256:2f62c207d1740b0bde5c4e949f857b044818f734a3d57f1d0d0edc65050532ed", + "sha256:3242b9619de955ab44581a03a64bdd7d5e470cc4183e8fcadd85ab9d3756ce7a", + "sha256:35c4310f8febe41f442d3c65066ca93cccefd75013df3d8c736c5b93ec288140", + "sha256:4235f9d5ddcab0b8dbd723dca56ea2922b485ea00e1dafacf33b0c7e840b3d32", + "sha256:542875f62bc56e91c6eac05a0deadeae20e1730be4c6334d8f04c944fcd99759", + "sha256:5ced67f1e34e1a450cdb48eb53ca73b60aa0af21c46b9b35ac3e581cf9f00e31", + "sha256:661509f51531ec125e52357a489ea3806640d0ca37d9dada461ffc69ee1e7b6e", + "sha256:7360647ea04db2e7dff1648d1da825c8cf68dc5fbd80b8fb5b3ee9f068dcd21a", + "sha256:736b8797b58febabb85494142c627bd182b50d2a7ec65322983e71065ad3034c", + "sha256:8c13d72ed6af7fd2c8acbd95661cf9477f94e381fce0792c04981a8283b52917", + "sha256:988b47ac70d204aed01589ed342303da7c4d84b56c2f4c4b8b00deda123372bf", + "sha256:995fc41ebda5a7a663a254a1dcac52638c3e847f48307b5416ee373da15075d7", + "sha256:a36c7eb6152ba5467fb264d73844877be8b0847874d4822b7cf2d3c0cb8cdcb0", + "sha256:aed4a9a7e3221b3e252c39d0bf794c438dc5453bc2963e8befe9d4cd324dff72", + "sha256:aef9aee84ec78af51107181d02fe8773b100b01c5dfde351184ad9223eab3698", + "sha256:b0221ca5a9837e040ebf61f48899926b5783668b7807419e4adae8175a31f773", + "sha256:b4d7679a08fea64573c969f6994a2631908bb2c0e69a7235648642f3d2e39a68", + "sha256:c250a7ec489b652c892e4f0a5d122cc14c3780f9f643e1a326754aedf82d9a76", + "sha256:ca86db5b561b894f9e5f115d6a159fff2a2570a652e07889d8a383b5fae66eb4", + "sha256:cfc523edecddaef56f6740d7de1ce24a2fdf94fd5e704091856a201872e37f9f", + "sha256:d92272c7c16e105788efe2cfa5d680f07e34e0c29b03c1908f8636f55d5f915a", + "sha256:da113b70f6ec40e7d81b43d1b139b9db6a05727ab8be1ee559f3a69854a69d34", + "sha256:f6fac64a38f6768e7bc7b035b9e10d8a538a9fadce06b983fb3e6fa55ac5f5ce", + "sha256:f8559617b1fcf59a9aedba2c9838b5b6aa211ffedecabca412b92a1ff75aac1a", + "sha256:fbb42a541b1093385a2d8c7eec94d26d30437d0e77c1d25dae1dcc46741a385e" + ], + "index": "pypi", + "version": "==2.9.1" }, "pyasn1": { "hashes": [ + "sha256:0d7f6e959fe53f3960a23d73f35e1fce61348b30915b6664309ca756de7c1f89", + "sha256:5a0db897b311d265cde49615cf783f1c78613138605cdd0f907ecfa5b2aba3ee", + "sha256:758cb50abddc03e4563fd9e7f03db56e3e87b58c0bd01247360326e5c0c7ffa5", + "sha256:7d626683e3d792cccc608da02498aff37ab4f3dafd8905d6bf755d11f9b26b43", + "sha256:a7efe807c4b83a859e2735c692b92ed7b567cfddc4163763412920041d876c2b", + "sha256:b5a9ca48055b9a20f6d1b3d68e38692e5431c86a0f99ea602e61294e891fee5b", + "sha256:c07d6e587b2f928366b1f67c09bda026a3e6fcc99e80a744dc67f8fca3895626", "sha256:d258b0a71994f7770599835249cece1caef3c70def868c4915e6e5ca49b67d15", - "sha256:d5cd6ed995dba16fad0c521cfe31cd2d68400b53fcc2bce93326829be73ab6d1" + "sha256:d5cd6ed995dba16fad0c521cfe31cd2d68400b53fcc2bce93326829be73ab6d1", + "sha256:d84c2aea3cf43780e9e6a19f4e4dddee9f6976519020e64e47c57e5c7a8c3dd2", + "sha256:e85895087905c65b5b594eb91f7522664c85545b147d5f4d4e7b1b07da8dcbdc", + "sha256:f81c96761fca60d64b1c9b79ec2e40cf9495a745cf570613079ef324aeb9672b" ], "index": "pypi", "version": "==0.4.2" }, "pyasn1-modules": { "hashes": [ + "sha256:041e9fbafac548d095f5b6c3b328b80792f006196e15a232b731a83c93d59493", + "sha256:0cdca76a68dcb701fff58c397de0ef9922b472b1cb3ea9695ca19d03f1869787", + "sha256:0cea139045c38f84abaa803bcb4b5e8775ea12a42af10019d942f227acc426c3", + "sha256:0f2e50d20bc670be170966638fa0ae603f0bc9ed6ebe8e97a6d1d4cef30cc889", "sha256:47fb6757ab78fe966e7c58b2030b546854f78416d653163f0ce9290cf2278e8b", - "sha256:af00ea8f2022b6287dc375b2c70f31ab5af83989fc6fe9eacd4976ce26cd7ccc" + "sha256:598a6004ec26a8ab40a39ea955068cf2a3949ad9c0030da970f2e1ca4c9f1cc9", + "sha256:72fd8b0c11191da088147c6e4678ec53e573923ecf60b57eeac9e97433e09fc2", + "sha256:854700bbdd01394e2ada9c1bfbd0ed9f5d0c551350dbbd023e88b11d2771ae06", + "sha256:af00ea8f2022b6287dc375b2c70f31ab5af83989fc6fe9eacd4976ce26cd7ccc", + "sha256:b1f395cae2d669e0830cb023aa86f9f283b7a9aa32317d7f80d8e78aa2745812", + "sha256:c6747146e95d2b14cc2a8399b2b0bde3f93778f8f9ec704690d2b589c376c137", + "sha256:f53fe5bcebdf318f51399b250fe8325ef3a26d927f012cc0c8e0f9e9af7f9deb" ], "index": "pypi", "version": "==0.2.1" @@ -337,29 +362,6 @@ "index": "pypi", "version": "==2.0.5" }, - "pylint": { - "hashes": [ - "sha256:248a7b19138b22e6390cba71adc0cb03ac6dd75a25d3544f03ea1728fa20e8f4", - "sha256:9cd70527ef3b099543eeabeb5c80ff325d86b477aa2b3d49e264e12d12153bc8" - ], - "index": "pypi", - "version": "==2.0.0" - }, - "pylint-django": { - "hashes": [ - "sha256:5dc5f85caef2c5f9e61622b9cbd89d94edd3dcf546939b2974d18de4fa90d676", - "sha256:bf313f10b68ed915a34f0f475cc9ff8c7f574a95302beb48b79c5993f7efd84c" - ], - "index": "pypi", - "version": "==2.0.2" - }, - "pylint-plugin-utils": { - "hashes": [ - "sha256:8ad25a82bcce390d1d6b7c006c123e0cb18051839c9df7b8bdb7823c53fe676e" - ], - "index": "pypi", - "version": "==0.4" - }, "pyopenssl": { "hashes": [ "sha256:07a2de1a54de07448732a81e38a55df7da109b2f47f599f8bb35b0cbec69d4bd", @@ -392,14 +394,6 @@ "index": "pypi", "version": "==6.9.0" }, - "scrapy": { - "hashes": [ - "sha256:08d86737c560dcc1c4b73ac0ac5bd8d14b3e2265c1f7b195f0b73ab13741fe03", - "sha256:31a0bf05d43198afaf3acfb9b4fb0c09c1d7d7ff641e58c66e36117f26c4b755" - ], - "index": "pypi", - "version": "==1.5.0" - }, "service-identity": { "hashes": [ "sha256:0e76f3c042cc0f5c7e6da002cf646f59dc4023962d1d1166343ce53bdad39e17", @@ -427,32 +421,39 @@ }, "typed-ast": { "hashes": [ - "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", - "sha256:10703d3cec8dcd9eef5a630a04056bbc898abc19bac5691612acba7d1325b66d", - "sha256:1f6c4bd0bdc0f14246fd41262df7dfc018d65bb05f6e16390b7ea26ca454a291", - "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", - "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", - "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", - "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", - "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", - "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa", - "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", - "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", - "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", - "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", - "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", - "sha256:898f818399cafcdb93cbbe15fc83a33d05f18e29fb498ddc09b0214cdfc7cd51", - "sha256:94b091dc0f19291adcb279a108f5d38de2430411068b219f41b343c03b28fb1f", - "sha256:a26863198902cda15ab4503991e8cf1ca874219e0118cbf07c126bce7c4db129", - "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", - "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", - "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", - "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", - "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", - "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6" - ], - "index": "pypi", - "version": "==1.1.0" + "sha256:07d49388d5bf7e863f7fa2f124b1b1d89d8aa0e2f7812faff0a5658c01c59aa1", + "sha256:14bf1522cdee369e8f5581238edac09150c765ec1cb33615855889cf33dcb92d", + "sha256:240296b27397e4e37874abb1df2a608a92df85cf3e2a04d0d4d61055c8305ba6", + "sha256:36d829b31ab67d6fcb30e185ec996e1f72b892255a745d3a82138c97d21ed1cd", + "sha256:37f48d46d733d57cc70fd5f30572d11ab8ed92da6e6b28e024e4a3edfb456e37", + "sha256:4c790331247081ea7c632a76d5b2a265e6d325ecd3179d06e9cf8d46d90dd151", + "sha256:5dcfc2e264bd8a1db8b11a892bd1647154ce03eeba94b461effe68790d8b8e07", + "sha256:7147e2a76c75f0f64c4319886e7639e490fee87c9d25cb1d4faef1d8cf83a440", + "sha256:7703620125e4fb79b64aa52427ec192822e9f45d37d4b6625ab37ef403e1df70", + "sha256:8368f83e93c7156ccd40e49a783a6a6850ca25b556c0fa0240ed0f659d2fe496", + "sha256:84aa6223d71012c68d577c83f4e7db50d11d6b1399a9c779046d75e24bed74ea", + "sha256:85f95aa97a35bdb2f2f7d10ec5bbdac0aeb9dafdaf88e17492da0504de2e6400", + "sha256:8db0e856712f79c45956da0c9a40ca4246abc3485ae0d7ecc86a20f5e4c09abc", + "sha256:9044ef2df88d7f33692ae3f18d3be63dec69c4fb1b5a4a9ac950f9b4ba571606", + "sha256:963c80b583b0661918718b095e02303d8078950b26cc00b5e5ea9ababe0de1fc", + "sha256:987f15737aba2ab5f3928c617ccf1ce412e2e321c77ab16ca5a293e7bbffd581", + "sha256:9ec45db0c766f196ae629e509f059ff05fc3148f9ffd28f3cfe75d4afb485412", + "sha256:9fc0b3cb5d1720e7141d103cf4819aea239f7d136acf9ee4a69b047b7986175a", + "sha256:a2c927c49f2029291fbabd673d51a2180038f8cd5a5b2f290f78c4516be48be2", + "sha256:a38878a223bdd37c9709d07cd357bb79f4c760b29210e14ad0fb395294583787", + "sha256:b4fcdcfa302538f70929eb7b392f536a237cbe2ed9cba88e3bf5027b39f5f77f", + "sha256:c0c74e5579af4b977c8b932f40a5464764b2f86681327410aa028a22d2f54937", + "sha256:c1c876fd795b36126f773db9cbb393f19808edd2637e00fd6caba0e25f2c7b64", + "sha256:c9aadc4924d4b5799112837b226160428524a9a45f830e0d0f184b19e4090487", + "sha256:cc7b98bf58167b7f2db91a4327da24fb93368838eb84a44c472283778fc2446b", + "sha256:cf54cfa843f297991b7388c281cb3855d911137223c6b6d2dd82a47ae5125a41", + "sha256:d003156bb6a59cda9050e983441b7fa2487f7800d76bdc065566b7d728b4581a", + "sha256:d175297e9533d8d37437abc14e8a83cbc68af93cc9c1c59c2c292ec59a0697a3", + "sha256:d746a437cdbca200622385305aedd9aef68e8a645e385cc483bdc5e488f07166", + "sha256:e683e409e5c45d5c9082dc1daf13f6374300806240719f95dc783d1fc942af10" + ], + "index": "pypi", + "version": "==1.4.2" }, "w3lib": { "hashes": [ @@ -499,5 +500,122 @@ "version": "==4.4.3" } }, - "develop": {} + "develop": { + "astroid": { + "hashes": [ + "sha256:8704779744963d56a2625ec2949eb150bd499fc099510161ddbb2b64e2d98138", + "sha256:add3fd690e7c1fe92436d17be461feeaa173e6f33e0789734310334da0f30027" + ], + "index": "pypi", + "version": "==2.0" + }, + "isort": { + "hashes": [ + "sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af", + "sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8", + "sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497" + ], + "index": "pypi", + "version": "==4.3.4" + }, + "lazy-object-proxy": { + "hashes": [ + "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", + "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", + "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", + "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", + "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", + "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", + "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", + "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", + "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", + "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", + "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", + "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", + "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", + "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", + "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", + "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", + "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", + "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", + "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", + "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", + "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", + "sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", + "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", + "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", + "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", + "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", + "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", + "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", + "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" + ], + "index": "pypi", + "version": "==1.3.1" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "index": "pypi", + "version": "==0.6.1" + }, + "platformdirs": { + "hashes": [ + "sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2", + "sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.4.0" + }, + "pylint": { + "hashes": [ + "sha256:0f358e221c45cbd4dad2a1e4b883e75d28acdcccd29d40c76eb72b307269b126", + "sha256:2c9843fff1a88ca0ad98a256806c82c5a8f86086e7ccbdb93297d86c3f90c436" + ], + "index": "pypi", + "version": "==2.11.1" + }, + "pylint-django": { + "hashes": [ + "sha256:aff49d9602a39c027b4ed7521a041438893205918f405800063b7ff692b7371b", + "sha256:f63f717169b0c2e4e19c28f1c32c28290647330184fcb7427805ae9b6994f3fc" + ], + "index": "pypi", + "version": "==2.4.4" + }, + "pylint-plugin-utils": { + "hashes": [ + "sha256:2f30510e1c46edf268d3a195b2849bd98a1b9433229bb2ba63b8d776e1fc4d0a", + "sha256:57625dcca20140f43731311cd8fd879318bf45a8b0fd17020717a8781714a25a" + ], + "index": "pypi", + "version": "==0.6" + }, + "toml": { + "hashes": [ + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" + }, + "typing-extensions": { + "hashes": [ + "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e", + "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7", + "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34" + ], + "markers": "python_version < '3.10'", + "version": "==3.10.0.2" + }, + "wrapt": { + "hashes": [ + "sha256:d4d560d479f2c21e1b5443bbd15fe7ec4b37fe7e53d335d3b9b0a7b1226fe3c6" + ], + "index": "pypi", + "version": "==1.10.11" + } + } } diff --git a/backend/rental/migrations/0008_support_price_range.py b/backend/rental/migrations/0008_support_price_range.py new file mode 100644 index 00000000..15e0fa20 --- /dev/null +++ b/backend/rental/migrations/0008_support_price_range.py @@ -0,0 +1,23 @@ +# Generated by Django 2.1.15 on 2021-10-26 04:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('rental', '0007_more_property_type'), + ] + + operations = [ + migrations.AddField( + model_name='house', + name='min_monthly_price', + field=models.IntegerField(null=True), + ), + migrations.AddField( + model_name='housets', + name='min_monthly_price', + field=models.IntegerField(null=True), + ), + ] diff --git a/backend/rental/models.py b/backend/rental/models.py index 9675cd6a..9d4b26f8 100644 --- a/backend/rental/models.py +++ b/backend/rental/models.py @@ -92,6 +92,7 @@ class BaseHouse(models.Model): vendor_house_url = models.URLField(null=True) # price related monthly_price = models.IntegerField(null=True) + min_monthly_price = models.IntegerField(null=True) deposit_type = models.IntegerField( choices = [(tag, tag.value) for tag in DepositType], null=True diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9618fefd..00000000 --- a/requirements.txt +++ /dev/null @@ -1,40 +0,0 @@ --i https://pypi.org/simple/ -asn1crypto==0.24.0 -astroid==2.0 -attrs==17.4.0 -automat==0.6.0 -beautifulsoup4==4.6.3 -cffi==1.11.5 -constantly==15.1.0 -cryptography==2.3 -cssselect==1.0.3 -django==2.1.15 -hyperlink==18.0.0 -idna==2.6 -incremental==17.5.0 -isort==4.3.4 -jsonfield==2.0.2 -lazy-object-proxy==1.3.1 -lxml==4.2.0 -mccabe==0.6.1 -parsel==1.4.0 -psycopg2-binary==2.7.5 -pyasn1-modules==0.2.1 -pyasn1==0.4.2 -pycparser==2.18 -pydispatcher==2.0.5 -pylint-django==2.0.2 -pylint-plugin-utils==0.4 -pylint==2.0.0 -pyopenssl==17.5.0 -pytz==2018.5 -queuelib==1.5.0 -raven==6.9.0 -scrapy==1.5.0 -service-identity==17.0.0 -six==1.11.0 -twisted==17.9.0 -typed-ast==1.1.0 -w3lib==1.19.0 -wrapt==1.10.11 -zope.interface==4.4.3 From 139ecc314dc9e62ce7aaba4f18a48454d5a406f7 Mon Sep 17 00:00:00 2001 From: ddio Date: Wed, 27 Oct 2021 00:08:52 +0800 Subject: [PATCH 2/4] feat: use scrapy_twrh in list591 spider, #89, #31 --- Pipfile | 33 +- Pipfile.lock | 775 ++++++++++++---------- README.md | 12 +- crawler/crawler/pipelines.py | 28 +- crawler/crawler/spiders/list591_spider.py | 186 +----- crawler/crawler/spiders/persist_queue.py | 178 +++++ scrapy-package/Pipfile | 13 + scrapy-package/Pipfile.lock | 728 ++++++++++++++++++++ scrapy-package/README.md | 4 +- scrapy-package/pyproject.toml | 3 + scrapy-package/requirements.txt | 38 -- scrapy-package/setup.py | 4 +- scrapy-package/trial/crawler/settings.py | 2 +- 13 files changed, 1397 insertions(+), 607 deletions(-) create mode 100644 crawler/crawler/spiders/persist_queue.py create mode 100644 scrapy-package/Pipfile create mode 100644 scrapy-package/Pipfile.lock create mode 100644 scrapy-package/pyproject.toml delete mode 100644 scrapy-package/requirements.txt diff --git a/Pipfile b/Pipfile index 710522f7..61c8007d 100644 --- a/Pipfile +++ b/Pipfile @@ -9,41 +9,12 @@ pylint = "*" pylint-plugin-utils = "*" [packages] -asn1crypto = "==0.24.0" -astroid = "==2.0" -attrs = "==17.4.0" -beautifulsoup4 = "==4.6.3" cffi = "==1.13.2" -constantly = "==15.1.0" -cryptography = "==2.3" -cssselect = "==1.0.3" django = "==2.1.15" -hyperlink = "==18.0.0" -idna = "==2.6" -incremental = "==17.5.0" -isort = "==4.3.4" jsonfield = "==2.0.2" -lazy-object-proxy = "==1.3.1" -lxml = "==4.2.0" -mccabe = "==0.6.1" -parsel = "==1.4.0" -pyasn1 = "==0.4.2" -pyasn1-modules = "==0.2.1" -pycparser = "==2.18" -pytz = "==2018.5" -queuelib = "==1.5.0" raven = "==6.9.0" -service-identity = "==17.0.0" -six = "==1.11.0" -typed-ast = "==1.4.2" -w3lib = "==1.19.0" -wrapt = "==1.10.11" -Automat = "==0.6.0" -PyDispatcher = "==2.0.5" -pyOpenSSL = "==17.5.0" -Twisted = "==17.9.0" -"zope.interface" = "==4.4.3" -psycopg2-binary = "*" +psycopg2-binary = "==2.8.6" +scrapy-tw-rental-house = "==1.1.0" [requires] python_version = "3" diff --git a/Pipfile.lock b/Pipfile.lock index 8c4b5cce..fa0d2410 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dec808d6c7c83e9036c025b3ae3ffd59b3ee83cbc677bc5f81362360c70db0e5" + "sha256": "1e942a49ce839bb873885913dfca307972e8db9032a97d7e3bb3743ed0e819df" }, "pipfile-spec": 6, "requires": { @@ -16,46 +16,20 @@ ] }, "default": { - "asn1crypto": { - "hashes": [ - "sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87", - "sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49" - ], - "index": "pypi", - "version": "==0.24.0" - }, - "astroid": { - "hashes": [ - "sha256:8704779744963d56a2625ec2949eb150bd499fc099510161ddbb2b64e2d98138", - "sha256:add3fd690e7c1fe92436d17be461feeaa173e6f33e0789734310334da0f30027" - ], - "index": "pypi", - "version": "==2.0" - }, "attrs": { "hashes": [ - "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9", - "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450" + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" ], - "index": "pypi", - "version": "==17.4.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" }, "automat": { "hashes": [ - "sha256:2140297df155f7990f6f4c73b2ab0583bd8150db9ed2a1b48122abe66e9908c1", - "sha256:3c1fd04ecf08ac87b4dd3feae409542e9bf7827257097b2b6ed5692f69d6f6a8" - ], - "index": "pypi", - "version": "==0.6.0" - }, - "beautifulsoup4": { - "hashes": [ - "sha256:194ec62a25438adcb3fdb06378b26559eda1ea8a747367d34c33cef9c7f48d57", - "sha256:90f8e61121d6ae58362ce3bed8cd997efb00c914eae0ff3d363c32f9a9822d10", - "sha256:f0abd31228055d698bb392a826528ea08ebb9959e6bea17c606fd9c9009db938" + "sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33", + "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111" ], - "index": "pypi", - "version": "==4.6.3" + "version": "==20.2.0" }, "cffi": { "hashes": [ @@ -101,41 +75,41 @@ "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35", "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d" ], - "index": "pypi", "version": "==15.1.0" }, "cryptography": { "hashes": [ - "sha256:21af753934f2f6d1a10fe8f4c0a64315af209ef6adeaee63ca349797d747d687", - "sha256:27bb401a20a838d6d0ea380f08c6ead3ccd8c9d8a0232dc9adcc0e4994576a66", - "sha256:29720c4253263cff9aea64585adbbe85013ba647f6e98367efff9db2d7193ded", - "sha256:2a35b7570d8f247889784010aac8b384fd2e4a47b33e15c4a60b45a7c1944120", - "sha256:42c531a6a354407f42ee07fda5c2c0dc822cf6d52744949c182f2b295fbd4183", - "sha256:5eb86f03f9c4f0ac2336ac5431271072ddf7ecc76b338e26366732cfac58aa19", - "sha256:67f7f57eae8dede577f3f7775957f5bec93edd6bdb6ce597bb5b28e1bdf3d4fb", - "sha256:6ec84edcbc966ae460560a51a90046503ff0b5b66157a9efc61515c68059f6c8", - "sha256:7ba834564daef87557e7fcd35c3c3183a4147b0b3a57314e53317360b9b201b3", - "sha256:7d7f084cbe1fdb82be5a0545062b59b1ad3637bc5a48612ac2eb428ff31b31ea", - "sha256:82409f5150e529d699e5c33fa8fd85e965104db03bc564f5f4b6a9199e591f7c", - "sha256:87d092a7c2a44e5f7414ab02fb4145723ebba411425e1a99773531dd4c0e9b8d", - "sha256:8c56ef989342e42b9fcaba7c74b446f0cc9bed546dd00034fa7ad66fc00307ef", - "sha256:9449f5d4d7c516a6118fa9210c4a00f34384cb1d2028672100ee0c6cce49d7f6", - "sha256:bc2301170986ad82d9349a91eb8884e0e191209c45f5541b16aa7c0cfb135978", - "sha256:c132bab45d4bd0fff1d3fe294d92b0a6eb8404e93337b3127bdec9f21de117e6", - "sha256:c3d945b7b577f07a477700f618f46cbc287af3a9222cd73035c6ef527ef2c363", - "sha256:cee18beb4c807b5c0b178f4fa2fae03cef9d51821a358c6890f8b23465b7e5d2", - "sha256:d01dfc5c2b3495184f683574e03c70022674ca9a7be88589c5aba130d835ea90" + "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6", + "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6", + "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c", + "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999", + "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e", + "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992", + "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d", + "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588", + "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa", + "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d", + "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd", + "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d", + "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953", + "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2", + "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8", + "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6", + "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9", + "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6", + "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad", + "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76" ], - "index": "pypi", - "version": "==2.3" + "markers": "python_version >= '3.6'", + "version": "==35.0.0" }, "cssselect": { "hashes": [ - "sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204", - "sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206" + "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", + "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" ], - "index": "pypi", - "version": "==1.0.3" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.1.0" }, "django": { "hashes": [ @@ -145,38 +119,72 @@ "index": "pypi", "version": "==2.1.15" }, + "h2": { + "hashes": [ + "sha256:61e0f6601fa709f35cdb730863b4e5ec7ad449792add80d1410d4174ed139af5", + "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14" + ], + "version": "==3.2.0" + }, + "hpack": { + "hashes": [ + "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89", + "sha256:8eec9c1f4bfae3408a3f30500261f7e6a65912dc138526ea054f9ad98892e9d2" + ], + "version": "==3.0.0" + }, + "hyperframe": { + "hashes": [ + "sha256:5187962cb16dcc078f23cb5a4b110098d546c3f41ff2d4038a9896893bbd0b40", + "sha256:a9f5c17f2cc3c719b917c4f33ed1c61bd1f8dfac4b1bd23b7c80b3400971b41f" + ], + "version": "==5.2.0" + }, "hyperlink": { "hashes": [ - "sha256:98da4218a56b448c7ec7d2655cb339af1f7d751cf541469bb4fc28c4a4245b34", - "sha256:f01b4ff744f14bc5d0a22a6b9f1525ab7d6312cb0ff967f59414bbac52f0a306" + "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", + "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4" ], - "index": "pypi", - "version": "==18.0.0" + "version": "==21.0.0" }, "idna": { "hashes": [ - "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f", - "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4" + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" ], - "index": "pypi", - "version": "==2.6" + "markers": "python_version >= '3.5'", + "version": "==3.3" }, "incremental": { "hashes": [ - "sha256:717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f", - "sha256:7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3" + "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57", + "sha256:92014aebc6a20b78a8084cdd5645eeaa7f74b8933f70fa3ada2cfbd1e3b54321" ], - "index": "pypi", - "version": "==17.5.0" + "version": "==21.3.0" }, - "isort": { + "itemadapter": { "hashes": [ - "sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af", - "sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8", - "sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497" + "sha256:695809a4e2f42174f0392dd66c2ceb2b2454d3ebbf65a930e5c85910d8d88d8f", + "sha256:f05df8da52619da4b8c7f155d8a15af19083c0c7ad941d8c1de799560ad994ca" ], - "index": "pypi", - "version": "==4.3.4" + "markers": "python_version >= '3.6'", + "version": "==0.4.0" + }, + "itemloaders": { + "hashes": [ + "sha256:1277cd8ca3e4c02dcdfbc1bcae9134ad89acfa6041bd15b4561c6290203a0c96", + "sha256:4cb46a0f8915e910c770242ae3b60b1149913ed37162804f1e40e8535d6ec497" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.4" + }, + "jmespath": { + "hashes": [ + "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9", + "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "version": "==0.10.0" }, "jsonfield": { "hashes": [ @@ -186,205 +194,196 @@ "index": "pypi", "version": "==2.0.2" }, - "lazy-object-proxy": { + "lxml": { "hashes": [ - "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", - "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", - "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", - "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", - "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", - "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", - "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", - "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", - "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", - "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", - "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", - "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", - "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", - "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", - "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", - "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", - "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", - "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", - "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", - "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", - "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", - "sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", - "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", - "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", - "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", - "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", - "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", - "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", - "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" - ], - "index": "pypi", - "version": "==1.3.1" + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", + "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + "sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16", + "sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4", + "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d", + "sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + "sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24", + "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", + "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" + ], + "markers": "platform_python_implementation == 'CPython'", + "version": "==4.6.3" }, - "lxml": { + "parsel": { "hashes": [ - "sha256:0aa44ffdeaaf6ba45d61980bb2c07e87d4dcac7a8b5b9d458124bc1adcda5233", - "sha256:0af9c9267b1257319d49e9c1e9abbf92a99f965bee3c4733e0f0f7578985182d", - "sha256:0cddc6cde79e1932efc71d9974a4418184ad0b8ca46c633ad772b2c5eaf36b3c", - "sha256:124a9d529eec5e10f307eb237df3efc43dd1fb7ebdb5da5e480c4ed372648b6b", - "sha256:1d1e45584353e4d563685874707fc8c85cdd11b0ef3b79d77bb38046134d68a9", - "sha256:2812bc45a7f53f366217b76a1c53e6728fbfa7f7524d16a321ea8f7131428bd1", - "sha256:29697224b2df76edf7c2de9bcd90a26dd28fe85c5fd7f0171cae84f8383b227e", - "sha256:36ffb216e2f361a5a0a7e219aea6cd44da11c64061baed273944aae21223186c", - "sha256:4626d699551f66687e5f7e7f9b79bfce611e12edebfb9fec276e2df8ec46541e", - "sha256:4c21d7304d37715e6aed756e4d0c374c99c9bb1fa8d64f546b95474b17ac23de", - "sha256:57be98177ce784495dff53f40620995ad0a56456246ed9d51977e595de58e12e", - "sha256:62bfcd0629991e1c1257ffd28df2ab31a5c44da4c06823c26ec0f472723a84ca", - "sha256:71ac6dac6835de75aaf531cae9ffa447dae0783ba1f43bf6eaccfad3680a5b9c", - "sha256:7769ac9203ebe6d8db16904c54d57d77360fcc1926ed7afaa86b04050e4afa5b", - "sha256:7d96fbb5f23a62300aa9bef7d286cd61aca8902357619c8708c0290aba5df73f", - "sha256:88583c6565c9299f617238a500f1a47510bac54daff7872d6a343f13361b659e", - "sha256:8f52c4c8f1cf15419193026e731f34a3260a3ce7977b875ba1eb2517b8a3f660", - "sha256:95b82fdfdaac71640b281da6b9a2c3700177ba5190a786881b184de744ad55de", - "sha256:988d55112f196e12341b7c5138841c2b4f21f871eaa8f138c6ac4c46f28899f9", - "sha256:9e08918b744b89d30750eca8598f37ae75b16202870db678fde970d85afed3e3", - "sha256:b46f31e806f6884bd1053ad1d78ecaca6d1bc5dd94a1b783a6ff0bb4b3a60962", - "sha256:c18f316cad969111b1ff9e84c82fbc9ae6f25f35701118182d384585940cdf80", - "sha256:cef79715f2335bfc1ef7082bcb8b2bac87271431653455221a9127fde146208c", - "sha256:cf63f590090404c52f179b7ceacb7cd549de3a1697bcfe2f79be180b2801d109", - "sha256:d06260e6102b2f18dbee3736185cd6a2e1c88c0fad782bf8e9d7a7a1b24e02b0", - "sha256:d0dc3e5737adcc9a23fd3d3d3072b887fefb48143309563f412ef7b0ebdfdb30", - "sha256:dd98d4f88ce0abda2b02c1542d1de22dd342023f3ba09874bd95841283f29433", - "sha256:f04b184984c23e0caac3c55eac2fe2dbb88726a5a1b35e23715eff6f29a4705c" + "sha256:70efef0b651a996cceebc69e55a85eb2233be0890959203ba7c3a03c72725c79", + "sha256:9e1fa8db1c0b4a878bf34b35c043d89c9d1cbebc23b4d34dbc3c0ec33f2e087d" ], - "index": "pypi", - "version": "==4.2.0" + "version": "==1.6.0" }, - "mccabe": { + "priority": { "hashes": [ - "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", - "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + "sha256:6bc1961a6d7fcacbfc337769f1a382c8e746566aaa365e78047abe9f66b2ffbe", + "sha256:be4fcb94b5e37cdeb40af5533afe6dd603bd665fe9c8b3052610fc1001d5d1eb" ], - "index": "pypi", - "version": "==0.6.1" + "version": "==1.3.0" }, - "parsel": { + "protego": { "hashes": [ - "sha256:1a9ac0c1db8175547e1732be57ced2a2dc0714590f6b249d022ad25d918ef923", - "sha256:2f3a6813a0ff39b6ca2530b9c1ad25d83e3a33808d93dd21fbf114c6232a16a8" + "sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4" ], - "index": "pypi", - "version": "==1.4.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.16" }, "psycopg2-binary": { "hashes": [ - "sha256:0b7dae87f0b729922e06f85f667de7bf16455d411971b2043bbd9577af9d1975", - "sha256:0f2e04bd2a2ab54fa44ee67fe2d002bb90cee1c0f1cc0ebc3148af7b02034cbd", - "sha256:123c3fb684e9abfc47218d3784c7b4c47c8587951ea4dd5bc38b6636ac57f616", - "sha256:1473c0215b0613dd938db54a653f68251a45a78b05f6fc21af4326f40e8360a2", - "sha256:14db1752acdd2187d99cb2ca0a1a6dfe57fc65c3281e0f20e597aac8d2a5bd90", - "sha256:1e3a362790edc0a365385b1ac4cc0acc429a0c0d662d829a50b6ce743ae61b5a", - "sha256:1e85b74cbbb3056e3656f1cc4781294df03383127a8114cbc6531e8b8367bf1e", - "sha256:20f1ab44d8c352074e2d7ca67dc00843067788791be373e67a0911998787ce7d", - "sha256:24b0b6688b9f31a911f2361fe818492650795c9e5d3a1bc647acbd7440142a4f", - "sha256:2f62c207d1740b0bde5c4e949f857b044818f734a3d57f1d0d0edc65050532ed", - "sha256:3242b9619de955ab44581a03a64bdd7d5e470cc4183e8fcadd85ab9d3756ce7a", - "sha256:35c4310f8febe41f442d3c65066ca93cccefd75013df3d8c736c5b93ec288140", - "sha256:4235f9d5ddcab0b8dbd723dca56ea2922b485ea00e1dafacf33b0c7e840b3d32", - "sha256:542875f62bc56e91c6eac05a0deadeae20e1730be4c6334d8f04c944fcd99759", - "sha256:5ced67f1e34e1a450cdb48eb53ca73b60aa0af21c46b9b35ac3e581cf9f00e31", - "sha256:661509f51531ec125e52357a489ea3806640d0ca37d9dada461ffc69ee1e7b6e", - "sha256:7360647ea04db2e7dff1648d1da825c8cf68dc5fbd80b8fb5b3ee9f068dcd21a", - "sha256:736b8797b58febabb85494142c627bd182b50d2a7ec65322983e71065ad3034c", - "sha256:8c13d72ed6af7fd2c8acbd95661cf9477f94e381fce0792c04981a8283b52917", - "sha256:988b47ac70d204aed01589ed342303da7c4d84b56c2f4c4b8b00deda123372bf", - "sha256:995fc41ebda5a7a663a254a1dcac52638c3e847f48307b5416ee373da15075d7", - "sha256:a36c7eb6152ba5467fb264d73844877be8b0847874d4822b7cf2d3c0cb8cdcb0", - "sha256:aed4a9a7e3221b3e252c39d0bf794c438dc5453bc2963e8befe9d4cd324dff72", - "sha256:aef9aee84ec78af51107181d02fe8773b100b01c5dfde351184ad9223eab3698", - "sha256:b0221ca5a9837e040ebf61f48899926b5783668b7807419e4adae8175a31f773", - "sha256:b4d7679a08fea64573c969f6994a2631908bb2c0e69a7235648642f3d2e39a68", - "sha256:c250a7ec489b652c892e4f0a5d122cc14c3780f9f643e1a326754aedf82d9a76", - "sha256:ca86db5b561b894f9e5f115d6a159fff2a2570a652e07889d8a383b5fae66eb4", - "sha256:cfc523edecddaef56f6740d7de1ce24a2fdf94fd5e704091856a201872e37f9f", - "sha256:d92272c7c16e105788efe2cfa5d680f07e34e0c29b03c1908f8636f55d5f915a", - "sha256:da113b70f6ec40e7d81b43d1b139b9db6a05727ab8be1ee559f3a69854a69d34", - "sha256:f6fac64a38f6768e7bc7b035b9e10d8a538a9fadce06b983fb3e6fa55ac5f5ce", - "sha256:f8559617b1fcf59a9aedba2c9838b5b6aa211ffedecabca412b92a1ff75aac1a", - "sha256:fbb42a541b1093385a2d8c7eec94d26d30437d0e77c1d25dae1dcc46741a385e" - ], - "index": "pypi", - "version": "==2.9.1" + "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", + "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", + "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", + "sha256:15978a1fbd225583dd8cdaf37e67ccc278b5abecb4caf6b2d6b8e2b948e953f6", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", + "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:6422f2ff0919fd720195f64ffd8f924c1395d30f9a495f31e2392c2efafb5056", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", + "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", + "sha256:89705f45ce07b2dfa806ee84439ec67c5d9a0ef20154e0e475e2b2ed392a5b83", + "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", + "sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", + "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", + "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", + "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", + "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", + "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", + "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", + "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", + "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" + ], + "index": "pypi", + "version": "==2.8.6" }, "pyasn1": { "hashes": [ - "sha256:0d7f6e959fe53f3960a23d73f35e1fce61348b30915b6664309ca756de7c1f89", - "sha256:5a0db897b311d265cde49615cf783f1c78613138605cdd0f907ecfa5b2aba3ee", - "sha256:758cb50abddc03e4563fd9e7f03db56e3e87b58c0bd01247360326e5c0c7ffa5", - "sha256:7d626683e3d792cccc608da02498aff37ab4f3dafd8905d6bf755d11f9b26b43", - "sha256:a7efe807c4b83a859e2735c692b92ed7b567cfddc4163763412920041d876c2b", - "sha256:b5a9ca48055b9a20f6d1b3d68e38692e5431c86a0f99ea602e61294e891fee5b", - "sha256:c07d6e587b2f928366b1f67c09bda026a3e6fcc99e80a744dc67f8fca3895626", - "sha256:d258b0a71994f7770599835249cece1caef3c70def868c4915e6e5ca49b67d15", - "sha256:d5cd6ed995dba16fad0c521cfe31cd2d68400b53fcc2bce93326829be73ab6d1", - "sha256:d84c2aea3cf43780e9e6a19f4e4dddee9f6976519020e64e47c57e5c7a8c3dd2", - "sha256:e85895087905c65b5b594eb91f7522664c85545b147d5f4d4e7b1b07da8dcbdc", - "sha256:f81c96761fca60d64b1c9b79ec2e40cf9495a745cf570613079ef324aeb9672b" - ], - "index": "pypi", - "version": "==0.4.2" + "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", + "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", + "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", + "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", + "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", + "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", + "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", + "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", + "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", + "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" + ], + "version": "==0.4.8" }, "pyasn1-modules": { "hashes": [ - "sha256:041e9fbafac548d095f5b6c3b328b80792f006196e15a232b731a83c93d59493", - "sha256:0cdca76a68dcb701fff58c397de0ef9922b472b1cb3ea9695ca19d03f1869787", - "sha256:0cea139045c38f84abaa803bcb4b5e8775ea12a42af10019d942f227acc426c3", - "sha256:0f2e50d20bc670be170966638fa0ae603f0bc9ed6ebe8e97a6d1d4cef30cc889", - "sha256:47fb6757ab78fe966e7c58b2030b546854f78416d653163f0ce9290cf2278e8b", - "sha256:598a6004ec26a8ab40a39ea955068cf2a3949ad9c0030da970f2e1ca4c9f1cc9", - "sha256:72fd8b0c11191da088147c6e4678ec53e573923ecf60b57eeac9e97433e09fc2", - "sha256:854700bbdd01394e2ada9c1bfbd0ed9f5d0c551350dbbd023e88b11d2771ae06", - "sha256:af00ea8f2022b6287dc375b2c70f31ab5af83989fc6fe9eacd4976ce26cd7ccc", - "sha256:b1f395cae2d669e0830cb023aa86f9f283b7a9aa32317d7f80d8e78aa2745812", - "sha256:c6747146e95d2b14cc2a8399b2b0bde3f93778f8f9ec704690d2b589c376c137", - "sha256:f53fe5bcebdf318f51399b250fe8325ef3a26d927f012cc0c8e0f9e9af7f9deb" - ], - "index": "pypi", - "version": "==0.2.1" + "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8", + "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199", + "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811", + "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed", + "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4", + "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", + "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74", + "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb", + "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45", + "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd", + "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0", + "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d", + "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405" + ], + "version": "==0.2.8" }, "pycparser": { "hashes": [ - "sha256:99a8ca03e29851d96616ad0404b4aad7d9ee16f25c9f9708a11faf2810f7b226" + "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", + "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" ], - "index": "pypi", - "version": "==2.18" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.20" }, "pydispatcher": { "hashes": [ "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf", "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433" ], - "index": "pypi", + "markers": "platform_python_implementation == 'CPython'", "version": "==2.0.5" }, "pyopenssl": { "hashes": [ - "sha256:07a2de1a54de07448732a81e38a55df7da109b2f47f599f8bb35b0cbec69d4bd", - "sha256:2c10cfba46a52c0b0950118981d61e72c1e5b1aac451ca1bc77de1a679456773" + "sha256:5e2d8c5e46d0d865ae933bef5230090bdaf5506281e9eec60fa250ee80600cb3", + "sha256:8935bd4920ab9abfebb07c41a4f58296407ed77f04bd1a92914044b848ba1ed6" ], - "index": "pypi", - "version": "==17.5.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==21.0.0" }, "pytz": { "hashes": [ - "sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053", - "sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277" + "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c", + "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326" ], - "index": "pypi", - "version": "==2018.5" + "version": "==2021.3" }, "queuelib": { "hashes": [ - "sha256:42b413295551bdc24ed9376c1a2cd7d0b1b0fa4746b77b27ca2b797a276a1a17", - "sha256:ff43b5b74b9266f8df4232a8f768dc4d67281a271905e2ed4a3689d4d304cd02" + "sha256:4b207267f2642a8699a1f806045c56eb7ad1a85a10c0e249884580d139c2fcd2", + "sha256:4b96d48f650a814c6fb2fd11b968f9c46178b683aad96d68f930fe13a8574d19" ], - "index": "pypi", - "version": "==1.5.0" + "markers": "python_version >= '3.5'", + "version": "==1.6.2" }, "raven": { "hashes": [ @@ -394,171 +393,171 @@ "index": "pypi", "version": "==6.9.0" }, - "service-identity": { + "scrapy": { "hashes": [ - "sha256:0e76f3c042cc0f5c7e6da002cf646f59dc4023962d1d1166343ce53bdad39e17", - "sha256:4001fbb3da19e0df22c47a06d29681a398473af4aa9d745eca525b3b2c2302ab" + "sha256:13af6032476ab4256158220e530411290b3b934dd602bb6dacacbf6d16141f49", + "sha256:1a9a36970004950ee3c519a14c4db945f9d9a63fecb3d593dddcda477331dde9" + ], + "markers": "python_version >= '3.6'", + "version": "==2.5.1" + }, + "scrapy-tw-rental-house": { + "hashes": [ + "sha256:ccf7adce679c092911b70cbaaeda6f257d4a201d5d0300b6db58072cee3facef", + "sha256:d95c724166e9575ddd15fe2b60420394dd1d9b1bebd50f34cf2528baf217d224" ], "index": "pypi", - "version": "==17.0.0" + "version": "==1.1.0" + }, + "service-identity": { + "hashes": [ + "sha256:6e6c6086ca271dc11b033d17c3a8bea9f24ebff920c587da090afc9519419d34", + "sha256:f0b0caac3d40627c3c04d7a51b6e06721857a0e10a8775f2d1d7e72901b3a7db" + ], + "version": "==21.1.0" }, "six": { "hashes": [ - "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", - "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "index": "pypi", - "version": "==1.11.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "version": "==1.16.0" }, "twisted": { - "hashes": [ - "sha256:0da1a7e35d5fcae37bc9c7978970b5feb3bc82822155b8654ec63925c05af75c", - "sha256:716805e624f9396fcc1f47e8aef68e629fd31599a74855b6e1636122c042458d", - "sha256:7bc3cdfd1ca5e5b84c7936db3c2cb2feb7d5b77410e713fd346da095a3b6a1d2" + "extras": [ + "http2" ], - "index": "pypi", - "version": "==17.9.0" - }, - "typed-ast": { - "hashes": [ - "sha256:07d49388d5bf7e863f7fa2f124b1b1d89d8aa0e2f7812faff0a5658c01c59aa1", - "sha256:14bf1522cdee369e8f5581238edac09150c765ec1cb33615855889cf33dcb92d", - "sha256:240296b27397e4e37874abb1df2a608a92df85cf3e2a04d0d4d61055c8305ba6", - "sha256:36d829b31ab67d6fcb30e185ec996e1f72b892255a745d3a82138c97d21ed1cd", - "sha256:37f48d46d733d57cc70fd5f30572d11ab8ed92da6e6b28e024e4a3edfb456e37", - "sha256:4c790331247081ea7c632a76d5b2a265e6d325ecd3179d06e9cf8d46d90dd151", - "sha256:5dcfc2e264bd8a1db8b11a892bd1647154ce03eeba94b461effe68790d8b8e07", - "sha256:7147e2a76c75f0f64c4319886e7639e490fee87c9d25cb1d4faef1d8cf83a440", - "sha256:7703620125e4fb79b64aa52427ec192822e9f45d37d4b6625ab37ef403e1df70", - "sha256:8368f83e93c7156ccd40e49a783a6a6850ca25b556c0fa0240ed0f659d2fe496", - "sha256:84aa6223d71012c68d577c83f4e7db50d11d6b1399a9c779046d75e24bed74ea", - "sha256:85f95aa97a35bdb2f2f7d10ec5bbdac0aeb9dafdaf88e17492da0504de2e6400", - "sha256:8db0e856712f79c45956da0c9a40ca4246abc3485ae0d7ecc86a20f5e4c09abc", - "sha256:9044ef2df88d7f33692ae3f18d3be63dec69c4fb1b5a4a9ac950f9b4ba571606", - "sha256:963c80b583b0661918718b095e02303d8078950b26cc00b5e5ea9ababe0de1fc", - "sha256:987f15737aba2ab5f3928c617ccf1ce412e2e321c77ab16ca5a293e7bbffd581", - "sha256:9ec45db0c766f196ae629e509f059ff05fc3148f9ffd28f3cfe75d4afb485412", - "sha256:9fc0b3cb5d1720e7141d103cf4819aea239f7d136acf9ee4a69b047b7986175a", - "sha256:a2c927c49f2029291fbabd673d51a2180038f8cd5a5b2f290f78c4516be48be2", - "sha256:a38878a223bdd37c9709d07cd357bb79f4c760b29210e14ad0fb395294583787", - "sha256:b4fcdcfa302538f70929eb7b392f536a237cbe2ed9cba88e3bf5027b39f5f77f", - "sha256:c0c74e5579af4b977c8b932f40a5464764b2f86681327410aa028a22d2f54937", - "sha256:c1c876fd795b36126f773db9cbb393f19808edd2637e00fd6caba0e25f2c7b64", - "sha256:c9aadc4924d4b5799112837b226160428524a9a45f830e0d0f184b19e4090487", - "sha256:cc7b98bf58167b7f2db91a4327da24fb93368838eb84a44c472283778fc2446b", - "sha256:cf54cfa843f297991b7388c281cb3855d911137223c6b6d2dd82a47ae5125a41", - "sha256:d003156bb6a59cda9050e983441b7fa2487f7800d76bdc065566b7d728b4581a", - "sha256:d175297e9533d8d37437abc14e8a83cbc68af93cc9c1c59c2c292ec59a0697a3", - "sha256:d746a437cdbca200622385305aedd9aef68e8a645e385cc483bdc5e488f07166", - "sha256:e683e409e5c45d5c9082dc1daf13f6374300806240719f95dc783d1fc942af10" + "hashes": [ + "sha256:13c1d1d2421ae556d91e81e66cf0d4f4e4e1e4a36a0486933bee4305c6a4fb9b", + "sha256:2cd652542463277378b0d349f47c62f20d9306e57d1247baabd6d1d38a109006" ], - "index": "pypi", - "version": "==1.4.2" + "markers": "python_full_version >= '3.6.7'", + "version": "==21.7.0" }, - "w3lib": { + "typing-extensions": { "hashes": [ - "sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38", - "sha256:aaf7362464532b1036ab0092e2eee78e8fd7b56787baa9ed4967457b083d011b" + "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e", + "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7", + "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34" ], - "index": "pypi", - "version": "==1.19.0" + "version": "==3.10.0.2" }, - "wrapt": { + "w3lib": { "hashes": [ - "sha256:d4d560d479f2c21e1b5443bbd15fe7ec4b37fe7e53d335d3b9b0a7b1226fe3c6" + "sha256:0161d55537063e00d95a241663ede3395c4c6d7b777972ba2fd58bbab2001e53", + "sha256:0ad6d0203157d61149fd45aaed2e24f53902989c32fc1dccc2e2bfba371560df" ], - "index": "pypi", - "version": "==1.10.11" + "version": "==1.22.0" }, "zope.interface": { "hashes": [ - "sha256:11b068fc9916556f3820f38c2376c28d8e55e4a2c51c34915aaac38b75706d2e", - "sha256:16fe824b3d93ee0629aa1f04848a1b515d6b5dc9e98cc7a04feaa35fdb0de5f1", - "sha256:1d954d557b63124a65f2247ac6ed66fa36df18d1e8538d08c9b432e808a634de", - "sha256:3d033abd27cd54157cf42a3bfd4d8c28d7fc5c6f775df3332307d2632a79925b", - "sha256:4be05f79e952793f31a0c2d6a0672c81a3300315da587ce6a590357595217005", - "sha256:4cb1c56b0356da9a33249ef77a688c47107f54191c12a0055d284b6bee7f447e", - "sha256:5a8cc535f4212b134e66a3e1c6b93b19d453dbad0e2f89d0df2c01deefc8cad9", - "sha256:5d8813e438ab67a793b09e1223742b757dd95a4a64d466855a53cb113cc9c9c4", - "sha256:78321a6c0c8cc6ac928e44ef04d50384bc864a7f5e3c25b84110da2ede83739f", - "sha256:88e3d54e88a601f45d03e2a062d5d16852d20e0863a92c19260ae72e2586378a", - "sha256:8dfdc1588db31895f81bcba6c36dc981b4cf4a526c62eae3745bbfbe102477ef", - "sha256:9902d5fc11309e17cdce6574243dc114b9c30de5c60ab53c90f6e3e962688565", - "sha256:a16a3e07511fb6806bb48c8c661d38cdb91cd4bc6c2b6b0b173e72362ec1ceb4", - "sha256:a21d69de2ee89fc59de93e7a43c0379ecedb5149739ff94e910c2bf0ca18e181", - "sha256:a6375035a4b45d199a8b990e3a2f6b71906c318c56dfc14b2d58350b6ca59392", - "sha256:aef398a5b92e70b8152d2c4850bad0fe185adb50d948f32d0bba5694d82b67c7", - "sha256:b8f3491c9df4f0ffed32b275033e74041f420e5dcdefa4b1500d753c64ef42cf", - "sha256:bd626cd76b7e5cbecac9d3e0dd8f98e3eada15ead95713238a523f877327633d", - "sha256:d6d26d5dfbfd60c65152938fcb82f949e8dada37c041f72916fef6621ba5c5ce", - "sha256:dec19181cf6af58ccb8ba3fa3ca9d4ec555b2f3cb31f589f6e86d15df0926c31", - "sha256:f47d4138405eb67e5f059b9ab74e0a1147adc3277f5fe37d5bae5209b67e89e7", - "sha256:f6868378fffbb8651f1f8a767d17e42aed39926c8f6bb9c56f184022fe6c2090", - "sha256:ff20038fbc0e7ea050a7e28fcb8ae6ed8378a8d08ac70b848ea39960dda86bbf" - ], - "index": "pypi", - "version": "==4.4.3" + "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192", + "sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702", + "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09", + "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4", + "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a", + "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3", + "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf", + "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c", + "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d", + "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78", + "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83", + "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531", + "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46", + "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021", + "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94", + "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc", + "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63", + "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54", + "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117", + "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25", + "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05", + "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e", + "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1", + "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004", + "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2", + "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e", + "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f", + "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f", + "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120", + "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f", + "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1", + "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9", + "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e", + "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7", + "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8", + "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b", + "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155", + "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7", + "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c", + "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325", + "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d", + "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb", + "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e", + "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959", + "sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7", + "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920", + "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e", + "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48", + "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8", + "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4", + "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==5.4.0" } }, "develop": { "astroid": { "hashes": [ - "sha256:8704779744963d56a2625ec2949eb150bd499fc099510161ddbb2b64e2d98138", - "sha256:add3fd690e7c1fe92436d17be461feeaa173e6f33e0789734310334da0f30027" + "sha256:0755c998e7117078dcb7d0bda621391dd2a85da48052d948c7411ab187325346", + "sha256:1e83a69fd51b013ebf5912d26b9338d6643a55fec2f20c787792680610eed4a2" ], - "index": "pypi", - "version": "==2.0" + "markers": "python_version ~= '3.6'", + "version": "==2.8.4" }, "isort": { "hashes": [ - "sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af", - "sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8", - "sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497" + "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899", + "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2" ], - "index": "pypi", - "version": "==4.3.4" + "markers": "python_full_version >= '3.6.1' and python_version < '4.0'", + "version": "==5.9.3" }, "lazy-object-proxy": { "hashes": [ - "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", - "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", - "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", - "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", - "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", - "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", - "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", - "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", - "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", - "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", - "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", - "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", - "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", - "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", - "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", - "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", - "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", - "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", - "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", - "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", - "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", - "sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", - "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", - "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", - "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", - "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", - "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", - "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", - "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" - ], - "index": "pypi", - "version": "==1.3.1" + "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653", + "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61", + "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2", + "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837", + "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3", + "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43", + "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726", + "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3", + "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587", + "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8", + "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a", + "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd", + "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f", + "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad", + "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4", + "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b", + "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf", + "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981", + "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741", + "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e", + "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93", + "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.6.0" }, "mccabe": { "hashes": [ "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" ], - "index": "pypi", "version": "==0.6.1" }, "platformdirs": { @@ -598,7 +597,7 @@ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", "version": "==0.10.2" }, "typing-extensions": { @@ -607,15 +606,57 @@ "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7", "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34" ], - "markers": "python_version < '3.10'", "version": "==3.10.0.2" }, "wrapt": { "hashes": [ - "sha256:d4d560d479f2c21e1b5443bbd15fe7ec4b37fe7e53d335d3b9b0a7b1226fe3c6" - ], - "index": "pypi", - "version": "==1.10.11" + "sha256:0473d1558b93e314e84313cc611f6c86be779369f9d3734302bf185a4d2625b1", + "sha256:0582180566e7a13030f896c2f1ac6a56134ab5f3c3f4c5538086f758b1caf3f2", + "sha256:15eee0e6fd07f48af2f66d0e6f2ff1916ffe9732d464d5e2390695296872cad9", + "sha256:1c5c4cf188b5643a97e87e2110bbd4f5bc491d54a5b90633837b34d5df6a03fe", + "sha256:1eb657ed84f4d3e6ad648483c8a80a0cf0a78922ef94caa87d327e2e1ad49b48", + "sha256:22142afab65daffc95863d78effcbd31c19a8003eca73de59f321ee77f73cadb", + "sha256:283e402e5357e104ac1e3fba5791220648e9af6fb14ad7d9cc059091af2b31d2", + "sha256:3de7b4d3066cc610054e7aa2c005645e308df2f92be730aae3a47d42e910566a", + "sha256:3e0d16eedc242d01a6f8cf0623e9cdc3b869329da3f97a15961d8864111d8cf0", + "sha256:3e33c138d1e3620b1e0cc6fd21e46c266393ed5dae0d595b7ed5a6b73ed57aa0", + "sha256:3f87042623530bcffea038f824b63084180513c21e2e977291a9a7e65a66f13b", + "sha256:53c6706a1bcfb6436f1625511b95b812798a6d2ccc51359cd791e33722b5ea32", + "sha256:593cb049ce1c391e0288523b30426c4430b26e74c7e6f6e2844bd99ac7ecc831", + "sha256:6e6d1a8eeef415d7fb29fe017de0e48f45e45efd2d1bfda28fc50b7b330859ef", + "sha256:724ed2bc9c91a2b9026e5adce310fa60c6e7c8760b03391445730b9789b9d108", + "sha256:728e2d9b7a99dd955d3426f237b940fc74017c4a39b125fec913f575619ddfe9", + "sha256:7574de567dcd4858a2ffdf403088d6df8738b0e1eabea220553abf7c9048f59e", + "sha256:8164069f775c698d15582bf6320a4f308c50d048c1c10cf7d7a341feaccf5df7", + "sha256:81a4cf257263b299263472d669692785f9c647e7dca01c18286b8f116dbf6b38", + "sha256:82223f72eba6f63eafca87a0f614495ae5aa0126fe54947e2b8c023969e9f2d7", + "sha256:8318088860968c07e741537030b1abdd8908ee2c71fbe4facdaade624a09e006", + "sha256:83f2793ec6f3ef513ad8d5b9586f5ee6081cad132e6eae2ecb7eac1cc3decae0", + "sha256:87ee3c73bdfb4367b26c57259995935501829f00c7b3eed373e2ad19ec21e4e4", + "sha256:8860c8011a6961a651b1b9f46fdbc589ab63b0a50d645f7d92659618a3655867", + "sha256:9adee1891253670575028279de8365c3a02d3489a74a66d774c321472939a0b1", + "sha256:a0cdedf681db878416c05e1831ec69691b0e6577ac7dca9d4f815632e3549580", + "sha256:a70d876c9aba12d3bd7f8f1b05b419322c6789beb717044eea2c8690d35cb91b", + "sha256:ada5e29e59e2feb710589ca1c79fd989b1dd94d27079dc1d199ec954a6ecc724", + "sha256:af9480de8e63c5f959a092047aaf3d7077422ded84695b3398f5d49254af3e90", + "sha256:b20703356cae1799080d0ad15085dc3213c1ac3f45e95afb9f12769b98231528", + "sha256:bc85d17d90201afd88e3d25421da805e4e135012b5d1f149e4de2981394b2a52", + "sha256:bff0a59387a0a2951cb869251257b6553663329a1b5525b5226cab8c88dcbe7e", + "sha256:c65e623ea7556e39c4f0818200a046cbba7575a6b570ff36122c276fdd30ab0a", + "sha256:c6ee5f8734820c21b9b8bf705e99faba87f21566d20626568eeb0d62cbeaf23c", + "sha256:c7ac2c7a8e34bd06710605b21dd1f3576764443d68e069d2afba9b116014d072", + "sha256:ccb34ce599cab7f36a4c90318697ead18312c67a9a76327b3f4f902af8f68ea1", + "sha256:d0d717e10f952df7ea41200c507cc7e24458f4c45b56c36ad418d2e79dacd1d4", + "sha256:d90520616fce71c05dedeac3a0fe9991605f0acacd276e5f821842e454485a70", + "sha256:dca56cc5963a5fd7c2aa8607017753f534ee514e09103a6c55d2db70b50e7447", + "sha256:df3eae297a5f1594d1feb790338120f717dac1fa7d6feed7b411f87e0f2401c7", + "sha256:e634136f700a21e1fcead0c137f433dde928979538c14907640607d43537d468", + "sha256:fbad5ba74c46517e6488149514b2e2348d40df88cd6b52a83855b7a8bf04723f", + "sha256:fbe6aebc9559fed7ea27de51c2bf5c25ba2a4156cf0017556f72883f2496ee9a", + "sha256:fdede980273aeca591ad354608778365a3a310e0ecdd7a3587b38bc5be9b1808" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.13.2" } } } diff --git a/README.md b/README.md index 990b4f4f..f3d55f03 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,9 @@ #### 環境需求 -1. Python3.5+ +1. Python3.8+ 2. pip -3. pipenv (選用) +3. pipenv 4. [PostgreSQL](https://www.postgresql.org) 9.5+ - 使用 PostgresSQL 以外的資料庫時,爬蟲可以順利執行,但使用內建的匯出指令時無法用 `-u --unique` 去除重複物件 5. GeoDjango ,目前[主要的關聯式資料庫都有支援](https://docs.djangoproject.com/en/2.1/ref/contrib/gis/db-api/) @@ -39,13 +39,9 @@ #### 資料庫設定 ```sh -# 使用 virtualenv 安裝相關套件 -virtualenv -p python3 . -pip install -r requirements.txt -. ./bin/activate - -# 也可使用 pipenv 安裝相關套件 +# 使用 pipenv 安裝相關套件 pipenv install +pipenv shell cd backend # 設定資料庫(預設使用 sqlite) diff --git a/crawler/crawler/pipelines.py b/crawler/crawler/pipelines.py index 46fc68c7..8edad5b4 100644 --- a/crawler/crawler/pipelines.py +++ b/crawler/crawler/pipelines.py @@ -8,14 +8,24 @@ import logging import traceback from django.utils import timezone -from rental.models import HouseTS, House, HouseEtc +from rental.models import HouseTS, House, HouseEtc, Vendor, Author from rental.enums import DealStatusType -from .items import GenericHouseItem, RawHouseItem +from scrapy_twrh.items import GenericHouseItem, RawHouseItem +from django.contrib.gis.geos import Point from crawler.utils import now_tuple class CrawlerPipeline(object): + def __init__(self) -> None: + super().__init__() + self.vendorMap = {} + for vendor in Vendor.objects.all(): + self.vendorMap[vendor.name] = vendor + + def item_vendor (self, item): + return self.vendorMap[item['vendor']] + def process_item(self, item, spider): y, m, d, h = now_tuple() @@ -24,13 +34,13 @@ def process_item(self, item, spider): house, created = House.objects.get_or_create( vendor_house_id=item['house_id'], - vendor=item['vendor'] + vendor=self.item_vendor(item) ) house_etc, created = HouseEtc.objects.get_or_create( house=house, vendor_house_id=item['house_id'], - vendor=item['vendor'] + vendor=self.item_vendor(item) ) if 'raw' in item: @@ -49,12 +59,12 @@ def process_item(self, item, spider): house_ts, created = HouseTS.objects.get_or_create( year=y, month=m, day=d, hour=h, vendor_house_id=item['vendor_house_id'], - vendor=item['vendor'] + vendor=self.item_vendor(item) ) house, created = House.objects.get_or_create( vendor_house_id=item['vendor_house_id'], - vendor=item['vendor'] + vendor=self.item_vendor(item) ) to_db = item.copy() @@ -69,6 +79,12 @@ def process_item(self, item, spider): house.deal_status == DealStatusType.DEAL: should_rollback_house_deal_status = True + if 'rough_coordinate' in to_db: + to_db['rough_coordinate'] = Point(to_db['rough_coordinate'], srid=4326) + if 'author' in to_db: + author_info, created = Author.objects.get_or_create(truth=to_db['author']) + to_db['author'] = author_info + for attr in to_db: setattr(house_ts, attr, to_db[attr]) setattr(house, attr, to_db[attr]) diff --git a/crawler/crawler/spiders/list591_spider.py b/crawler/crawler/spiders/list591_spider.py index 3f704836..181a1445 100644 --- a/crawler/crawler/spiders/list591_spider.py +++ b/crawler/crawler/spiders/list591_spider.py @@ -1,171 +1,53 @@ -import json -import scrapy -from ..items import RawHouseItem, GenericHouseItem -from rental.enums import PropertyType, TopRegionType, SubRegionType -from .house_spider import HouseSpider -from .all_591_cities import all_591_cities +from scrapy import Request +from scrapy_twrh.spiders.rental591 import Rental591Spider, util +from .persist_queue import PersistQueue -class List591Spider(HouseSpider): - ENDPOINT = 'https://rent.591.com.tw/home/search/rsList?is_new_list=1&type=1&kind=0&searchtype=1' - SESSION_ENDPOINT = 'https://rent.591.com.tw/?kind=0®ion=6' - N_PAGE = 30 +class List591Spider(Rental591Spider): name = 'list591' def __init__(self, **kwargs): super().__init__( - vendor='591 租屋網', - is_list=True, - request_generator=self.gen_request_params, - response_parser=self.parse_list, + start_list=self.start_list_from_persist_queue, + # parse_list=self.parse_list_and_stop, **kwargs ) - self.csrf_token = None - self.session_token = None - - def gen_request_params(self, seed): - city = seed['region'] - - return { - 'url': "{}®ion={}&firstRow={}".format( - self.ENDPOINT, - city['id'], - seed['page'] * self.N_PAGE - ), - 'headers': { - 'Cookie': 'urlJumpIp={}; 591_new_session={};'.format(city['id'], self.session_token), - 'X-CSRF-TOKEN': self.csrf_token - }, - 'priority': self.clean_number(city['id']), - 'meta': {'seed': seed} - } - - def start_requests(self): - # 591 require a valid session to start request, #27 - yield scrapy.Request( - url=self.SESSION_ENDPOINT, - dont_filter=True, - callback=self.handle_session_init, + self.persist_queue = PersistQueue( + vendor='591 租屋網', + is_list=True, + logger=self.logger, + seed_parser=self.parse_seed, + generate_request_args=self.gen_list_request_args, + parse_response=self.parse_list_and_stop ) - def handle_session_init(self, response): - self.csrf_token = response.css('meta[name="csrf-token"]').xpath('@content').extract_first() + def parse_seed (self, seed): + return util.ListRequestMeta(*seed) - for cookie in response.headers.getlist('Set-Cookie'): - cookie_tokens = cookie.decode('utf-8').split('; ') - if cookie_tokens and cookie_tokens[0].startswith('591_new_session='): - self.session_token = cookie_tokens[0].split('=')[1] - break - - if not self.has_request() and not self.has_record(): - for region in all_591_cities: - # let's do DFS - self.gen_persist_request({ - 'region': region, - 'page': 0 - }) + def start_list_from_persist_queue (self): + if not self.persist_queue.has_request() and not self.persist_queue.has_record(): + for city in self.target_cities: + # let's do BFS + self.persist_queue.gen_persist_request([ + city['id'], + city['city'], + 0 + ]) while True: - next_request = self.next_request() + next_request = self.persist_queue.next_request() if next_request: yield next_request else: break - def get_val(self, house, regular_attr, top_attr=None, clean_number=False): - ret = None - - if regular_attr in house: - ret = house[regular_attr] - elif top_attr in house: - ret = house[top_attr] - - if clean_number and ret is not None: - ret = self.clean_number(ret) - - return ret - - def gen_shared_attrs(self, house, seed={}): - house_id = self.get_val(house, 'id', 'post_id') - - url = '{}/rent-detail-{}.html'.format( - self.vendor.site_url, house_id) - - if 'region_name' in house: - # topData doesn't contain region_name for some reason.. - top_region = self.get_enum( - TopRegionType, house_id, house['region_name']) - else: - top_region = self.get_enum( - TopRegionType, house_id, seed['region']['city']) - - sub_region = self.get_enum( - SubRegionType, - house_id, - '{}{}'.format( - TopRegionType(top_region).name, - self.get_val(house, 'section_name', 'section_str') - ) - ) - - property_type = self.get_enum( - PropertyType, house_id, self.get_val(house, 'kind_name', 'kind_str')) - - generic_house = { - 'vendor': self.vendor, - 'vendor_house_id': house_id, - 'vendor_house_url': url, - 'imgs': [self.get_val(house, 'cover', 'img_src')], - 'top_region': top_region, - 'sub_region': sub_region, - 'property_type': property_type, - 'floor_ping': self.clean_number(house['area']), - 'floor': self.get_val(house, 'floor', clean_number=True), - 'total_floor': self.get_val(house, 'allfloor', clean_number=True), - 'monthly_price': self.get_val(house, 'price', clean_number=True) - } - - # 99 and 100 are magic number in 591... - # https://github.com/g0v/tw-rental-house-data/issues/11 - if generic_house['floor'] == 99: - generic_house['floor'] = 0 - elif generic_house['floor'] == 100 and generic_house['total_floor']: - generic_house['floor'] = generic_house['total_floor']+1 - - empty_keys = [] - for key in generic_house: - if generic_house[key] is None: - empty_keys.append(key) - - for key in empty_keys: - del generic_house[key] - - return generic_house - - def parse_list(self, response): - data = json.loads(response.text) - count = self.clean_number(data['records']) - page = response.meta['seed']['page'] - - if page == 0: - cur_page = 1 - while cur_page * self.N_PAGE < count: - self.gen_persist_request({ - 'region': response.meta['seed']['region'], - 'page': cur_page - }) - cur_page += 1 - - houses = data['data']['topData'] + data['data']['data'] - - for house in houses: - house['is_vip'] = 'id' not in house - yield RawHouseItem( - house_id=house['post_id'], - vendor=self.vendor, - is_list=True, - raw=json.dumps(house, ensure_ascii=False) - ) - yield GenericHouseItem(**self.gen_shared_attrs(house, response.meta['seed'])) - + def parse_list_and_stop(self, response): + for item in self.default_parse_list(response): + if isinstance(item, Request): + meta = item.meta['rental'] + if isinstance(meta, util.ListRequestMeta): + self.persist_queue.gen_persist_request(meta) + continue + else: + yield item yield True diff --git a/crawler/crawler/spiders/persist_queue.py b/crawler/crawler/spiders/persist_queue.py new file mode 100644 index 00000000..a4c4f1a2 --- /dev/null +++ b/crawler/crawler/spiders/persist_queue.py @@ -0,0 +1,178 @@ +import uuid +import scrapy +import traceback +from django.db import connection +from rental.models import HouseTS, Vendor +from rental import models +from crawlerrequest.models import RequestTS +from crawlerrequest.enums import RequestType + +class PersistQueue(object): + queue_length = 30 + n_live_spider = 0 + + def __init__( + self, + vendor, + is_list, + logger, + seed_parser, + generate_request_args, + parse_response, + **kwargs + ): + super().__init__(**kwargs) + y = models.current_year() + m = models.current_month() + d = models.current_day() + h = models.current_stepped_hour() + + self.spider_id = str(uuid.uuid4()) + self.logger = logger + self.seed_parser = seed_parser + self.generate_request_args = generate_request_args + self.parse_response = parse_response + try: + self.vendor = Vendor.objects.get( + name = vendor + ) + except Vendor.DoesNotExist: + raise Exception('Vendor "{}" is not defined.'.format(vendor)) + + if is_list: + self.request_type = RequestType.LIST + else: + self.request_type = RequestType.DETAIL + + self.ts = { + 'y': y, + 'm': m, + 'd': d, + 'h': h + } + + def has_request(self): + undone_requests = RequestTS.objects.filter( + year = self.ts['y'], + month = self.ts['m'], + day = self.ts['d'], + hour = self.ts['h'], + # Ignore pending request since we will generate new one and rerun it anyway + is_pending = False, + vendor = self.vendor, + request_type = self.request_type + )[:1] + + return undone_requests.count() > 0 + + def has_record(self): + today_houses = HouseTS.objects.filter( + year = self.ts['y'], + month = self.ts['m'], + day = self.ts['d'], + hour = self.ts['h'], + vendor = self.vendor + )[:1] + + return today_houses.count() > 0 + + def gen_persist_request(self, seed): + RequestTS.objects.create( + request_type=self.request_type, + vendor=self.vendor, + seed=seed + ) + + def next_request(self): + if self.n_live_spider >= self.queue_length: + # At most self.queue_length in memory + return None + + # #21, temp workaround to get next_request ASAP + # this operation is still not atomic, different session may get the same request + with connection.cursor() as cursor: + sql = ( + 'update request_ts set owner = %s where id = (' + 'select id from request_ts where year = %s and month = %s ' + 'and day = %s and hour = %s and vendor_id = %s and request_type = %s ' + 'and is_pending = %s and owner is null order by id limit 1)' + ) + a = cursor.execute(sql, [ + self.spider_id, + self.ts['y'], + self.ts['m'], + self.ts['d'], + self.ts['h'], + self.vendor.id, + self.request_type.value, + False + ]) + + next_row = RequestTS.objects.filter( + year=self.ts['y'], + month=self.ts['m'], + day=self.ts['d'], + hour=self.ts['h'], + vendor=self.vendor, + request_type=self.request_type, + is_pending=False, + owner=self.spider_id + ).order_by('created') + + next_row = next_row.first() + + if next_row is None: + return None + + next_row.is_pending = True + next_row.save() + self.n_live_spider += 1 + + rental_meta = self.seed_parser(next_row.seed) + + request_args = { + **self.generate_request_args(rental_meta), + 'callback': self.parser_wrapper, + 'meta': { + 'rental': rental_meta, + 'db_request': next_row + } + } + + return scrapy.Request(**request_args) + + def parser_wrapper(self, response): + db_request = response.meta['db_request'] + db_request.last_status = response.status + db_request.save() + + seed = response.meta.get('seed', {}) + + try: + for item in self.parse_response(response): + if item is True: + db_request.delete() + else: + yield item + except: + self.logger.error( + 'Parser error in {} when handle meta {}. [{}] - {:.128}'.format( + self.vendor.name, + seed, + response.status, + response.text + ) + ) + traceback.print_exc() + + self.n_live_spider -= 1 + # quick fix for concurrency issue + mercy = 10 + while True: + next_request = self.next_request() + if next_request: + yield next_request + elif mercy < 0: + break + else: + mercy -= 1 diff --git a/scrapy-package/Pipfile b/scrapy-package/Pipfile new file mode 100644 index 00000000..585f11ca --- /dev/null +++ b/scrapy-package/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +scrapy-tw-rental-house = {editable = true, path = "."} + +[dev-packages] +twine = "*" + +[requires] +python_version = "3.8" diff --git a/scrapy-package/Pipfile.lock b/scrapy-package/Pipfile.lock new file mode 100644 index 00000000..65b21034 --- /dev/null +++ b/scrapy-package/Pipfile.lock @@ -0,0 +1,728 @@ +{ + "_meta": { + "hash": { + "sha256": "c13a615bd60641b606769765624d68df345ed212160e235638a26de2cb4ace35" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.8" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "attrs": { + "hashes": [ + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" + }, + "automat": { + "hashes": [ + "sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33", + "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111" + ], + "version": "==20.2.0" + }, + "cffi": { + "hashes": [ + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + ], + "version": "==1.15.0" + }, + "constantly": { + "hashes": [ + "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35", + "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d" + ], + "version": "==15.1.0" + }, + "cryptography": { + "hashes": [ + "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6", + "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6", + "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c", + "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999", + "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e", + "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992", + "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d", + "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588", + "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa", + "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d", + "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd", + "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d", + "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953", + "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2", + "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8", + "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6", + "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9", + "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6", + "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad", + "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76" + ], + "markers": "python_version >= '3.6'", + "version": "==35.0.0" + }, + "cssselect": { + "hashes": [ + "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", + "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.1.0" + }, + "h2": { + "hashes": [ + "sha256:61e0f6601fa709f35cdb730863b4e5ec7ad449792add80d1410d4174ed139af5", + "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14" + ], + "version": "==3.2.0" + }, + "hpack": { + "hashes": [ + "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89", + "sha256:8eec9c1f4bfae3408a3f30500261f7e6a65912dc138526ea054f9ad98892e9d2" + ], + "version": "==3.0.0" + }, + "hyperframe": { + "hashes": [ + "sha256:5187962cb16dcc078f23cb5a4b110098d546c3f41ff2d4038a9896893bbd0b40", + "sha256:a9f5c17f2cc3c719b917c4f33ed1c61bd1f8dfac4b1bd23b7c80b3400971b41f" + ], + "version": "==5.2.0" + }, + "hyperlink": { + "hashes": [ + "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", + "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4" + ], + "version": "==21.0.0" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3" + }, + "incremental": { + "hashes": [ + "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57", + "sha256:92014aebc6a20b78a8084cdd5645eeaa7f74b8933f70fa3ada2cfbd1e3b54321" + ], + "version": "==21.3.0" + }, + "itemadapter": { + "hashes": [ + "sha256:695809a4e2f42174f0392dd66c2ceb2b2454d3ebbf65a930e5c85910d8d88d8f", + "sha256:f05df8da52619da4b8c7f155d8a15af19083c0c7ad941d8c1de799560ad994ca" + ], + "markers": "python_version >= '3.6'", + "version": "==0.4.0" + }, + "itemloaders": { + "hashes": [ + "sha256:1277cd8ca3e4c02dcdfbc1bcae9134ad89acfa6041bd15b4561c6290203a0c96", + "sha256:4cb46a0f8915e910c770242ae3b60b1149913ed37162804f1e40e8535d6ec497" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.4" + }, + "jmespath": { + "hashes": [ + "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9", + "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.0" + }, + "lxml": { + "hashes": [ + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", + "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + "sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16", + "sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4", + "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d", + "sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + "sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24", + "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", + "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" + ], + "markers": "platform_python_implementation == 'CPython'", + "version": "==4.6.3" + }, + "parsel": { + "hashes": [ + "sha256:70efef0b651a996cceebc69e55a85eb2233be0890959203ba7c3a03c72725c79", + "sha256:9e1fa8db1c0b4a878bf34b35c043d89c9d1cbebc23b4d34dbc3c0ec33f2e087d" + ], + "version": "==1.6.0" + }, + "priority": { + "hashes": [ + "sha256:6bc1961a6d7fcacbfc337769f1a382c8e746566aaa365e78047abe9f66b2ffbe", + "sha256:be4fcb94b5e37cdeb40af5533afe6dd603bd665fe9c8b3052610fc1001d5d1eb" + ], + "version": "==1.3.0" + }, + "protego": { + "hashes": [ + "sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.16" + }, + "pyasn1": { + "hashes": [ + "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", + "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", + "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", + "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", + "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", + "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", + "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", + "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", + "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", + "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" + ], + "version": "==0.4.8" + }, + "pyasn1-modules": { + "hashes": [ + "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8", + "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199", + "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811", + "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed", + "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4", + "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", + "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74", + "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb", + "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45", + "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd", + "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0", + "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d", + "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405" + ], + "version": "==0.2.8" + }, + "pycparser": { + "hashes": [ + "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", + "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.20" + }, + "pydispatcher": { + "hashes": [ + "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf", + "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433" + ], + "markers": "platform_python_implementation == 'CPython'", + "version": "==2.0.5" + }, + "pyopenssl": { + "hashes": [ + "sha256:5e2d8c5e46d0d865ae933bef5230090bdaf5506281e9eec60fa250ee80600cb3", + "sha256:8935bd4920ab9abfebb07c41a4f58296407ed77f04bd1a92914044b848ba1ed6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==21.0.0" + }, + "queuelib": { + "hashes": [ + "sha256:4b207267f2642a8699a1f806045c56eb7ad1a85a10c0e249884580d139c2fcd2", + "sha256:4b96d48f650a814c6fb2fd11b968f9c46178b683aad96d68f930fe13a8574d19" + ], + "markers": "python_version >= '3.5'", + "version": "==1.6.2" + }, + "scrapy": { + "hashes": [ + "sha256:13af6032476ab4256158220e530411290b3b934dd602bb6dacacbf6d16141f49", + "sha256:1a9a36970004950ee3c519a14c4db945f9d9a63fecb3d593dddcda477331dde9" + ], + "markers": "python_version >= '3.6'", + "version": "==2.5.1" + }, + "scrapy-tw-rental-house": { + "editable": true, + "path": "." + }, + "service-identity": { + "hashes": [ + "sha256:6e6c6086ca271dc11b033d17c3a8bea9f24ebff920c587da090afc9519419d34", + "sha256:f0b0caac3d40627c3c04d7a51b6e06721857a0e10a8775f2d1d7e72901b3a7db" + ], + "version": "==21.1.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "twisted": { + "extras": [ + "http2" + ], + "hashes": [ + "sha256:13c1d1d2421ae556d91e81e66cf0d4f4e4e1e4a36a0486933bee4305c6a4fb9b", + "sha256:2cd652542463277378b0d349f47c62f20d9306e57d1247baabd6d1d38a109006" + ], + "markers": "python_full_version >= '3.6.7'", + "version": "==21.7.0" + }, + "typing-extensions": { + "hashes": [ + "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e", + "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7", + "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34" + ], + "version": "==3.10.0.2" + }, + "w3lib": { + "hashes": [ + "sha256:0161d55537063e00d95a241663ede3395c4c6d7b777972ba2fd58bbab2001e53", + "sha256:0ad6d0203157d61149fd45aaed2e24f53902989c32fc1dccc2e2bfba371560df" + ], + "version": "==1.22.0" + }, + "zope.interface": { + "hashes": [ + "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192", + "sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702", + "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09", + "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4", + "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a", + "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3", + "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf", + "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c", + "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d", + "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78", + "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83", + "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531", + "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46", + "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021", + "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94", + "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc", + "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63", + "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54", + "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117", + "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25", + "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05", + "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e", + "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1", + "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004", + "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2", + "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e", + "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f", + "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f", + "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120", + "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f", + "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1", + "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9", + "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e", + "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7", + "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8", + "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b", + "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155", + "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7", + "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c", + "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325", + "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d", + "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb", + "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e", + "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959", + "sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7", + "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920", + "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e", + "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48", + "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8", + "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4", + "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==5.4.0" + } + }, + "develop": { + "bleach": { + "hashes": [ + "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da", + "sha256:4d2651ab93271d1129ac9cbc679f524565cc8a1b791909c4a51eac4446a15994" + ], + "markers": "python_version >= '3.6'", + "version": "==4.1.0" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "cffi": { + "hashes": [ + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + ], + "version": "==1.15.0" + }, + "charset-normalizer": { + "hashes": [ + "sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0", + "sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b" + ], + "markers": "python_version >= '3'", + "version": "==2.0.7" + }, + "colorama": { + "hashes": [ + "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", + "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.4.4" + }, + "cryptography": { + "hashes": [ + "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6", + "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6", + "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c", + "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999", + "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e", + "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992", + "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d", + "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588", + "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa", + "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d", + "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd", + "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d", + "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953", + "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2", + "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8", + "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6", + "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9", + "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6", + "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad", + "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76" + ], + "markers": "python_version >= '3.6'", + "version": "==35.0.0" + }, + "docutils": { + "hashes": [ + "sha256:a31688b2ea858517fa54293e5d5df06fbb875fb1f7e4c64529271b77781ca8fc", + "sha256:c1d5dab2b11d16397406a282e53953fe495a46d69ae329f55aa98a5c4e3c5fbb" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.18" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3" + }, + "importlib-metadata": { + "hashes": [ + "sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15", + "sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1" + ], + "markers": "python_version >= '3.6'", + "version": "==4.8.1" + }, + "jeepney": { + "hashes": [ + "sha256:1b5a0ea5c0e7b166b2f5895b91a08c14de8915afda4407fb5022a195224958ac", + "sha256:fa9e232dfa0c498bd0b8a3a73b8d8a31978304dcef0515adc859d4e096f96f4f" + ], + "markers": "sys_platform == 'linux'", + "version": "==0.7.1" + }, + "keyring": { + "hashes": [ + "sha256:6334aee6073db2fb1f30892697b1730105b5e9a77ce7e61fca6b435225493efe", + "sha256:bd2145a237ed70c8ce72978b497619ddfcae640b6dcf494402d5143e37755c6e" + ], + "markers": "python_version >= '3.6'", + "version": "==23.2.1" + }, + "packaging": { + "hashes": [ + "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7", + "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14" + ], + "markers": "python_version >= '3.6'", + "version": "==21.0" + }, + "pkginfo": { + "hashes": [ + "sha256:37ecd857b47e5f55949c41ed061eb51a0bee97a87c969219d144c0e023982779", + "sha256:e7432f81d08adec7297633191bbf0bd47faf13cd8724c3a13250e51d542635bd" + ], + "version": "==1.7.1" + }, + "pycparser": { + "hashes": [ + "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", + "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.20" + }, + "pygments": { + "hashes": [ + "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380", + "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6" + ], + "markers": "python_version >= '3.5'", + "version": "==2.10.0" + }, + "pyparsing": { + "hashes": [ + "sha256:84196357aa3566d64ad123d7a3c67b0e597a115c4934b097580e5ce220b91531", + "sha256:fd93fc45c47893c300bd98f5dd1b41c0e783eaeb727e7cea210dcc09d64ce7c3" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.1" + }, + "readme-renderer": { + "hashes": [ + "sha256:3286806450d9961d6e3b5f8a59f77e61503799aca5155c8d8d40359b4e1e1adc", + "sha256:8299700d7a910c304072a7601eafada6712a5b011a20139417e1b1e9f04645d8" + ], + "version": "==30.0" + }, + "requests": { + "hashes": [ + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.26.0" + }, + "requests-toolbelt": { + "hashes": [ + "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f", + "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0" + ], + "version": "==0.9.1" + }, + "rfc3986": { + "hashes": [ + "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835", + "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97" + ], + "version": "==1.5.0" + }, + "secretstorage": { + "hashes": [ + "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f", + "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195" + ], + "markers": "sys_platform == 'linux'", + "version": "==3.3.1" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "tqdm": { + "hashes": [ + "sha256:8dd278a422499cd6b727e6ae4061c40b48fce8b76d1ccbf5d34fca9b7f925b0c", + "sha256:d359de7217506c9851b7869f3708d8ee53ed70a1b8edbba4dbcb47442592920d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==4.62.3" + }, + "twine": { + "hashes": [ + "sha256:087328e9bb405e7ce18527a2dca4042a84c7918658f951110b38bc135acab218", + "sha256:4caec0f1ed78dc4c9b83ad537e453d03ce485725f2aea57f1bb3fdde78dae936" + ], + "index": "pypi", + "version": "==3.4.2" + }, + "urllib3": { + "hashes": [ + "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece", + "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.7" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" + }, + "zipp": { + "hashes": [ + "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832", + "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc" + ], + "markers": "python_version >= '3.6'", + "version": "==3.6.0" + } + } +} diff --git a/scrapy-package/README.md b/scrapy-package/README.md index ce43cd58..80677291 100644 --- a/scrapy-package/README.md +++ b/scrapy-package/README.md @@ -7,12 +7,12 @@ Although this package provide the ability to crawl rental house website, it's de ## Requirement -1. Python 3.5+ +1. Python 3.8+ ## Installation ```bash -pip install scrapy-tw-rental-house +pipenv install scrapy-tw-rental-house ``` ## Basic Usage diff --git a/scrapy-package/pyproject.toml b/scrapy-package/pyproject.toml new file mode 100644 index 00000000..b0471b7f --- /dev/null +++ b/scrapy-package/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta:__legacy__" \ No newline at end of file diff --git a/scrapy-package/requirements.txt b/scrapy-package/requirements.txt deleted file mode 100644 index 86e412f8..00000000 --- a/scrapy-package/requirements.txt +++ /dev/null @@ -1,38 +0,0 @@ -asn1crypto==0.24.0 -attrs==19.1.0 -Automat==0.7.0 -bleach==3.1.0 -certifi==2019.3.9 -cffi==1.12.3 -chardet==3.0.4 -constantly==15.1.0 -cryptography==2.7 -cssselect==1.0.3 -docutils==0.14 -hyperlink==19.0.0 -idna==2.8 -incremental==17.5.0 -lxml==4.3.3 -parsel==1.5.1 -pkginfo==1.5.0.1 -pyasn1==0.4.5 -pyasn1-modules==0.2.5 -pycparser==2.19 -PyDispatcher==2.0.5 -Pygments==2.4.2 -PyHamcrest==1.9.0 -pyOpenSSL==19.0.0 -queuelib==1.5.0 -readme-renderer==24.0 -requests==2.22.0 -requests-toolbelt==0.9.1 -Scrapy==1.6.0 -service-identity==18.1.0 -six==1.12.0 -tqdm==4.32.1 -twine==1.13.0 -Twisted==19.7.0 -urllib3==1.25.3 -w3lib==1.20.0 -webencodings==0.5.1 -zope.interface==4.6.0 diff --git a/scrapy-package/setup.py b/scrapy-package/setup.py index fbdf7c1e..45c236a2 100644 --- a/scrapy-package/setup.py +++ b/scrapy-package/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="scrapy-tw-rental-house", - version="1.0.0", + version="1.1.0", author="ddio", author_email="ddio@ddio.io", description="Scrapy spider for TW Rental House", @@ -17,7 +17,7 @@ exclude=['trial', 'examples'] ), install_requires=[ - 'Scrapy>=1' + 'Scrapy>=2.5' ], classifiers=[ "Programming Language :: Python :: 3", diff --git a/scrapy-package/trial/crawler/settings.py b/scrapy-package/trial/crawler/settings.py index 2428da91..0e6ed934 100644 --- a/scrapy-package/trial/crawler/settings.py +++ b/scrapy-package/trial/crawler/settings.py @@ -35,7 +35,7 @@ # Configure item pipelines # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html ITEM_PIPELINES = { - # 'crawler.pipelines.CrawlerPipeline': 300 + 'crawler.pipelines.CrawlerPipeline': 300 } EXTENSIONS = { From 3651c6f9cb17dbb16e32bdc4b8b83270f53f96dd Mon Sep 17 00:00:00 2001 From: ddio Date: Wed, 27 Oct 2021 11:57:54 +0800 Subject: [PATCH 3/4] feat: use scrapy_twrh in detail591 spider, #89, #31 --- Pipfile | 2 +- Pipfile.lock | 16 +- crawler/crawler/spiders/detail591_spider.py | 759 +----------------- crawler/crawler/spiders/list591_spider.py | 1 - crawler/crawler/spiders/persist_queue.py | 16 +- .../spiders/rental591/detail_mixin.py | 15 +- scrapy-package/setup.py | 2 +- 7 files changed, 54 insertions(+), 757 deletions(-) diff --git a/Pipfile b/Pipfile index 61c8007d..6acdbb47 100644 --- a/Pipfile +++ b/Pipfile @@ -14,7 +14,7 @@ django = "==2.1.15" jsonfield = "==2.0.2" raven = "==6.9.0" psycopg2-binary = "==2.8.6" -scrapy-tw-rental-house = "==1.1.0" +scrapy-tw-rental-house = "==1.1.1" [requires] python_version = "3" diff --git a/Pipfile.lock b/Pipfile.lock index fa0d2410..d701ec58 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "1e942a49ce839bb873885913dfca307972e8db9032a97d7e3bb3743ed0e819df" + "sha256": "b550970a17bba890cd0aaf2996af64f185cb29f35eea0aab6837232a80ea72b9" }, "pipfile-spec": 6, "requires": { @@ -183,7 +183,7 @@ "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9", "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.0" }, "jsonfield": { @@ -403,11 +403,11 @@ }, "scrapy-tw-rental-house": { "hashes": [ - "sha256:ccf7adce679c092911b70cbaaeda6f257d4a201d5d0300b6db58072cee3facef", - "sha256:d95c724166e9575ddd15fe2b60420394dd1d9b1bebd50f34cf2528baf217d224" + "sha256:2639ad2747b986b534b84025739d42aee856d0b14569c6d493dc5844b22ed6e5", + "sha256:4f9e191299f5440fb9b84fa77359f3e2aff0c530c0c1a03ef453b51a4ff3d32f" ], "index": "pypi", - "version": "==1.1.0" + "version": "==1.1.1" }, "service-identity": { "hashes": [ @@ -421,7 +421,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "twisted": { @@ -522,7 +522,7 @@ "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899", "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2" ], - "markers": "python_full_version >= '3.6.1' and python_version < '4.0'", + "markers": "python_version < '4.0' and python_full_version >= '3.6.1'", "version": "==5.9.3" }, "lazy-object-proxy": { @@ -597,7 +597,7 @@ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.2" }, "typing-extensions": { diff --git a/crawler/crawler/spiders/detail591_spider.py b/crawler/crawler/spiders/detail591_spider.py index e90bc049..c0c63340 100644 --- a/crawler/crawler/spiders/detail591_spider.py +++ b/crawler/crawler/spiders/detail591_spider.py @@ -1,78 +1,34 @@ -import re -from decimal import Decimal -from functools import partial -from urllib.parse import urlparse, parse_qs import traceback -from django.utils import timezone from django.db import transaction -from django.contrib.gis.geos import Point +from rental.models import House from rental import enums -from rental.models import House, Author -from ..items import GenericHouseItem, RawHouseItem -from .house_spider import HouseSpider +from scrapy_twrh.spiders.rental591 import Rental591Spider, util +from .persist_queue import PersistQueue -class Detail591Spider(HouseSpider): +class Detail591Spider(Rental591Spider): name = "detail591" - zh_number_dict = { - '零': 0, - '一': 1, - '二': 2, - '三': 3, - '四': 4, - '五': 5, - '六': 6, - '七': 7, - '八': 8, - '九': 9, - '十': 10 - } - - apt_features = { - 'n_living_room': '廳', - 'n_bed_room': '房', - 'n_balcony': '陽台', - 'n_bath_room': '衛' - } def __init__(self, **kwargs): super().__init__( - vendor='591 租屋網', - is_list=False, - request_generator=self.gen_request_params, - response_router=self.route_parser, + start_list=self.start_detail_requests, **kwargs ) - self.BASE_URL = self.vendor.site_url - - def gen_request_params(self, seed): - if 'gps' in seed: - # https://rent.591.com.tw/map-houseRound.html?type=1&detail=detail&version=1&post_id=6635655 - url = "{}/map-houseRound.html?type=1&detail=detail&version=1&post_id={}".format(self.BASE_URL, seed['house_id']) - - #19, the house may be closed in 3 hours when we found it.... - return { - 'url': url, - 'meta': { - 'seed': seed, - 'handle_httpstatus_list': [404] - } - } - else: - # https://rent.591.com.tw/rent-detail-6635655.html - url = "{}/rent-detail-{}.html".format(self.BASE_URL, seed['house_id']) + self.persist_queue = PersistQueue( + vendor='591 租屋網', + is_list=False, + logger=self.logger, + seed_parser=self.parse_seed, + generate_request_args=self.gen_detail_request_args, + parse_response=self.parse_detail + ) - return { - 'url': url, - 'meta': { - 'seed': seed, - 'handle_httpstatus_list': [400, 404, 302, 301] - } - } + def parse_seed(self, seed): + return util.DetailRequestMeta(*seed) - def start_requests(self): + def start_detail_requests(self): - if not self.has_request(): + if not self.persist_queue.has_request(): # find all opened houses and crawl all of them houses = House.objects.filter( deal_status = enums.DealStatusType.OPENED @@ -83,694 +39,17 @@ def start_requests(self): with transaction.atomic(): try: for house in houses: - self.gen_persist_request({ - 'house_id': house['vendor_house_id'] - }) + self.persist_queue.gen_persist_request([house['vendor_house_id']]) except: traceback.print_exc() # quick fix for concurrency issue mercy = 10 while True: - next_request = self.next_request() + next_request = self.persist_queue.next_request() if next_request: yield next_request elif mercy < 0: break else: mercy -= 1 - - def dict_from_tuple(self, keys, values): - min_length = min(len(keys), len(values)) - ret = {} - - for i in range(min_length): - ret[keys[i]] = values[i] - - return ret - - def split_string_to_dict(self, string, seperator): - tokens = string.split(seperator) - if len(tokens) >= 2: - return {tokens[0]: tokens[1]} - else: - return None - - def collect_dict(self, response): - # title - title = self.css_first(response, '.houseInfoTitle', deep_text=True) - - # region 首頁/租屋/xx市/xx區 - breadcromb = self.css(response, '#propNav a', deep_text=True) - if len(breadcromb) >= 4: - if breadcromb[2] == '出租' and len(breadcromb) >= 5: - # 首頁 > 店面 > 出租 > 台北市 > 大安區 > 台北市大安區安和路二段 - top_region = breadcromb[3] - sub_region = breadcromb[4] - else: - # 首頁 > 租屋 > 台北市 > 大安區 > 獨立套房 > 20000-30000元 > 台北市大安區仁愛路四段50號 - top_region = breadcromb[2] - sub_region = breadcromb[3] - else: - top_region = '__UNKNOWN__' - sub_region = '__UNKNOWN__' - - # rough address - address = self.css_first(response, '#propNav .addr', deep_text=True) - - # image, it's in a hidden input - imgs = self.css_first( - response, - '#hid_imgArr::attr(value)', - allow_empty=True - ).replace('"', '').split(',') - - if imgs[0] == "": - imgs.pop(0) - - # top meta, including 押金, 法定用途, etc.. - top_meta_keys = self.css(response, '.labelList-1 .one', deep_text=True) - top_meta_values = self.css(response, '.labelList-1 .two em', deep_text=True) - top_metas = self.dict_from_tuple(top_meta_keys, top_meta_values) - - if '身份要求' in top_metas: - top_metas['身份要求'] = top_metas['身份要求'].split('、') - - # facilities, including 衣櫃、沙發, etc.. - fa_status = self.css(response, '.facility li span::attr(class)') - fa_text = self.css(response, '.facility li', deep_text=True) - fa = [] - without_fa = [] - for index, key in enumerate(fa_text): - if fa_status[index] != 'no': - fa.append(key) - else: - without_fa.append(key) - - # environment - #

生活機能:近便利商店;傳統市場;夜市

- env_keys = self.css(response, '.lifeBox > p strong', deep_text=True) - env_desps = self.css(response, '.lifeBox > p', deep_text=True) - env_desps = list(map(lambda desp: re.sub('.*:', '', desp).split(';'), env_desps)) - env = self.dict_from_tuple(env_keys, env_desps) - - # neighbor - nei_selector = response.css('.lifeBox.community') - nei = {} - if nei_selector: - nei['name'] = self.css_first(nei_selector, '.communityName a', deep_text=True) - nei['desp'] = self.css_first(nei_selector, '.communityIntroduce::text', deep_text=True, allow_empty=True) - nei['url'] = self.BASE_URL +\ - self.css_first(nei_selector, '.communityIntroduce a::attr(href)', allow_empty=True) - nei_keys = self.css(nei_selector, '.communityDetail p::text') - nei_values = self.css(nei_selector, '.communityDetail p > *', deep_text=True) - nei['info'] = self.dict_from_tuple(nei_keys, nei_values) - - # sublets 分租套房、雅房 - sublets_keys = self.css(response, '.list-title span', deep_text=True) - sublets_list = response.css('.house-list') - sublets = [] - for sublet in sublets_list: - texts = self.css(sublet, 'li', deep_text=True) - sublet_dict = self.dict_from_tuple(sublets_keys, texts) - if '租金' in sublet_dict: - sublet_dict['租金'] = self.clean_number(sublet_dict['租金']) - if '坪數' in sublet_dict: - sublet_dict['坪數'] = self.clean_number(sublet_dict['坪數']) - - sublets.append(sublet_dict) - - # desp - desp = self.css(response, '.houseIntro *', deep_text=True) - - # q and a - # TODO - # TODO: format correct - - # price - #
14,500 元/月
- price = self.css_first(response, '.price i', deep_text=True) - - # built-in facility - price_includes = self.css_first( - response, - '.detailInfo .price+.explain', - deep_text=True, - allow_empty=True - ).split('/') - - # lease status - is_deal = len(response.css('.filled').extract()) > 0 - # house_state = 'OPENED' - # deal_at = None - # if is_deal: - # house_state = 'DEAL' - # deal_at = timezone.localtime() - - # side meta - sides = self.css(response, '.detailInfo .attr li', deep_text=True) - side_metas = {} - for side in sides: - tokens = side.split(':') - if len(tokens) >= 2: - side_metas[tokens[0]] = ':'.join(tokens[1::]) - - # 格局 : 3房2廳2衛2陽台 - if '格局' in side_metas: - # TODO: 開放式格局 - parts = re.findall( - r'(\d)([^\d]+)', - side_metas['格局'] - ) - parts_dict = {} - for part in parts: - parts_dict[part[1]] = part[0] - side_metas['格局'] = parts_dict - if '坪數' in side_metas: - side_metas['坪數'] = self.clean_number(side_metas['坪數']) - if '權狀坪數' in side_metas: - side_metas['權狀坪數'] = self.clean_number(side_metas['權狀坪數']) - - # due day - due_day = self.css_first(response, '.explain .ft-rt', deep_text=True) - due_day = due_day.replace('有效期:', '') - - # owner - owner = {} - owner['name'] = self.css_first(response, '.avatarRight i', deep_text=True) - owner['comment'] = self.css_first(response, '.avatarRight div', deep_text=True) - agent_info = self.css(response, '.avatarRight .auatarSonBox p', deep_text=True) - make_agent_info = partial(self.split_string_to_dict, seperator=':') - agent_info = list(map(make_agent_info, agent_info)) - owner['isAgent'] = len(agent_info) > 0 - owner['agent'] = agent_info - - phone_ext = self.css_first(response, '.phone-hide .num', deep_text=True, allow_empty=True) - phone_url = response.css('.phone-hide .num img').xpath('@src').extract_first() - - if phone_ext: - # phone will be pure text when owner use 591 built-in phone number - # TODO: check is the ext is identical for the same owner - owner['id'] = phone_ext - elif phone_url: - # or it will be an img, the src would be identical for the same owner - # url is sth like - # statics.591.com.tw/tools/showPhone.php?info_data=%2BbRfNLlKoLNhHOKui2zb%2FBxYO6A&type=rLEFMu4XrrpgEw - parsed_url = urlparse(phone_url) - qs = parse_qs(parsed_url.query) - if 'info_data' in qs and len(qs['info_data']) > 0: - owner['id'] = qs['info_data'][0] - else: - # sth strange happened, such as it's already dealt - # let's try if there's avatar - avatar = response.css('.userInfo .avatar img').xpath('@src').extract_first() - if avatar and 'no-photo-new.png' not in avatar: - owner['id'] = avatar - else: - # last try, search description to see if there's phone number - phone = re.search(r'09[0-9]{8}', ' '.join(desp)) - if phone: - phone = phone.group() - owner['id'] = phone - - return { - 'house_id': response.meta['seed']['house_id'], - 'n_views': self.css_first(response, '.pageView b', deep_text=True), - 'top_region': top_region, - 'sub_region': sub_region, - 'address': address, - 'title': title, - 'imgs': imgs, - 'top_metas': top_metas, - 'facilities': fa, - 'without_facilities': without_fa, - 'environment': env, - 'sublets': sublets, - 'neighbor': nei, - 'desp': desp, - 'price': price, - 'price_includes': price_includes, - 'is_deal': is_deal, - 'side_metas': side_metas, - 'due_day': due_day, - 'owner': owner - } - - def from_zh_number(self, zh_number): - if zh_number in self.zh_number_dict: - return self.zh_number_dict[zh_number] - else: - raise Exception('ZH number {} not defined.'.format(zh_number)) - - def get_shared_price(self, detail_dict, house, basic_info): - ret = {} - - # deposit_type, n_month_deposit - if '押金' in detail_dict['top_metas']: - deposit = detail_dict['top_metas']['押金'] - month_deposit = deposit.split('個月') - if len(month_deposit) == 2: - ret['deposit_type'] = enums.DepositType.月 - ret['n_month_deposit'] = self.from_zh_number(month_deposit[0]) - ret['deposit'] = ret['n_month_deposit'] * detail_dict['price'] - elif deposit.replace(',', '').isdigit(): - ret['deposit'] = self.clean_number(deposit) - n_month = ret['deposit'] / detail_dict['price'] - ret['deposit_type'] = enums.DepositType.定額 - ret['n_month_deposit'] = n_month - elif deposit == '面議': - ret['deposit_type'] = enums.DepositType.面議 - ret['n_month_deposit'] = None - ret['deposit'] = None - else: - ret['deposit_type'] = enums.DepositType.其他 - ret['n_month_deposit'] = None - ret['deposit'] = None - - # is_remanagement_fee, monthly_management_fee - if '管理費' in detail_dict['price_includes']: - ret['is_require_management_fee'] = False - ret['monthly_management_fee'] = 0 - elif '管理費' in detail_dict['top_metas']: - mgmt_fee = detail_dict['top_metas']['管理費'] - # could be xxx元/月, --, -, !@$#$%... - if '元/月' in mgmt_fee: - ret['is_require_management_fee'] = True - ret['monthly_management_fee'] = self.clean_number(mgmt_fee) - else: - ret['is_require_management_fee'] = False - ret['monthly_management_fee'] = 0 - - # *_parking* - if '車 位' in detail_dict['top_metas']: - parking_str = detail_dict['top_metas']['車 位'] - parking = self.clean_number(parking_str) - - ret['has_parking'] = True - if parking: - ret['is_require_parking_fee'] = True - ret['monthly_parking_fee'] = parking - elif '已含' in parking_str: - ret['is_require_parking_fee'] = False - ret['monthly_parking_fee'] = 0 - elif '費用另計' in parking_str: - ret['is_require_parking_fee'] = True - ret['monthly_parking_fee'] = 0 - elif '無' == parking_str: - ret['has_parking'] = False - - # per ping price - if 'floor_ping' in basic_info: - mgmt = ret.get('monthly_management_fee', 0) - parking = ret.get('monthly_parking_fee', 0) - price = detail_dict['price'] - total_price = price + mgmt + parking - ret['per_ping_price'] = total_price / basic_info['floor_ping'] - - return ret - - def get_shared_basic(self, detail_dict, house): - ret = {} - - # top_region, sub_region - if 'top_region' in detail_dict: - ret['top_region'] = self.get_enum( - enums.TopRegionType, - detail_dict['house_id'], - detail_dict['top_region'] - ) - - ret['sub_region'] = self.get_enum( - enums.SubRegionType, - detail_dict['house_id'], - '{}{}'.format( - detail_dict['top_region'], - detail_dict['sub_region'] - ) - ) - - if 'address' in detail_dict: - ret['rough_address'] = detail_dict['address'] - - # deal_status - if detail_dict['is_deal']: - # Issue #15, update only deal_status in crawler - # let `syncstateful` to update the rest - ret['deal_status'] = enums.DealStatusType.DEAL - else: - # Issue #14, always update deal status since item may be reopened - ret['deal_status'] = enums.DealStatusType.OPENED - - # building_type, 公寓 / 電梯大樓 / 透天 - if '型態' in detail_dict['side_metas']: - building_type = detail_dict['side_metas']['型態'] - if building_type == '別墅' or building_type == '透天厝': - ret['building_type'] = enums.BuildingType.透天 - elif building_type == '住宅大樓': - ret['building_type'] = enums.BuildingType.電梯大樓 - else: - ret['building_type'] = self.get_enum( - enums.BuildingType, - detail_dict['house_id'], - building_type - ) - - # property type - if '現況' in detail_dict['side_metas']: - ret['property_type'] = self.get_enum( - enums.PropertyType, - detail_dict['house_id'], - detail_dict['side_metas']['現況'] - ) - - # is_rooftop, floor, total_floor - # TODO: use title to detect rooftop - if '樓層' in detail_dict['side_metas']: - # floor_info = 1F/2F or 頂樓加蓋/2F or 整棟/2F - floor_info = detail_dict['side_metas']['樓層'].split('/') - floor = self.clean_number(floor_info[0]) - ret['floor'] = 0 - ret['total_floor'] = self.clean_number(floor_info[1]) - ret['is_rooftop'] = False - - if floor_info[0] == '頂樓加蓋': - ret['is_rooftop'] = True - ret['floor'] = ret['total_floor'] + 1 - elif 'B' in floor_info[0] and floor: - # basement - ret['floor'] = -floor - elif floor: - ret['floor'] = floor - - ret['dist_to_highest_floor'] = ret['total_floor'] - ret['floor'] - - if '坪數' in detail_dict['side_metas']: - ret['floor_ping'] = self.clean_number( - detail_dict['side_metas']['坪數']) - - if '格局' in detail_dict['side_metas']: - apt_feature = detail_dict['side_metas']['格局'] - - for name in self.apt_features: - if self.apt_features[name] in apt_feature: - ret[name] = self.clean_number( - apt_feature[self.apt_features[name]]) - else: - ret[name] = 0 - - ret['apt_feature_code'] = '{:02d}{:02d}{:02d}{:02d}'.format( - ret['n_balcony'], - ret['n_bath_room'], - ret['n_bed_room'], - ret['n_living_room'] - ) - - # TODO: rough_address - - return ret - - def count_keyword_in_list(self, haystack, list, must_not_match=False): - counter = 0 - if must_not_match: - for item in list: - if haystack in item and haystack != item: - counter += 1 - else: - for item in list: - if haystack in item: - counter += 1 - return counter - - def get_shared_environment(self, detail_dict, house): - # additional fee - price_includes = detail_dict['price_includes'] - - additional_fee = { - 'eletricity': '電費' not in price_includes, - 'water': '水費' not in price_includes, - 'gas': '瓦斯費' not in price_includes, - 'internet': '網路' not in price_includes, - 'cable_tv': '第四台' not in price_includes - } - - # living_functions - living_functions = {} - if '生活機能' in detail_dict['environment']: - living = detail_dict['environment']['生活機能'] - living_functions = { - 'school': '學校' in living, - 'park': '公園綠地' in living, - 'dept_store': '百貨公司' in living, - 'conv_store': '便利商店' in living, - 'traditional_mkt': '傳統市場' in living, - 'night_mkt': '夜市' in living, - 'hospital': '醫療機構' in living, - # not provided XDDD - 'police_office': False - } - - lower_desp = [] - for line in detail_dict['desp']: - lower_desp.append(line.lower()) - - transportation = {} - if '附近交通' in detail_dict['environment']: - tp = detail_dict['environment']['附近交通'] - transportation = { - 'subway': self.count_keyword_in_list('捷運站', tp), - 'bus': self.count_keyword_in_list('公車站', tp) + - self.count_keyword_in_list('路', tp), - 'train': self.count_keyword_in_list('火車站', tp), - 'hsr': self.count_keyword_in_list('高速鐵路', tp), - 'public_bike': self.count_keyword_in_list('bike', lower_desp) - } - - ret = { - 'additional_fee': additional_fee, - 'living_functions': living_functions, - 'transportation': transportation - } - - return ret - - def get_shared_boolean_info(self, detail_dict, house): - ret = {} - - # has_tenant_restriction - ret['has_tenant_restriction'] = False - if '身份要求' in detail_dict['top_metas']: - if len(detail_dict['top_metas']['身份要求']) > 0: - ret['has_tenant_restriction'] = True - - # has_gender_restriction - ret['has_gender_restriction'] = False - ret['gender_restriction'] = enums.GenderType.不限 - if '性別要求' in detail_dict['top_metas']: - gender = detail_dict['top_metas']['性別要求'] - if gender == '女生': - ret['has_gender_restriction'] = True - ret['gender_restriction'] = enums.GenderType.女 - elif gender == '男生': - ret['has_gender_restriction'] = True - ret['gender_restriction'] = enums.GenderType.男 - elif '不限' not in gender and '男女生皆可' not in gender: - ret['has_gender_restriction'] = True - ret['gender_restriction'] = enums.GenderType.其他 - - # can_cook - if '開伙' in detail_dict['top_metas']: - ret['can_cook'] = detail_dict['top_metas']['開伙'] == '可以' - else: - ret['can_cook'] = None - - # allow pet - if '養寵物' in detail_dict['top_metas']: - ret['allow_pet'] = detail_dict['top_metas']['養寵物'] == '可以' - else: - ret['allow_pet'] = None - - # has_perperty_registration - ret['has_perperty_registration'] = detail_dict['top_metas']\ - .get('產權登記', '') == '已辦' - - return ret - - def get_shared_misc(self, detail_dict, house): - ret = {} - - # facilities - facilities = {} - for item in detail_dict['facilities']: - facilities[item] = True - - for item in detail_dict['without_facilities']: - facilities[item] = False - - ret['facilities'] = facilities - - # contact, agent, and author - owner = detail_dict['owner'] - if '代理人' in owner['comment']: - ret['contact'] = enums.ContactType.代理人 - elif owner['isAgent']: - ret['contact'] = enums.ContactType.房仲 - else: - ret['contact'] = enums.ContactType.屋主 - - if owner['isAgent']: - agent = {} - for item in owner['agent']: - for key in item: - agent[key] = item[key] - - if '公司名' in agent: - ret['agent_org'] = agent['公司名'] - elif '經濟業' in agent: - ret['agent_org'] = agent['經濟業'] - else: - ret['agent_org'] = '/'.join(agent.values()) - - if 'id' in detail_dict['owner'] and detail_dict['owner']['id']: - author_info, created = Author.objects.get_or_create(truth=detail_dict['owner']['id']) - ret['author'] = author_info - - return ret - - def gen_shared_attrs(self, detail_dict, house=None): - - if house == None: - house = House.objects.get( - vendor = self.vendor, - vendor_house_id = detail_dict['house_id'] - ) - - detail_dict['price'] = self.clean_number(detail_dict['price']) - - detail_dict['price_includes'] = list(map( - lambda x: x.replace('含', ''), - detail_dict['price_includes'] - )) - - if '生活機能' in detail_dict['environment']: - detail_dict['environment']['生活機能'] = list(map( - lambda x: x.replace('近', ''), - detail_dict['environment']['生活機能'] - )) - - if '附近交通' in detail_dict['environment']: - detail_dict['environment']['附近交通'] = list(map( - lambda x: re.sub('[  ]', '', x.replace('近', '')), - detail_dict['environment']['附近交通'] - )) - - ret = { - 'vendor': self.vendor, - 'vendor_house_id': detail_dict['house_id'], - 'monthly_price': detail_dict['price'], - 'imgs': detail_dict['imgs'] - } - - basic_info = self.get_shared_basic(detail_dict, house) - price_info = self.get_shared_price(detail_dict, house, basic_info) - env_info = self.get_shared_environment(detail_dict, house) - boolean_info = self.get_shared_boolean_info(detail_dict, house) - misc_info = self.get_shared_misc(detail_dict, house) - - ret = { - **ret, - **price_info, - **basic_info, - **env_info, - **boolean_info, - **misc_info - } - - return ret - - def route_parser(self, seed): - if 'gps' in seed: - return self.parse_gps_response - else: - return self.parse_main_response - - def parse_gps_response(self, response): - house_id = response.meta['seed']['house_id'] - - if response.status == 404: - self.logger.info( - 'GPS {} not found by receiving status code {}' - .format(house_id, response.status) - ) - yield True - return - - gmap_url = self.css_first(response, '#main .propMapBarMap iframe::attr(src)') - # example url: //maps.google.com.tw/maps?f=q&hl=zh-TW&q=25.0268980,121.5542323&z=17&output=embed - - parsed_url = urlparse(gmap_url) - qs = parse_qs(parsed_url.query) - if 'q' not in qs or len(qs['q']) == 0: - self.logger.info( - 'Invalid GPS page in house: {}' - .format(house_id) - ) - yield True - return - - gps_str = qs['q'][0] - coordinate = list(map(lambda x: Decimal(x), gps_str.split(','))) - - if len(coordinate) == 2: - yield GenericHouseItem( - vendor=self.vendor, - vendor_house_id=house_id, - rough_coordinate=Point(coordinate, srid=4326) - ) - - yield True - - def parse_main_response(self, response): - house_id = response.meta['seed']['house_id'] - - if response.status == 400: - self.logger.info("I'm getting blocked -___-") - elif response.status != 200: - self.logger.info( - 'House {} not found by receiving status code {}' - .format(house_id, response.status) - ) - yield GenericHouseItem( - vendor=self.vendor, - vendor_house_id=house_id, - deal_status=enums.DealStatusType.NOT_FOUND - ) - else: - # regular 200 response - yield RawHouseItem( - house_id=house_id, - vendor=self.vendor, - is_list=False, - raw=response.body - ) - - detail_dict = self.collect_dict(response) - - yield RawHouseItem( - house_id=house_id, - vendor=self.vendor, - is_list=False, - dict=detail_dict - ) - - yield GenericHouseItem( - **self.gen_shared_attrs(detail_dict) - ) - - # get gps only when the house existed - self.gen_persist_request({ - 'house_id': house_id, - 'gps': True - }) - - if response.status != 400: - yield True diff --git a/crawler/crawler/spiders/list591_spider.py b/crawler/crawler/spiders/list591_spider.py index 181a1445..5d645d8f 100644 --- a/crawler/crawler/spiders/list591_spider.py +++ b/crawler/crawler/spiders/list591_spider.py @@ -8,7 +8,6 @@ class List591Spider(Rental591Spider): def __init__(self, **kwargs): super().__init__( start_list=self.start_list_from_persist_queue, - # parse_list=self.parse_list_and_stop, **kwargs ) diff --git a/crawler/crawler/spiders/persist_queue.py b/crawler/crawler/spiders/persist_queue.py index a4c4f1a2..41134124 100644 --- a/crawler/crawler/spiders/persist_queue.py +++ b/crawler/crawler/spiders/persist_queue.py @@ -132,12 +132,18 @@ def next_request(self): request_args = { **self.generate_request_args(rental_meta), - 'callback': self.parser_wrapper, - 'meta': { + # overwrite callback directly, + # as we know where to find real parser + 'callback': self.parser_wrapper + } + + if 'meta' not in request_args: + request_args['meta'] = { 'rental': rental_meta, 'db_request': next_row } - } + elif 'db_request' not in request_args['meta']: + request_args['meta']['db_request'] = next_row return scrapy.Request(**request_args) @@ -146,7 +152,7 @@ def parser_wrapper(self, response): db_request.last_status = response.status db_request.save() - seed = response.meta.get('seed', {}) + meta = response.meta.get('rental', {}) try: for item in self.parse_response(response): @@ -158,7 +164,7 @@ def parser_wrapper(self, response): self.logger.error( 'Parser error in {} when handle meta {}. [{}] - {:.128}'.format( self.vendor.name, - seed, + meta, response.status, response.text ) diff --git a/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py b/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py index e630830b..cb1e40f0 100644 --- a/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py +++ b/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py @@ -86,7 +86,20 @@ def default_parse_detail(self, response): self.logger.error('Invalid detail response for 591 house: {}' .format(response.meta['rental'].id) ) - return False + return None + if isinstance(jsonResp['data'], str): + if jsonResp.get('msg', '') == '物件不存在': + yield GenericHouseItem( + vendor=self.vendor, + vendor_house_id=house_id, + deal_status=enums.DealStatusType.NOT_FOUND + ) + else: + self.logger.error( + 'House {} not found by receiving status code {}' + .format(house_id, response.status) + ) + return None detail_dict = jsonResp['data'] detail_dict['house_id'] = house_id diff --git a/scrapy-package/setup.py b/scrapy-package/setup.py index 45c236a2..6e61cb50 100644 --- a/scrapy-package/setup.py +++ b/scrapy-package/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="scrapy-tw-rental-house", - version="1.1.0", + version="1.1.1", author="ddio", author_email="ddio@ddio.io", description="Scrapy spider for TW Rental House", From 7d36465efd6ee7fe9edf0037ed0380723cdf68d4 Mon Sep 17 00:00:00 2001 From: ddio Date: Wed, 27 Oct 2021 18:52:17 +0800 Subject: [PATCH 4/4] feat: remove unused module and adapy utils to pipenv, #89 --- Pipfile | 2 +- Pipfile.lock | 8 +- backend/rental/libs/export/field.py | 16 +- crawler/crawler/items.py | 73 ----- crawler/crawler/spiders/all_591_cities.py | 86 ------ crawler/crawler/spiders/house_spider.py | 290 ------------------ crawler/go.sh | 11 +- .../spiders/rental591/detail_mixin.py | 2 +- scrapy-package/setup.py | 2 +- 9 files changed, 20 insertions(+), 470 deletions(-) delete mode 100644 crawler/crawler/items.py delete mode 100644 crawler/crawler/spiders/all_591_cities.py delete mode 100644 crawler/crawler/spiders/house_spider.py diff --git a/Pipfile b/Pipfile index 6acdbb47..3662a8a4 100644 --- a/Pipfile +++ b/Pipfile @@ -14,7 +14,7 @@ django = "==2.1.15" jsonfield = "==2.0.2" raven = "==6.9.0" psycopg2-binary = "==2.8.6" -scrapy-tw-rental-house = "==1.1.1" +scrapy-tw-rental-house = "==1.1.2" [requires] python_version = "3" diff --git a/Pipfile.lock b/Pipfile.lock index d701ec58..d2094bf0 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b550970a17bba890cd0aaf2996af64f185cb29f35eea0aab6837232a80ea72b9" + "sha256": "ff1dfcfbb8f590c849421d7b440a7ba02ee38c5715d1cbb22350bb276693706b" }, "pipfile-spec": 6, "requires": { @@ -403,11 +403,11 @@ }, "scrapy-tw-rental-house": { "hashes": [ - "sha256:2639ad2747b986b534b84025739d42aee856d0b14569c6d493dc5844b22ed6e5", - "sha256:4f9e191299f5440fb9b84fa77359f3e2aff0c530c0c1a03ef453b51a4ff3d32f" + "sha256:02a4ddc1e1fb78deded0273236f4c681855a1bbb17ab5889883ca9bb100b6339", + "sha256:c5231be11f29280cab54ef93505d829e5d819f168c04061fc29ad9e8e210f43c" ], "index": "pypi", - "version": "==1.1.1" + "version": "==1.1.2" }, "service-identity": { "hashes": [ diff --git a/backend/rental/libs/export/field.py b/backend/rental/libs/export/field.py index 90e75969..683bf66c 100644 --- a/backend/rental/libs/export/field.py +++ b/backend/rental/libs/export/field.py @@ -36,13 +36,13 @@ def to_human(self, val, use_tf=True): if self.fn: val = self.fn(val) - if type(val) is datetime: + if isinstance(val, datetime): val = timezone.localtime(val).strftime('%Y-%m-%d %H:%M:%S %Z') - elif val is '' or val is None: + elif val == '' or val is None: val = '-' - elif val is True or val == 'true': + elif val == True or val == 'true': val = 'T' if use_tf else 1 - elif val is False or val == 'false': + elif val == False or val == 'false': val = 'F' if use_tf else 0 return val @@ -51,13 +51,13 @@ def to_machine(self, val): if self.fn: val = self.fn(val) - if type(val) is datetime: + if isinstance(val, datetime): pass - elif val is '' or val is None: + elif val == '' or val is None: val = None - elif val is True or val == 'true': + elif val == True or val == 'true': val = True - elif val is False or val == 'false': + elif val == False or val == 'false': val = False return val diff --git a/crawler/crawler/items.py b/crawler/crawler/items.py deleted file mode 100644 index c5200978..00000000 --- a/crawler/crawler/items.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -from scrapy import Field, Item - - -class GenericHouseItem(Item): - top_region = Field() - sub_region = Field() - deal_time = Field() - deal_status = Field() - n_day_deal = Field() - vendor = Field() - vendor_house_id = Field() - vendor_house_url = Field() - # price related - monthly_price = Field() - deposit_type = Field() - n_month_deposit = Field() - deposit = Field() - is_require_management_fee = Field() - monthly_management_fee = Field() - has_parking = Field() - is_require_parking_fee = Field() - monthly_parking_fee = Field() - per_ping_price = Field() - # other basic info - building_type = Field() - property_type = Field() - is_rooftop = Field() - floor = Field() - total_floor = Field() - dist_to_highest_floor = Field() - floor_ping = Field() - n_living_room = Field() - n_bed_room = Field() - n_bath_room = Field() - n_balcony = Field() - apt_feature_code = Field() - rough_address = Field() - rough_coordinate = Field() - # boolean map - # eletricity: true, water: true, gas: true, internet: true, cable_tv: true - additional_fee = Field() - # school, park, dept_store, conv_store, traditional_mkt, night_mkt, - # hospital, police_office - living_functions = Field() - # subway, bus, public_bike, train, hsr - transportation = Field() - has_tenant_restriction = Field() - has_gender_restriction = Field() - gender_restriction = Field() - can_cook = Field() - allow_pet = Field() - has_perperty_registration = Field() - # undermined for now - facilities = Field() - contact = Field() - author = Field() - agent_org = Field() - imgs = Field() - - -class RawHouseItem(Item): - house_id = Field() - vendor = Field() - is_list = Field() - raw = Field() - dict = Field() diff --git a/crawler/crawler/spiders/all_591_cities.py b/crawler/crawler/spiders/all_591_cities.py deleted file mode 100644 index 46cb93aa..00000000 --- a/crawler/crawler/spiders/all_591_cities.py +++ /dev/null @@ -1,86 +0,0 @@ -all_591_cities = [ - { - "city": "台北市", - "id": "1" - }, - { - "city": "新北市", - "id": "3" - }, - { - "city": "桃園市", - "id": "6" - }, - { - "city": "新竹市", - "id": "4" - }, - { - "city": "新竹縣", - "id": "5" - }, - { - "city": "基隆市", - "id": "2" - }, - { - "city": "宜蘭縣", - "id": "21" - }, - { - "city": "台中市", - "id": "8" - }, - { - "city": "彰化縣", - "id": "10" - }, - { - "city": "苗栗縣", - "id": "7" - }, - { - "city": "雲林縣", - "id": "14" - }, - { - "city": "南投縣", - "id": "11" - }, - { - "city": "高雄市", - "id": "17" - }, - { - "city": "台南市", - "id": "15" - }, - { - "city": "嘉義市", - "id": "12" - }, - { - "city": "屏東縣", - "id": "19" - }, - { - "city": "嘉義縣", - "id": "13" - }, - { - "city": "花蓮縣", - "id": "23" - }, - { - "city": "台東縣", - "id": "22" - }, - { - "city": "金門縣", - "id": "25" - }, - { - "city": "澎湖縣", - "id": "24" - } -] diff --git a/crawler/crawler/spiders/house_spider.py b/crawler/crawler/spiders/house_spider.py deleted file mode 100644 index 21e57cec..00000000 --- a/crawler/crawler/spiders/house_spider.py +++ /dev/null @@ -1,290 +0,0 @@ -import scrapy -import re -import traceback -import uuid -from django.db import connection -from scrapy.spidermiddlewares.httperror import HttpError -from rental.models import HouseTS, Vendor -from rental import models -from crawlerrequest.models import RequestTS -from crawlerrequest.enums import RequestType -from rental.enums import UNKNOWN_ENUM - -# TODO: yield request - -class HouseSpider(scrapy.Spider): - queue_length = 30 - n_live_spider = 0 - - def __init__( - self, - vendor, - is_list, - request_generator, - response_router=None, - response_parser=None, - **kwargs - ): - ''' - request_gerator: - parameter: accept seed as variable - return: dictionary of request parameter - - errback, meta.db_request, dont_filter, callback - will be added beforehand - - response_parser: - Standard spider parser, don't need to handle request error and - exception. - Will be set as default request callback - ''' - super().__init__(**kwargs) - y = models.current_year() - m = models.current_month() - d = models.current_day() - h = models.current_stepped_hour() - - self.spider_id = str(uuid.uuid4()) - - try: - self.vendor = Vendor.objects.get( - name = vendor - ) - except Vendor.DoesNotExist: - raise Exception('Vendor "{}" is not defined.'.format(vendor)) - - if is_list: - self.request_type = RequestType.LIST - else: - self.request_type = RequestType.DETAIL - - self.request_generator = request_generator - - if response_router: - self.response_router = response_router - elif response_parser: - self.response_router = lambda x: response_parser - else: - raise Exception('No response router or parser given') - - self.ts = { - 'y': y, - 'm': m, - 'd': d, - 'h': h - } - - def has_request(self): - undone_requests = RequestTS.objects.filter( - year = self.ts['y'], - month = self.ts['m'], - day = self.ts['d'], - hour = self.ts['h'], - # Ignore pending request since we will generate new one and rerun it anyway - is_pending = False, - vendor = self.vendor, - request_type = self.request_type - )[:1] - - return undone_requests.count() > 0 - - def has_record(self): - today_houses = HouseTS.objects.filter( - year = self.ts['y'], - month = self.ts['m'], - day = self.ts['d'], - hour = self.ts['h'], - vendor = self.vendor - )[:1] - - return today_houses.count() > 0 - - def gen_persist_request(self, seed): - RequestTS.objects.create( - request_type=self.request_type, - vendor=self.vendor, - seed=seed - ) - - def next_request(self, request_generator=None): - if self.n_live_spider >= self.queue_length: - # At most self.queue_length in memory - return None - - # #21, temp workaround to get next_request ASAP - # this operation is still not atomic, different session may get the same request - with connection.cursor() as cursor: - sql = ( - 'update request_ts set owner = %s where id = (' - 'select id from request_ts where year = %s and month = %s ' - 'and day = %s and hour = %s and vendor_id = %s and request_type = %s ' - 'and is_pending = %s and owner is null order by id limit 1)' - ) - a = cursor.execute(sql, [ - self.spider_id, - self.ts['y'], - self.ts['m'], - self.ts['d'], - self.ts['h'], - self.vendor.id, - self.request_type.value, - False - ]) - - next_row = RequestTS.objects.filter( - year=self.ts['y'], - month=self.ts['m'], - day=self.ts['d'], - hour=self.ts['h'], - vendor=self.vendor, - request_type=self.request_type, - is_pending=False, - owner=self.spider_id - ).order_by('created') - - next_row = next_row.first() - - if next_row is None: - return None - - next_row.is_pending = True - next_row.save() - self.n_live_spider += 1 - - requestArgs = { - 'dont_filter': True, - 'errback': self.error_handler, - 'callback': self.parser_wrapper, - 'meta': {} - } - - if not request_generator: - request_generator = self.request_generator - - requestArgs = { - **requestArgs, - **request_generator(next_row.seed) - } - - if 'db_request' not in requestArgs['meta']: - requestArgs['meta']['db_request'] = next_row - - return scrapy.Request(**requestArgs) - - def parser_wrapper(self, response): - db_request = response.meta['db_request'] - db_request.last_status = response.status - db_request.save() - - seed = response.meta.get('seed', {}) - - try: - response_parser = self.response_router(seed) - for item in response_parser(response): - if item is True: - db_request.delete() - else: - yield item - except: - self.logger.error( - 'Parser error in {} when handle meta {}. [{}] - {:.128}'.format( - self.name, - seed, - response.status, - response.text - ) - ) - traceback.print_exc() - - self.n_live_spider -= 1 - # quick fix for concurrency issue - mercy = 10 - while True: - next_request = self.next_request() - if next_request: - yield next_request - elif mercy < 0: - break - else: - mercy -= 1 - - def error_handler(self, failure): - self.n_live_spider -= 1 - if failure.check(HttpError): - response = failure.value.response - self.logger.error('[Live|{}] HttpError on {}[{}]'.format( - self.n_live_spider, response.url, response.status)) - - request = failure.value.response.request.meta['db_request'] - request.last_status = response.status - - if response.status == 599: - request.is_pending = False - - request.save() - else: - self.logger.error( - '[Live|{}] Error: {}'.format(self.n_live_spider, failure)) - - def clean_number(self, number_string): - if number_string is None or number_string == '': - return None - - number_string = '{}'.format(number_string) - pure_number = re.sub('[^\\d.-]', '', number_string) - if pure_number == '': - # it could be '' if no digit included - return None - elif pure_number.isdigit(): - return int(pure_number, base=10) - else: - return float(pure_number) - - def get_enum(self, EnumCls, house_id, value): - try: - enum = EnumCls[value] - except KeyError: - self.logger.error('Unknown property: {}/{} in house {}'.format( - value, - EnumCls.__name__, - house_id - )) - enum = UNKNOWN_ENUM - - return enum - - def css_first(self, base, selector, default='', allow_empty=False, deep_text=False): - # Check how to find if there's missing attribute - css = self.css(base, selector, [default], deep_text=deep_text) - if css: - return css[0] - - if not allow_empty: - self.logger.info( - 'Fail to get css first from {}({})'.format( - base, - selector - ) - ) - - return '' - - def css(self, base, selector, default=None, deep_text=False): - # Issue #30, we may get innerHTML like "some of target string" - # deep_text=True retrieve text in the way different from ::text, which will also get all child text. - if deep_text: - ret = map(lambda dom: ''.join(dom.css('*::text').extract()), base.css(selector)) - else: - ret = base.css(selector).extract() - - if not ret: - ret = [] if default is None else default - - ret = self.clean_string(ret) - return list(ret) - - def clean_string(self, strings): - # remove empty and strip - strings = filter(lambda str: str.replace(u'\xa0', '').strip(), strings) - strings = map(lambda str: str.replace(u'\xa0', '').strip(), strings) - return strings diff --git a/crawler/go.sh b/crawler/go.sh index e03b0dc5..57ad5692 100755 --- a/crawler/go.sh +++ b/crawler/go.sh @@ -3,23 +3,22 @@ now=`date +'%Y.%m.%d.%H%M'` mkdir -p ../logs -. ../bin/activate echo '===== LIST =====' -scrapy crawl list591 -L INFO +pipenv run scrapy crawl list591 -L INFO mv scrapy.log ../logs/$now.list.log echo '===== DETAIL =====' -scrapy crawl detail591 -L INFO +pipenv run scrapy crawl detail591 -L INFO mv scrapy.log ../logs/$now.detail.log echo '===== STATEFUL UPDATE =====' -python ../backend/manage.py syncstateful -ts +pipenv run python ../backend/manage.py syncstateful -ts echo '===== CHECK EXPORT =====' -python ../backend/manage.py export -p +pipenv run python ../backend/manage.py export -p echo '===== GENERATE STATISTICS =====' -python ../backend/manage.py statscheck +pipenv run python ../backend/manage.py statscheck echo '===== FINALIZE =====' diff --git a/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py b/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py index cb1e40f0..7a22a600 100644 --- a/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py +++ b/scrapy-package/scrapy_twrh/spiders/rental591/detail_mixin.py @@ -99,7 +99,7 @@ def default_parse_detail(self, response): 'House {} not found by receiving status code {}' .format(house_id, response.status) ) - return None + return None detail_dict = jsonResp['data'] detail_dict['house_id'] = house_id diff --git a/scrapy-package/setup.py b/scrapy-package/setup.py index 6e61cb50..130e5b36 100644 --- a/scrapy-package/setup.py +++ b/scrapy-package/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="scrapy-tw-rental-house", - version="1.1.1", + version="1.1.2", author="ddio", author_email="ddio@ddio.io", description="Scrapy spider for TW Rental House",