From ce10c470baa17c118d3b71c60abee5e5d1288a71 Mon Sep 17 00:00:00 2001 From: Jonathan McKenzie Date: Thu, 14 Dec 2023 17:28:00 -0500 Subject: [PATCH] Updating .env.template, adding ability to write to the DB in addition to CSV output, update README, added very basic logging, updated requirements --- .env.template | 19 ++-- .gitignore | 3 + .python-version | 1 + Pipfile | 18 ++-- Pipfile.lock | 257 +++++++++++++++++++++++++++++------------------ README.md | 21 +++- map => map.py | 19 +++- requirements.txt | 18 ++-- 8 files changed, 228 insertions(+), 128 deletions(-) create mode 100644 .python-version rename map => map.py (86%) diff --git a/.env.template b/.env.template index cd2bd70..79100dc 100644 --- a/.env.template +++ b/.env.template @@ -1,8 +1,13 @@ -OUT_FILE_PATH='.' -OUT_FILE_NAME='test,csv' -DB_USER='rental-listing-aggregator' -DB_PASSWORD='password' -DB_HOST='127.0.0.1' +MAPPER_YEAR=2020 +MAPPER_MONTH=1 +#MAPPER_QUARTER=1 + +OUT_FILE_PATH=./output/ +OUT_FILE_NAME=mapped.csv +OUT_TABLE_NAME=mapped + +DB_USER=rental-listing-aggregator +DB_PASSWORD=password +DB_HOST=127.0.0.1 DB_PORT=5432 -DB_NAME='rental-listing-aggregator' -MAPPER_MONTH='6' +DB_NAME=rental-listing-aggregator diff --git a/.gitignore b/.gitignore index be47843..f4c0598 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ .DS_Store .env + +virtualenv_* +output/** diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..cc1923a --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.8 diff --git a/Pipfile b/Pipfile index f522d17..fc56c80 100644 --- a/Pipfile +++ b/Pipfile @@ -6,14 +6,16 @@ verify_ssl = true [dev-packages] [packages] -numpy = "==1.19.0" -pandas = "==1.0.5" -psycopg2 = "==2.8.5" -python-dateutil = "==2.8.1" -python-dotenv = "==0.13.0" -pytz = "==2020.1" -six = "==1.15.0" -SQLAlchemy = "==1.3.18" +numpy = "==1.24.4" +pandas = "==2.0.3" +psycopg2 = "==2.9.9" +python-dateutil = "==2.8.2" +python-dotenv = "==1.0.0" +pytz = "==2023.3.post1" +six = "==1.16.0" +tzdata = "==2023.3" +sqlalchemy = "==2.0.23" +typing-extensions = "==4.9.0" [requires] python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock index 18098d7..36523f2 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "bb4d35de3cc881c7740ed5358a841db530841d4674be927889185ca986376152" + "sha256": "77b2b0f682edc0e9c45b328631e2239105248347a0bf993a13f677b94f36554d" }, "pipfile-spec": 6, "requires": { @@ -18,142 +18,199 @@ "default": { "numpy": { "hashes": [ - "sha256:13af0184177469192d80db9bd02619f6fa8b922f9f327e077d6f2a6acb1ce1c0", - "sha256:26a45798ca2a4e168d00de75d4a524abf5907949231512f372b217ede3429e98", - "sha256:26f509450db547e4dfa3ec739419b31edad646d21fb8d0ed0734188b35ff6b27", - "sha256:30a59fb41bb6b8c465ab50d60a1b298d1cd7b85274e71f38af5a75d6c475d2d2", - "sha256:33c623ef9ca5e19e05991f127c1be5aeb1ab5cdf30cb1c5cf3960752e58b599b", - "sha256:356f96c9fbec59974a592452ab6a036cd6f180822a60b529a975c9467fcd5f23", - "sha256:3c40c827d36c6d1c3cf413694d7dc843d50997ebffbc7c87d888a203ed6403a7", - "sha256:4d054f013a1983551254e2379385e359884e5af105e3efe00418977d02f634a7", - "sha256:63d971bb211ad3ca37b2adecdd5365f40f3b741a455beecba70fd0dde8b2a4cb", - "sha256:658624a11f6e1c252b2cd170d94bf28c8f9410acab9f2fd4369e11e1cd4e1aaf", - "sha256:76766cc80d6128750075378d3bb7812cf146415bd29b588616f72c943c00d598", - "sha256:7b57f26e5e6ee2f14f960db46bd58ffdca25ca06dd997729b1b179fddd35f5a3", - "sha256:7b852817800eb02e109ae4a9cef2beda8dd50d98b76b6cfb7b5c0099d27b52d4", - "sha256:8cde829f14bd38f6da7b2954be0f2837043e8b8d7a9110ec5e318ae6bf706610", - "sha256:a2e3a39f43f0ce95204beb8fe0831199542ccab1e0c6e486a0b4947256215632", - "sha256:a86c962e211f37edd61d6e11bb4df7eddc4a519a38a856e20a6498c319efa6b0", - "sha256:a8705c5073fe3fcc297fb8e0b31aa794e05af6a329e81b7ca4ffecab7f2b95ef", - "sha256:b6aaeadf1e4866ca0fdf7bb4eed25e521ae21a7947c59f78154b24fc7abbe1dd", - "sha256:be62aeff8f2f054eff7725f502f6228298891fd648dc2630e03e44bf63e8cee0", - "sha256:c2edbb783c841e36ca0fa159f0ae97a88ce8137fb3a6cd82eae77349ba4b607b", - "sha256:cbe326f6d364375a8e5a8ccb7e9cd73f4b2f6dc3b2ed205633a0db8243e2a96a", - "sha256:d34fbb98ad0d6b563b95de852a284074514331e6b9da0a9fc894fb1cdae7a79e", - "sha256:d97a86937cf9970453c3b62abb55a6475f173347b4cde7f8dcdb48c8e1b9952d", - "sha256:dd53d7c4a69e766e4900f29db5872f5824a06827d594427cf1a4aa542818b796", - "sha256:df1889701e2dfd8ba4dc9b1a010f0a60950077fb5242bb92c8b5c7f1a6f2668a", - "sha256:fa1fe75b4a9e18b66ae7f0b122543c42debcf800aaafa0212aaff3ad273c2596" + "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", + "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", + "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", + "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", + "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", + "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", + "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", + "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", + "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", + "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", + "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", + "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", + "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", + "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", + "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", + "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", + "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", + "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", + "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", + "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", + "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", + "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", + "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", + "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", + "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", + "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", + "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", + "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9" ], "index": "pypi", - "version": "==1.19.0" + "markers": "python_version >= '3.8'", + "version": "==1.24.4" }, "pandas": { "hashes": [ - "sha256:02f1e8f71cd994ed7fcb9a35b6ddddeb4314822a0e09a9c5b2d278f8cb5d4096", - "sha256:13f75fb18486759da3ff40f5345d9dd20e7d78f2a39c5884d013456cec9876f0", - "sha256:35b670b0abcfed7cad76f2834041dcf7ae47fd9b22b63622d67cdc933d79f453", - "sha256:4c73f373b0800eb3062ffd13d4a7a2a6d522792fa6eb204d67a4fad0a40f03dc", - "sha256:5759edf0b686b6f25a5d4a447ea588983a33afc8a0081a0954184a4a87fd0dd7", - "sha256:5a7cf6044467c1356b2b49ef69e50bf4d231e773c3ca0558807cdba56b76820b", - "sha256:69c5d920a0b2a9838e677f78f4dde506b95ea8e4d30da25859db6469ded84fa8", - "sha256:8778a5cc5a8437a561e3276b85367412e10ae9fff07db1eed986e427d9a674f8", - "sha256:9871ef5ee17f388f1cb35f76dc6106d40cb8165c562d573470672f4cdefa59ef", - "sha256:9c31d52f1a7dd2bb4681d9f62646c7aa554f19e8e9addc17e8b1b20011d7522d", - "sha256:ab8173a8efe5418bbe50e43f321994ac6673afc5c7c4839014cf6401bbdd0705", - "sha256:ae961f1f0e270f1e4e2273f6a539b2ea33248e0e3a11ffb479d757918a5e03a9", - "sha256:b3c4f93fcb6e97d993bf87cdd917883b7dab7d20c627699f360a8fb49e9e0b91", - "sha256:c9410ce8a3dee77653bc0684cfa1535a7f9c291663bd7ad79e39f5ab58f67ab3", - "sha256:f69e0f7b7c09f1f612b1f8f59e2df72faa8a6b41c5a436dde5b615aaf948f107", - "sha256:faa42a78d1350b02a7d2f0dbe3c80791cf785663d6997891549d0f86dc49125e" + "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", + "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", + "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", + "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", + "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", + "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", + "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", + "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", + "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", + "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", + "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", + "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", + "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", + "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", + "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", + "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", + "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", + "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", + "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", + "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", + "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", + "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", + "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", + "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", + "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02" ], "index": "pypi", - "version": "==1.0.5" + "markers": "python_version >= '3.8'", + "version": "==2.0.3" }, "psycopg2": { "hashes": [ - "sha256:132efc7ee46a763e68a815f4d26223d9c679953cd190f1f218187cb60decf535", - "sha256:2327bf42c1744a434ed8ed0bbaa9168cac7ee5a22a9001f6fc85c33b8a4a14b7", - "sha256:27c633f2d5db0fc27b51f1b08f410715b59fa3802987aec91aeb8f562724e95c", - "sha256:2c0afb40cfb4d53487ee2ebe128649028c9a78d2476d14a67781e45dc287f080", - "sha256:2df2bf1b87305bd95eb3ac666ee1f00a9c83d10927b8144e8e39644218f4cf81", - "sha256:440a3ea2c955e89321a138eb7582aa1d22fe286c7d65e26a2c5411af0a88ae72", - "sha256:6a471d4d2a6f14c97a882e8d3124869bc623f3df6177eefe02994ea41fd45b52", - "sha256:6b306dae53ec7f4f67a10942cf8ac85de930ea90e9903e2df4001f69b7833f7e", - "sha256:a0984ff49e176062fcdc8a5a2a670c9bb1704a2f69548bce8f8a7bad41c661bf", - "sha256:ac5b23d0199c012ad91ed1bbb971b7666da651c6371529b1be8cbe2a7bf3c3a9", - "sha256:acf56d564e443e3dea152efe972b1434058244298a94348fc518d6dd6a9fb0bb", - "sha256:d3b29d717d39d3580efd760a9a46a7418408acebbb784717c90d708c9ed5f055", - "sha256:f7d46240f7a1ae1dd95aab38bd74f7428d46531f69219954266d669da60c0818" + "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981", + "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516", + "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3", + "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa", + "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a", + "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693", + "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372", + "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e", + "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59", + "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156", + "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024", + "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913", + "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c" ], "index": "pypi", - "version": "==2.8.5" + "markers": "python_version >= '3.7'", + "version": "==2.9.9" }, "python-dateutil": { "hashes": [ - "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", - "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], "index": "pypi", - "version": "==2.8.1" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" }, "python-dotenv": { "hashes": [ - "sha256:25c0ff1a3e12f4bde8d592cc254ab075cfe734fc5dd989036716fd17ee7e5ec7", - "sha256:3b9909bc96b0edc6b01586e1eed05e71174ef4e04c71da5786370cebea53ad74" + "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba", + "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a" ], "index": "pypi", - "version": "==0.13.0" + "markers": "python_version >= '3.8'", + "version": "==1.0.0" }, "pytz": { "hashes": [ - "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", - "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b", + "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7" ], "index": "pypi", - "version": "==2020.1" + "version": "==2023.3.post1" }, "six": { "hashes": [ - "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", - "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], "index": "pypi", - "version": "==1.15.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" }, "sqlalchemy": { "hashes": [ - "sha256:0942a3a0df3f6131580eddd26d99071b48cfe5aaf3eab2783076fbc5a1c1882e", - "sha256:0ec575db1b54909750332c2e335c2bb11257883914a03bc5a3306a4488ecc772", - "sha256:109581ccc8915001e8037b73c29590e78ce74be49ca0a3630a23831f9e3ed6c7", - "sha256:16593fd748944726540cd20f7e83afec816c2ac96b082e26ae226e8f7e9688cf", - "sha256:427273b08efc16a85aa2b39892817e78e3ed074fcb89b2a51c4979bae7e7ba98", - "sha256:50c4ee32f0e1581828843267d8de35c3298e86ceecd5e9017dc45788be70a864", - "sha256:512a85c3c8c3995cc91af3e90f38f460da5d3cade8dc3a229c8e0879037547c9", - "sha256:57aa843b783179ab72e863512e14bdcba186641daf69e4e3a5761d705dcc35b1", - "sha256:621f58cd921cd71ba6215c42954ffaa8a918eecd8c535d97befa1a8acad986dd", - "sha256:6ac2558631a81b85e7fb7a44e5035347938b0a73f5fdc27a8566777d0792a6a4", - "sha256:716754d0b5490bdcf68e1e4925edc02ac07209883314ad01a137642ddb2056f1", - "sha256:736d41cfebedecc6f159fc4ac0769dc89528a989471dc1d378ba07d29a60ba1c", - "sha256:8619b86cb68b185a778635be5b3e6018623c0761dde4df2f112896424aa27bd8", - "sha256:87fad64529cde4f1914a5b9c383628e1a8f9e3930304c09cf22c2ae118a1280e", - "sha256:89494df7f93b1836cae210c42864b292f9b31eeabca4810193761990dc689cce", - "sha256:8cac7bb373a5f1423e28de3fd5fc8063b9c8ffe8957dc1b1a59cb90453db6da1", - "sha256:8fd452dc3d49b3cc54483e033de6c006c304432e6f84b74d7b2c68afa2569ae5", - "sha256:adad60eea2c4c2a1875eb6305a0b6e61a83163f8e233586a4d6a55221ef984fe", - "sha256:c26f95e7609b821b5f08a72dab929baa0d685406b953efd7c89423a511d5c413", - "sha256:cbe1324ef52ff26ccde2cb84b8593c8bf930069dfc06c1e616f1bfd4e47f48a3", - "sha256:d05c4adae06bd0c7f696ae3ec8d993ed8ffcc4e11a76b1b35a5af8a099bd2284", - "sha256:d98bc827a1293ae767c8f2f18be3bb5151fd37ddcd7da2a5f9581baeeb7a3fa1", - "sha256:da2fb75f64792c1fc64c82313a00c728a7c301efe6a60b7a9fe35b16b4368ce7", - "sha256:e4624d7edb2576cd72bb83636cd71c8ce544d8e272f308bd80885056972ca299", - "sha256:e89e0d9e106f8a9180a4ca92a6adde60c58b1b0299e1b43bd5e0312f535fbf33", - "sha256:f11c2437fb5f812d020932119ba02d9e2bc29a6eca01a055233a8b449e3e1e7d", - "sha256:f57be5673e12763dd400fea568608700a63ce1c6bd5bdbc3cc3a2c5fdb045274", - "sha256:fc728ece3d5c772c196fd338a99798e7efac7a04f9cb6416299a3638ee9a94cd" + "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3", + "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884", + "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74", + "sha256:14aebfe28b99f24f8a4c1346c48bc3d63705b1f919a24c27471136d2f219f02d", + "sha256:1e018aba8363adb0599e745af245306cb8c46b9ad0a6fc0a86745b6ff7d940fc", + "sha256:227135ef1e48165f37590b8bfc44ed7ff4c074bf04dc8d6f8e7f1c14a94aa6ca", + "sha256:31952bbc527d633b9479f5f81e8b9dfada00b91d6baba021a869095f1a97006d", + "sha256:3e983fa42164577d073778d06d2cc5d020322425a509a08119bdcee70ad856bf", + "sha256:42d0b0290a8fb0165ea2c2781ae66e95cca6e27a2fbe1016ff8db3112ac1e846", + "sha256:42ede90148b73fe4ab4a089f3126b2cfae8cfefc955c8174d697bb46210c8306", + "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221", + "sha256:4af79c06825e2836de21439cb2a6ce22b2ca129bad74f359bddd173f39582bf5", + "sha256:5f94aeb99f43729960638e7468d4688f6efccb837a858b34574e01143cf11f89", + "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55", + "sha256:62d9e964870ea5ade4bc870ac4004c456efe75fb50404c03c5fd61f8bc669a72", + "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea", + "sha256:63bfc3acc970776036f6d1d0e65faa7473be9f3135d37a463c5eba5efcdb24c8", + "sha256:6463aa765cf02b9247e38b35853923edbf2f6fd1963df88706bc1d02410a5577", + "sha256:64ac935a90bc479fee77f9463f298943b0e60005fe5de2aa654d9cdef46c54df", + "sha256:683ef58ca8eea4747737a1c35c11372ffeb84578d3aab8f3e10b1d13d66f2bc4", + "sha256:75eefe09e98043cff2fb8af9796e20747ae870c903dc61d41b0c2e55128f958d", + "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34", + "sha256:7c424983ab447dab126c39d3ce3be5bee95700783204a72549c3dceffe0fc8f4", + "sha256:7e0dc9031baa46ad0dd5a269cb7a92a73284d1309228be1d5935dac8fb3cae24", + "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6", + "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965", + "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35", + "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b", + "sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab", + "sha256:9ca922f305d67605668e93991aaf2c12239c78207bca3b891cd51a4515c72e22", + "sha256:a86cb7063e2c9fb8e774f77fbf8475516d270a3e989da55fa05d08089d77f8c4", + "sha256:aeb397de65a0a62f14c257f36a726945a7f7bb60253462e8602d9b97b5cbe204", + "sha256:b41f5d65b54cdf4934ecede2f41b9c60c9f785620416e8e6c48349ab18643855", + "sha256:bd45a5b6c68357578263d74daab6ff9439517f87da63442d244f9f23df56138d", + "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab", + "sha256:c1bda93cbbe4aa2aa0aa8655c5aeda505cd219ff3e8da91d1d329e143e4aff69", + "sha256:c4722f3bc3c1c2fcc3702dbe0016ba31148dd6efcd2a2fd33c1b4897c6a19693", + "sha256:c80c38bd2ea35b97cbf7c21aeb129dcbebbf344ee01a7141016ab7b851464f8e", + "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8", + "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0", + "sha256:d0f7fb0c7527c41fa6fcae2be537ac137f636a41b4c5a4c58914541e2f436b45", + "sha256:d4041ad05b35f1f4da481f6b811b4af2f29e83af253bf37c3c4582b2c68934ab", + "sha256:d5578e6863eeb998980c212a39106ea139bdc0b3f73291b96e27c929c90cd8e1", + "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d", + "sha256:e599a51acf3cc4d31d1a0cf248d8f8d863b6386d2b6782c5074427ebb7803bda", + "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b", + "sha256:f48ed89dd11c3c586f45e9eec1e437b355b3b6f6884ea4a4c3111a3358fd0c18", + "sha256:f508ba8f89e0a5ecdfd3761f82dda2a3d7b678a626967608f4273e0dba8f07ac", + "sha256:fd54601ef9cc455a0c61e5245f690c8a3ad67ddb03d3b91c361d076def0b4c60" ], "index": "pypi", - "version": "==1.3.18" + "markers": "python_version >= '3.7'", + "version": "==2.0.23" + }, + "typing-extensions": { + "hashes": [ + "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", + "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==4.9.0" + }, + "tzdata": { + "hashes": [ + "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a", + "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda" + ], + "index": "pypi", + "markers": "python_version >= '2'", + "version": "==2023.3" } }, "develop": {} diff --git a/README.md b/README.md index 49705e4..8ec25c4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,20 @@ # Rental Listing Mapper -This repository pulls the table that is populated by the -[scraper](https://github.com/mapc/rental-listing-scraper) module -and maps the data into a format consumable by the [cleaner](https://github.com/mapc/rental-listing-cleaner) +## Overview + +This repository pulls the table that is populated by the [scraper](https://github.com/mapc/rental-listing-scraper) module and maps the data into a format consumable by the [cleaner](https://github.com/mapc/rental-listing-cleaner) + +In addition to creating a CSV in this format, this process also writes the mapped data to a `mapped` table in the `rental-listings-aggregator` database. + +## Running the code + +### Setup +First, you'll need to set up your environment variables. These can be set using a `.env` file in the root of this project. A template (`.env.template`) has been provided as an example. The production values are saved as a secure note in Dashlane. + +Create a virtual environment if you haven't already: `python -m venv virtualenv_mapper` + +Enter the virtual environment: `source virtualenv_mapper/bin/activate` + +Then, install the requirements: `pip install -r requirements.txt` + +With all of these steps completed, you can run the mapper: `python map.py` diff --git a/map b/map.py similarity index 86% rename from map rename to map.py index 03d4884..b4c7b12 100755 --- a/map +++ b/map.py @@ -1,26 +1,33 @@ #!/usr/bin/env python3 import json -import sqlalchemy -import pandas as pd from sys import exit from os import environ, path from datetime import datetime from datetime import date from dateutil.relativedelta import * +import sqlalchemy +import pandas as pd +from dotenv import load_dotenv + + +load_dotenv() + def longmap(record): if 'lng' in record: return float(record['lng']) else: return float(record['Longitude']) + def latmap(record): if 'lat' in record: return float(record['lat']) else: return float(record['Latitude']) + if 'MAPPER_YEAR' in environ: YEAR = int(environ['MAPPER_YEAR']) else: @@ -50,8 +57,11 @@ def latmap(record): next_month=datetime.now().strftime('%m') ) +print("Reading raw listings from DB...") engine = sqlalchemy.create_engine('postgresql://{}:{}@{}:{}/{}'.format(environ['DB_USER'], environ['DB_PASSWORD'], environ['DB_HOST'], environ['DB_PORT'], environ['DB_NAME'])) df = pd.read_sql_query(sqlalchemy.text('SELECT * FROM listings WHERE \'{range}\'::tsrange @> last_seen'.format(range=RANGE)), engine) + +print("Mapping data to output format...") df.rename(columns={'posting_date': 'post_at'}, inplace=True) df = df.dropna(subset=['payload']) @@ -61,4 +71,9 @@ def latmap(record): mapped = df[['uid', 'ask', 'bedrooms', 'title', 'address', 'post_at', 'created_at', 'updated_at', 'source_id', 'survey_id', 'latitude', 'longitude']] +print(f"Writing to {environ['OUT_FILE_PATH']}{environ['OUT_FILE_NAME']}...") mapped.to_csv(path.join(environ['OUT_FILE_PATH'], environ['OUT_FILE_NAME']), index=False, header=False) +print(f"Writing to {environ['OUT_TABLE_NAME']} table...") +mapped.to_sql(environ['OUT_TABLE_NAME'], engine, if_exists='append', index=False, chunksize=1000) + +print("Done.") diff --git a/requirements.txt b/requirements.txt index 1c37202..efb2a0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,10 @@ -numpy==1.19.0 -pandas==1.0.5 -psycopg2==2.8.5 -python-dateutil==2.8.1 -python-dotenv==0.13.0 -pytz==2020.1 -six==1.15.0 -SQLAlchemy==1.3.18 +numpy==1.24.4 +pandas==2.0.3 +psycopg2==2.9.9 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +pytz==2023.3.post1 +six==1.16.0 +SQLAlchemy==2.0.23 +typing_extensions==4.9.0 +tzdata==2023.3