diff --git a/README.md b/README.md index b05bf12..1436da1 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,21 @@ # pastebin-scraper -A tool that leverages the API of https://psbdmp.ws/ to find emails and domains dumped in pastebin which could lead to finding some juicy information. +A tool that leverages the API of https://psbdmp.ws/ to find any text dumped in pastebin which could lead to finding some juicy information. ## Usage ``` $ ./scrape.sh Usage: -Search for domains - ./script.sh -d domain.com -Search for emails - ./script.sh -e foo@bar.com General search - ./script.sh -g foobar ``` ## Example ``` -$ ./scrape.sh -d facebook.com +$ ./scrape.sh -g foobar Searching pastebin... -Task completed. Output present in ./output/domain/facebook.com/ +Saving in ./output/general/foobar/results.txt ``` ## Installation @@ -27,13 +25,13 @@ $ git clone https://github.com/streaak/pastebin-scraper.git $ cd pastebin-scraper $ sudo chmod +x ./scrape.sh $ sudo apt-get install jq -$ mkdir -p output/domain/ output/email/ output/general/ +$ mkdir output/general/ ``` ## Output -Outputs will either be stored in `./output/domain`, `./output/email` or `./output/general depending` depending on what you search for. The output will contain 2 files, `output.json` and `urls.txt`. `output.json` will contain the original json returned by the API in a beautified format and `urls.txt` will contain the URLs returned by the API in text format. +Outputs will be stored in `./output/general/<search term>/`. The output will contain 2 files, `results.txt` and `urls.txt`. `results.txt` will contain the dumped text value returned by the API and `urls.txt` will contain the URLs returned by the API.
## TODO diff --git a/scrape.sh b/scrape.sh index 6017060..2abe38c 100755 --- a/scrape.sh +++ b/scrape.sh @@ -1,55 +1,47 @@ - echo -e "$(tput setaf 1) ____ ____ _____ __ ____ ____ ____ ___ ____ +echo "$(tput setaf 1) ____ ____ _____ __ ____ ____ ____ ___ ____ | \| \ / ___/ / ]| \ / || \ / _]| \ | o ) o ) _____( \_ / / | D )| o || o ) [_ | D ) | _/| || |\__ |/ / | / | || _/ _]| / | | | O ||_____|/ \ / \_ | \ | _ || | | [_ | \ | | | | \ \ || . \| | || | | || . \ - |__| |_____| \___|\____||__|\_||__|__||__| |_____||__|\_|$(tput sgr0)" + |__| |_____| \___|\____||__|\_||__|__||__| |_____||__|\_| +$(tput sgr0)" -echo -e "\n\t\t\t\t By @Streaak" -if [ $# -eq 0 ] || [ $1 == '-h' ]; then - echo -e "$(tput setaf 2)\nUsage:$(tput sgr0)" - echo "Search for domains - ./script.sh -d domain.com" - echo "Search for emails - ./script.sh -e foo@bar.com" +printf '\n\t\t\t\t By @Streaak\n' # printf: whether echo expands \n and \t depends on the shell + +if [ "$#" -eq 0 ] || [ "$1" = '-h' ]; then + printf '%s\nUsage:%s\n' "$(tput setaf 2)" "$(tput sgr0)" echo "General search - ./script.sh -g foobar" exit 0 fi -function scrape() { - echo "Searching pastebin..." - curl -s -X GET "$url" | python -m json.tool > ./output/$dir/output.json - cat ./output/$dir/output.json | jq -r '.data[] | .id' | awk '{print "https://psbdmp.ws/" $1 }' > ./output/$dir/urls.txt - echo "Task completed. Output present in ./output/$dir/$2" - exit 0 - return 0 -} - -if [[ $1 == '-g' ]] && [[ $2 != '' ]] ; then -dir=general/$2 -url=https://psbdmp.ws/api/search/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. Try to run ./script.sh -g " - exit 0 -fi +scrape() { + url="$1" + dir="$2" + printf 'Searching pastebin...\n\n' + response=$(curl -s -X GET "$url") # fetched once; both jq passes below reuse this reply + if [ -z "$response" ] || [ "$response" = "[]" ]; then # empty reply (request failed) or empty result list + echo "Nothing found in response."
+ exit 0 + fi + printf '%s\n' "$response" | jq -r '.[] | .id' | awk '{print "https://psbdmp.ws/api/v3/dump/" $1 }' > "./output/$dir/urls.txt" + printf '%s\n' "$response" | jq -r '.[] | .text' > "./output/$dir/results.txt" + line_count=$(wc -l < "./output/$dir/results.txt") -if [[ $1 == '-e' ]] && [[ $2 != '' ]] ; then -dir=email/$2 -url=https://psbdmp.ws/api/search/email/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. Try to run ./script.sh -e " - exit 0 -fi + if [ "$line_count" -le 100 ]; then + cat "./output/$dir/results.txt" + else + + printf 'Output More than 100 lines\n Saving in %s\n' "./output/$dir/results.txt" + fi +} -if [[ $1 == '-d' ]] && [[ $2 != '' ]] ; then -dir=domain/$2 -url=https://psbdmp.ws/api/search/domain/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. Try to run ./script.sh -d " - exit 0 +if [ "$1" = '-g' ] && [ -n "$2" ]; then + dir="general/$2" + url="https://psbdmp.ws/api/v3/search/$2" + mkdir -p "./output/$dir/" + scrape "$url" "$dir" +else + echo "Missing Value. Try to run ./script.sh -g " >&2 + exit 1 # usage error: report failure to the caller fi