Fail Proxy list fetcher in BASH

1 reply [Last post]
skraps
Offline
Neophyte
Joined: 2011/10/12

Proxy list fetcher

# Per-protocol list file names. NOTE(review): nothing in the visible script
# reads these variables yet - confirm before removing them.
socks5list="proxy.socks5.list"
socks4list="proxy.socks4.list"
httplist="proxy.http.list"

# Begin each run with a fresh proxy.list: drop whatever an earlier run left
# behind, then create the file again so later appends have a target.
[ ! -e "proxy.list" ] || rm -f ./proxy.list
touch proxy.list
#[www.samair.ru]############################################################
# Download the listing pages proxy-1.htm .. proxy-9.htm and append every token
# that contains a dotted-quad address to ./proxy.list.
for a in {1..9}; do
        echo "$a"
        page="proxy-${a}.htm"

        # The former if/else tested the literal string "a" (the "$" was
        # missing) and ran the same wget in both branches, so one download
        # is all that is needed. -O pins the output name; without it wget
        # saves to "proxy-N.htm.1" when the file already exists and the
        # wrong file would be parsed.
        if wget -O "./${page}" "http://www.samair.ru/proxy/${page}"; then
                # Turn '<', '>' and '/' into spaces, put every token on its
                # own line, then keep only the tokens that look like an IP.
                # ("\n" in the replacement relies on GNU sed, as before.)
                sed -e 's/</ /g' -e 's/>/ /g' -e 's/\// /g' -e 's/ /\n/g' "./${page}" \
                        | grep -E '[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}' >> ./proxy.list
        fi

        # -f: the page may be missing or empty when the download failed.
        rm -f "./${page}"
done


#[proxies.my-proxy.com]#####################################################
# A browser user agent is sent for this site because the server blocks wget
# and GET requests so that people do not automate or leech the list.

# The command below was reconstructed: the published copy had HTML anchor and
# paragraph markup pasted into it, which the shell cannot run.
# -O makes the output name explicit instead of relying on wget's default.
wget --user-agent='Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)' \
        -O ./index.html http://proxies.my-proxy.com/

# Split the markup into one token per line and keep the tokens that contain
# a dotted-quad address.
sed -e 's/</ /g' -e 's/>/ /g' -e 's/ /\n/g' ./index.html \
        | grep -E '[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}' >> proxy.list

rm -f ./index.html

#[www.proxylists.net]#######################################################
#untested
# The three downloads below were reconstructed: the published copy had HTML
# anchor and line-break markup pasted into the wget commands.
for f in socks4.txt socks5.txt http_highanon.txt; do
        # -O pins the local file name so the cat/rm below act on this download.
        wget -O "./${f}" "http://www.proxylists.net/${f}"
done

# Append in the same order as before: high-anonymity HTTP, SOCKS4, SOCKS5.
cat ./http_highanon.txt ./socks4.txt ./socks5.txt >> ./proxy.list

rm -f ./http_highanon.txt ./socks4.txt ./socks5.txt

#Remove all duplicates from the list and recreate###########################

# dedupe_proxy_list FILE
# Rewrites FILE in place so that each distinct line appears once, keeping the
# first occurrence and the original order. Does nothing if FILE is missing.
#
# uniq was replaced because it only collapses *adjacent* duplicates, and this
# list is a concatenation of several unsorted sources. The old trailing
# "rm ./proxy.list.tmp" is gone too: mv has already moved that file away, so
# the rm always failed.
dedupe_proxy_list() {
        local list=$1
        [ -f "$list" ] || return 0
        awk '!seen[$0]++' "$list" > "${list}.tmp" && mv -- "${list}.tmp" "$list"
}

dedupe_proxy_list ./proxy.list

#remove ports and create a tmp list so we can ping the hosts ################
# Split every line on ':' and spaces, keep the tokens that contain a
# dotted-quad address, and drop repeats so a host listed with several ports
# is pinged only once (first occurrence wins, order is kept).
sed -e 's/:/ /g' -e 's/ /\n/g' ./proxy.list \
        | grep -E '[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}' \
        | awk '!seen[$0]++' > ./proxy.list.tmp

# Start from an empty result file: entries from an earlier run are not reused,
# and the file exists even when no host answers.
: > ./proxy.list.tmp.2

while IFS= read -r a; do
        # Test ping's exit status directly. stdin is redirected so ping can
        # never consume the host list the loop is reading.
        if ping -c 1 "${a}" < /dev/null; then
                echo "${a}" >> ./proxy.list.tmp.2
        fi
done < ./proxy.list.tmp

# The hosts that pinged we will compare to the original list and
# create a second list of hosts we know are online.

# select_pinged ALIVE_FILE LIST_FILE OUT_FILE
# Writes to OUT_FILE every line of LIST_FILE that has a ':'- or
# space-separated field exactly equal to a line of ALIVE_FILE. OUT_FILE is
# left empty when ALIVE_FILE is missing/empty or LIST_FILE is missing.
#
# This replaces a loop that ran "grep $a ... > proxy.list.pinged" per host:
# the ">" overwrote the result each time, so only the last host survived, and
# the unquoted regex let 1.2.3.4 also match 11.2.3.45.
select_pinged() {
        local alive=$1 list=$2 out=$3
        if [ ! -s "$alive" ] || [ ! -f "$list" ]; then
                : > "$out"
                return 0
        fi
        # First file loads the set of live hosts; second file is filtered.
        # The -s guard above matters: NR == FNR misfires on an empty first file.
        awk -F'[: ]' '
                NR == FNR { if ($0 != "") alive[$0] = 1; next }
                { for (i = 1; i <= NF; i++) if ($i in alive) { print; next } }
        ' "$alive" "$list" > "$out"
}

select_pinged ./proxy.list.tmp.2 ./proxy.list ./proxy.list.pinged
# Remove junk files.
# Only the two scratch files this script creates are deleted. The previous
# glob (*.tmp and *.tmp.*) also removed any unrelated file with such a name
# in the working directory.
rm -f -- ./proxy.list.tmp ./proxy.list.tmp.2

# Kept disabled on purpose: the redirection truncates ./proxy.list before cat
# reads it, so running this line as written would empty the list.
#cat ./proxy.list | sed -e '/^$/d' > ./proxy.list