Pazartesi, Kasım 4, 2024

Python ile Proxy Checker

yüksek çözünürlüklü python logosu

İnternet üzerinden bol miktarda ücretsiz proxy bulup birtakım işler yürütürken hangi proxy’nin çalışıp hangisinin çalışmadığına bakmamız çok önemlidir.

Temelde yalnızca 1 adet harici bağımlılığı (requests) bulunuyor; geri kalan modüllerin tamamı Python 3 ile birlikte gelen standart kütüphanede yer alıyor.

Gerekli olan bağımlılıkları kurmak için ben pip kullandım; siz kullandığınız paket yöneticisiyle bu işlemi yapabilirsiniz.

pip install requests[socks]
ShellScript

Nihai kodumuz birden çok thread ile çalışmaktadır ve aşağıdaki şekilde kullanılır.

proxychecker.py <input> <output> <number-of-threads>
ShellScript

İşlemcinize göre thread sayısını belirleyebilirsiniz.

# Requirements:
#    - pip install requests[socks]

import logging
import re
import sys
import threading
from time import sleep

import requests
import urllib3
from requests import ReadTimeout
from requests.exceptions import (
    ProxyError,
    ConnectTimeout,
    ConnectionError,
    TooManyRedirects,
    Timeout,
)
from urllib3.contrib.socks import SOCKSHTTPSConnectionPool

# Disable TLS warnings: the checker sends its requests with certificate
# verification switched off, so without this call every request would be
# accompanied by a urllib3 warning.
urllib3.disable_warnings()

# Send every record from DEBUG upwards both to the file "proxy-check.log"
# and to a stream handler (console output).
# NOTE(review): `encoding` is documented as applying to the FileHandler that
# basicConfig creates from `filename=`; with explicit `handlers=` it probably
# has no effect on the FileHandler built here - confirm.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("proxy-check.log"), logging.StreamHandler()],
    encoding="utf-8",
)

logger = logging.getLogger(__name__)

# Regex pattern to check if the proxy is valid: this is the body expected
# from the test URL. A proxy is treated as working only when the page fetched
# through it matches this pattern from the start of the response text.
REGEX_PATTERN = r"<HTML><HEAD><TITLE>Success</TITLE></HEAD><BODY>Success</BODY></HTML>$"

# Compiled once at import time and handed to every worker thread.
regex = re.compile(REGEX_PATTERN)
# Timeout in seconds, passed as `timeout=` to requests. A 2-tuple is
# interpreted by requests as (connect timeout, read timeout).
TIMEOUT = (5, 8)


class Counter:
    """Hold the shared state of a proxy check run and perform the checks.

    One instance is shared by all worker threads.

    Attributes:
        counter: Number of proxies whose check has *finished* (successfully
            or not). It reaches ``len(self.list)`` only once every proxy has
            been fully processed, so it can be used as a completion signal.
        found: Number of proxies that passed the check.
        list: Validated, de-duplicated ``ip:port`` entries read from the
            input file named by ``sys.argv[1]``.
    """

    # Proxy URL schemes probed for every entry, in this order. A dict literal
    # cannot hold several values for the same "http"/"https" key, so each
    # scheme needs its own request.
    PROXY_SCHEMES = ("http", "socks4", "socks5")

    # Shape of an accepted input line: dotted quad, colon, port digits.
    _PROXY_LINE = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+")

    # Class-level locks so they exist even when __init__ is bypassed.
    # ``x = x + 1`` is not atomic across threads, and appends to the output
    # file must not interleave.
    _count_lock = threading.Lock()
    _file_lock = threading.Lock()

    def __init__(self):
        """Initialize the counters and load the list of proxies."""
        self.counter = 0
        self.found = 0
        self.list = self.read_list()

    def add_counter(self) -> None:
        """Add 1 to ``counter`` (thread-safe)."""
        with self._count_lock:
            self.counter += 1

    def add_founded_proxies(self) -> None:
        """Add 1 to ``found`` (thread-safe)."""
        with self._count_lock:
            self.found += 1

    @staticmethod
    def read_list() -> list[str]:
        """Read the proxy list from the file named by ``sys.argv[1]``.

        Lines are stripped of surrounding whitespace; anything that is not
        of the form ``a.b.c.d:port`` (comments, blank lines, garbage) is
        dropped, and duplicates are removed while keeping first-seen order.

        Returns:
            The cleaned list of ``ip:port`` strings.

        Raises:
            OSError: If the input file cannot be opened.
        """
        with open(sys.argv[1], "r", encoding="utf-8") as file:
            stripped = [line.strip() for line in file]
        valid = [line for line in stripped if Counter._PROXY_LINE.fullmatch(line)]
        # dict preserves insertion order, so this de-duplicates stably.
        return list(dict.fromkeys(valid))

    @staticmethod
    def write_founded_proxies(line: str) -> None:
        """Append one working proxy to the file named by ``sys.argv[2]``.

        The append is serialized with a lock because several worker threads
        may report a working proxy at the same time.
        """
        with Counter._file_lock, open(sys.argv[2], "a", encoding="utf-8") as file:
            file.write(line + "\n")

    @staticmethod
    def partition(
        divide_list: list[str],
        divider: int,
    ) -> list[list[str]]:
        """Divide the list into ``divider`` round-robin sublists."""
        return [divide_list[i::divider] for i in range(divider)]

    @staticmethod
    def _build_proxies(line: str, scheme: str) -> dict[str, str]:
        """Return the requests ``proxies`` mapping for one proxy and scheme."""
        proxy_url = f"{scheme}://{line}"
        return {"http": proxy_url, "https": proxy_url}

    def _probe(self, line: str, scheme: str, content: re.Pattern) -> bool:
        """Fetch the test page through ``line`` using ``scheme``.

        Returns True when the response body matches ``content``. Network
        errors propagate to the caller. The session is closed on exit so
        sockets are not leaked across the many proxies a thread checks.
        """
        with requests.Session() as session:
            session.headers["User-Agent"] = (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188"
            )
            session.verify = False
            session.max_redirects = 5
            response = session.get(
                "https://captive.apple.com/",
                proxies=self._build_proxies(line, scheme),
                timeout=TIMEOUT,
                allow_redirects=True,
            )
            return bool(content.match(response.text))

    def _scheme_works(self, line: str, scheme: str, content: re.Pattern) -> bool:
        """Probe one scheme, logging (instead of raising) any network error."""
        try:
            return self._probe(line, scheme, content)
        # Order matters: subclasses must come before their base classes.
        except ProxyError:
            logger.error(f"[*] Proxy Error {line} ({scheme})")
        except ReadTimeout:
            logger.error(f"[*] Read Timeout Error {line} ({scheme})")
        except (ConnectTimeout, Timeout):
            logger.error(f"[*] Timeout Error {line} ({scheme})")
        except ConnectionError:
            logger.error(f"[*] Connection Error {line} ({scheme})")
        except TooManyRedirects:
            logger.error(f"[*] TooManyRedirects Error {line} ({scheme})")
        except ConnectionRefusedError:
            logger.error(f"[*] ConnectionRefusedError Error {line} ({scheme})")
        except ConnectionResetError:
            logger.error(f"[*] ConnectionResetError Error {line} ({scheme})")
        except (requests.exceptions.RequestException, OSError) as error:
            # Catch-all for remaining request/socket failures so that a single
            # bad proxy cannot kill the worker thread.
            logger.error(f"[*] Request Error {line} ({scheme}): {error}")
        return False

    def check_proxy(
        self,
        line_list: list[str],
        content: re.Pattern,
    ) -> None:
        """Check every proxy in ``line_list`` and record the working ones.

        Each proxy is probed with every scheme in ``PROXY_SCHEMES`` until one
        of them returns a page matching ``content``. A working proxy bumps
        ``found`` and is appended to the output file. ``counter`` is
        incremented once per proxy, after its check has finished.

        Args:
            line_list: ``ip:port`` strings this worker is responsible for.
            content: Compiled pattern the fetched page must match.
        """
        for line in line_list:
            try:
                logger.info(
                    f"[* {str(self.found)} WORKING*][{str(self.counter)}/{len(self.list)}] trying {line}..."
                )
                working_scheme = None
                for scheme in self.PROXY_SCHEMES:
                    if self._scheme_works(line, scheme, content):
                        working_scheme = scheme
                        break
                if working_scheme is not None:
                    logger.info(
                        f"[* {str(self.found)} WORKING*][{str(self.counter)}/{len(self.list)}] "
                        f"{line} is a valid proxy via {working_scheme}! Saving. "
                    )
                    self.add_founded_proxies()
                    self.write_founded_proxies(line)
            finally:
                # Count the proxy only when its check is over, so a caller
                # waiting on ``counter`` does not stop while requests are
                # still in flight.
                self.add_counter()


def main() -> None:
    """Command-line entry point: check the proxies listed in a file.

    Expects ``sys.argv`` to be
    ``proxychecker.py <input> <output> <number-of-threads>``. The input and
    output paths must differ. A thread count of zero or less falls back to 4;
    a non-numeric thread count is treated as a usage error.

    The proxy list is split across the worker threads, the user is asked to
    confirm the start, and the function then waits until every worker has
    finished (or CTRL+C is pressed) before exiting the process.

    Raises:
        SystemExit: Always; code 1 on a usage error, 0 otherwise.
    """
    usage = "Usage: proxychecker.py <input> <output> <number-of-threads>"
    if len(sys.argv) < 4 or sys.argv[1] == sys.argv[2]:
        print(usage)
        sys.exit(1)
    try:
        requested_threads = int(sys.argv[3])
    except ValueError:
        # A non-numeric thread count is a usage error, not a traceback.
        print(usage)
        sys.exit(1)
    # Zero or negative counts would produce no sublists at all; use 4 instead.
    number_of_threads = requested_threads if requested_threads > 0 else 4

    counter = Counter()
    divided_list = counter.partition(counter.list, number_of_threads)
    logger.info(
        f"[*] {len(counter.list)} proxies divided into {number_of_threads} "
        f"threads and sublists (approx. {len(divided_list[0])} each)"
    )
    logger.info("[*] Starting...")
    logger.info(f"[*] Input file: {sys.argv[1]}" + "\n[*] Output file: " + sys.argv[2])
    input("[*] Press enter to start!" + "\n" + "[*] Press CTRL+C to stop!")

    threads = []
    for sublist in divided_list:
        # Daemon threads so that CTRL+C can terminate the process right away.
        thread = threading.Thread(
            target=counter.check_proxy, args=(sublist, regex), daemon=True
        )
        thread.start()
        threads.append(thread)

    try:
        # Wait on the threads themselves rather than polling the counter:
        # this ends even if a worker dies early, and it does not end while
        # requests are still in flight. The short join timeout keeps the
        # main thread responsive to CTRL+C.
        for thread in threads:
            while thread.is_alive():
                thread.join(timeout=1)
        logger.info(
            f"[*] Finished! {counter.found} proxies found and saved to {sys.argv[2]}"
        )
        sys.exit(0)
    except KeyboardInterrupt:
        logger.info("[*] Exiting program please wait...")
        sys.exit(0)


if __name__ == "__main__":
    main()
Python

Kullanmak için ise terminal üzerinden aşağıdaki gibi çalıştırabilirsiniz.

proxychecker.py proxy.txt valid.txt 16
ShellScript

Github üzerinden erişmek isteyenler için tıklayın.