Recursion | Notion

This challenge requires you to discover 40 web folders through fuzzing.

Example: http://sem2ctf.icsi.cyber:2525/folder1/folder2/folder3/folder4/(etc....)

FFUF doesn’t handle recursion well when the response status code is 200, so you have to coax an AI tool into writing a fuzzing solution that works.

If you know the limitation within FFUF exists, it’s easy enough to prompt for a program using Python requests.

Here’s my solve:

import requests
import concurrent.futures
import sys
import time

# Configuration
# Root of the target web server. Folder names are appended directly to this
# value, so it must be a bare URL: no trailing slash and no markup (such as
# markdown auto-link angle brackets) around it.
BASE_URL = "http://sem2ctf.icsi.cyber:2525"
# Wordlist of candidate folder names, one per line.
WORDLIST_FILE = "raft-small-words.txt"
# Number of nested folders to discover.
TARGET_DEPTH = 40
# Number of worker threads probing candidates concurrently at each level.
THREADS = 50

def load_words(filename):
    """Read candidate directory names from a wordlist file.

    Every line is stripped of surrounding whitespace. Blank lines, comment
    lines (first non-blank character is ``#``) and the self/parent references
    ``.`` and ``..`` are skipped; the latter would make the scan loop on the
    same directory.

    Args:
        filename: Path to a text wordlist with one candidate per line.

    Returns:
        The remaining words as a list of strings, in file order.

    Raises:
        SystemExit: With status 1, after printing an error, if the file does
            not exist.
    """
    print(f"[*] Loading wordlist from {filename}...")
    try:
        with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
            # Strip once and filter on the stripped text, so that indented
            # comment lines are recognised as comments too.
            stripped_lines = (line.strip() for line in f)
            words = [
                word for word in stripped_lines
                if word
                and not word.startswith('#')
                and word not in ('.', '..')
            ]
    except FileNotFoundError:
        print(f"[!] Error: Wordlist file '{filename}' not found.")
        sys.exit(1)
    print(f"[*] Loaded {len(words)} words.")
    return words

def check_url(session, url):
    """Probe a single URL and report whether it answered with HTTP 200.

    The request is sent through ``session`` with a 3 second timeout and
    without following redirects. Any ``requests.RequestException`` is treated
    the same as a miss.

    Args:
        session: Object exposing a ``get(url, timeout=..., allow_redirects=...)``
            method (a ``requests.Session`` in this script).
        url: The URL to request.

    Returns:
        ``url`` if the response status code is 200, otherwise ``None``.
    """
    try:
        status = session.get(url, timeout=3, allow_redirects=False).status_code
    except requests.RequestException:
        # A failed request is indistinguishable from "not found" for the scan.
        return None
    # Only an exact 200 counts as a hit; redirects and errors are misses.
    return url if status == 200 else None

def _scan_level(session, words, base_scan_url):
    """Probe every word under base_scan_url; return (word, url) of the first hit, or None."""
    with concurrent.futures.ThreadPoolExecutor(max_workers=THREADS) as executor:
        future_to_word = {
            executor.submit(check_url, session, f"{base_scan_url}{word}/index.html"): word
            for word in words
        }
        for future in concurrent.futures.as_completed(future_to_word):
            hit_url = future.result()
            if hit_url:
                # Drop probes that have not started yet; probes already in
                # flight are waited for when the with-block exits.
                executor.shutdown(wait=False, cancel_futures=True)
                return future_to_word[future], hit_url
    return None


def _report_flag(session, final_url):
    """Fetch final_url and print the flag if the "Flag-" marker is in the body."""
    print(f"[*] Checking for flag at {final_url} ...")
    try:
        # Bounded wait so a stalled server cannot hang the script at the end.
        resp = session.get(final_url, timeout=10)
    except requests.RequestException as e:
        print(f"[!] Error fetching flag: {e}")
        return

    start_index = resp.text.find("Flag-")
    if start_index == -1:
        print("[?] Flag pattern not found in response text. Check manually.")
        return

    # The flag is taken to run up to the next "<" (start of an HTML tag), or
    # to the end of the body when there is none.
    flag = resp.text[start_index:].partition("<")[0].strip()
    print(f"\n[!!!] FLAG FOUND: {flag}")


def main():
    """Discover nested folders one level at a time, then look for the flag.

    Starting at BASE_URL, each level probes ``<current path>/<word>/index.html``
    for every word in the wordlist. The first word that answers with HTTP 200
    is appended to the path and the scan moves one level deeper. This repeats
    until TARGET_DEPTH levels are found or a level yields no hit. The final
    path is then fetched and searched for a ``Flag-`` marker.
    """
    words = load_words(WORDLIST_FILE)

    current_path = ""

    print(f"[*] Starting recursion scan on {BASE_URL}")
    print(f"[*] Target Depth: {TARGET_DEPTH}")

    total_start_time = time.time()

    # The context manager closes pooled connections when the scan is done.
    with requests.Session() as session:
        # Size the connection pool to the worker count so threads reuse
        # connections instead of overflowing the default pool.
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=THREADS, pool_maxsize=THREADS
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        for level in range(1, TARGET_DEPTH + 1):
            print(f"\n[+] Scanning Level {level}...")

            # Only direct subdirectories of the current path are probed.
            hit = _scan_level(session, words, f"{BASE_URL}{current_path}/")
            if hit is None:
                print(f"[!] Failed to find next directory at level {level}.")
                print(f"[!] Current Path: {BASE_URL}{current_path}")
                break

            found_word, result_url = hit
            current_path = f"{current_path}/{found_word}"
            print(f"    [FOUND] /{found_word} -> {result_url}")

        total_time = time.time() - total_start_time
        print("-" * 60)
        print(f"[*] Scan Complete in {total_time:.2f} seconds.")
        print(f"[*] Full Path: {BASE_URL}{current_path}")

        # Check for flag at the end
        _report_flag(session, f"{BASE_URL}{current_path}")

# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()