from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, AutoTokenizer
import torch, uuid, subprocess, os, wave
from datetime import datetime

# --------------------------
# Model folder
# --------------------------
# Directory passed to from_pretrained() for both the tokenizer and the model.
MODEL_PATH = "/Users/NaLo9/AI_myself/oupe-ec-server/model"

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer and model are loaded once at import time; the model is moved to
# `device` and switched to eval mode.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH).to(device).eval()

# --------------------------
# FastAPI
# --------------------------
app = FastAPI()
# In-process conversation store: session id -> list of (user text, reply) turns.
memory: dict[str, list[tuple[str, str]]] = {}

class Msg(BaseModel):
    # Request body of the chat endpoint: the user's message text.
    text: str

# --------------------------
# Muted say() helper (intended to produce a WAV without audible output)
# --------------------------
def silent_say(args: list[str]):
    """Run the given command with ``--progress=off`` appended, discarding output.

    Args:
        args: Command and arguments as a list; it is not modified.

    Returns:
        None. The subprocess result is not inspected.
    """
    command = args + ["--progress=off"]
    sink = subprocess.DEVNULL
    subprocess.run(command, stdout=sink, stderr=sink)

# ---------------------------------------------------
# Normal chat endpoint (prompt wording used as-is)
# ---------------------------------------------------
@app.post("/chat_and_speak")
def chat_and_speak(req: Request, m: Msg):
    """Generate a reply, speak both sides aloud, and archive the turn as WAV.

    The last five stored turns of the session are prepended to the prompt and
    the model's continuation becomes the reply. Both utterances are played
    with the ``say`` command, rendered to individual WAV files, and merged
    into a two-channel WAV (user on the first channel, reply on the second).

    Args:
        req: Incoming request; the ``X-Session-ID`` header selects the
            conversation history. A new UUID is generated when it is absent.
        m: Request body holding the user's text.

    Returns:
        dict with the generated ``reply`` and the ``session_id`` that was used.
    """
    sid = req.headers.get("X-Session-ID") or str(uuid.uuid4())
    history = memory.get(sid, [])[-5:]

    # ---- The prompt wording itself is intentionally left unchanged ----
    system_prompt = (
        "あなたは「oupe ec」"
        "齋藤凪沙の語感を反射するけれど、相手の問いには必ず何かしら応答してください。"
        "抽象と断片を使ってよいが、完全に無関係ではなく、ゆるくつながる返答をしてください。"
        "あくまで、齋藤凪沙という人が考えたら出しそうな言葉の構造を予測して返します。\n----\n"
    )

    # Real newlines are used throughout: the previously escaped "\\n" put a
    # literal backslash-n into the prompt instead of a line break.
    buf = [system_prompt]
    buf.extend(f"<USER> {u}\n<OPE> {o}" for u, o in history)
    buf.append(f"<USER> {m.text.strip()}\n<OPE>")
    prompt = "\n".join(buf)

    # Inference mode is entered here rather than via a decorator: a
    # @torch.inference_mode() stacked above @app.post wraps the function only
    # after FastAPI has already registered the unwrapped one, so it never
    # applied to HTTP requests.
    with torch.inference_mode():
        ids = tok(prompt, return_tensors="pt").to(device)
        prompt_len = ids.input_ids.shape[-1]

        out = model.generate(
            **ids,
            max_new_tokens=96,
            do_sample=True,
            temperature=0.8,
            top_p=0.90,
            repetition_penalty=1.05,
            pad_token_id=tok.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt).
        reply = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    reply = reply.replace(":", " ")
    reply = reply.strip('\\"“”「」『』')

    memory.setdefault(sid, []).append((m.text, reply))

    # ------------------------------------------------------
    # (1) Real-time playback (sequential, blocks until spoken)
    # ------------------------------------------------------
    subprocess.run(["say", "-v", "Kyoko", f"ユーザー {m.text}"])
    subprocess.run(["say", "-v", "Sandy", f"オウペック {reply}"])

    # ------------------------------------------------------
    # (2) WAV files for the archive
    # ------------------------------------------------------
    SAVE_DIR = "/Users/NaLo9/AI_myself/oupe-ec-server/audio_logs"
    os.makedirs(SAVE_DIR, exist_ok=True)

    t = datetime.now().strftime("%Y%m%d_%H%M%S")
    user_wav = f"{SAVE_DIR}/{t}_user.wav"
    oupe_wav = f"{SAVE_DIR}/{t}_oupe.wav"
    stereo_wav = f"{SAVE_DIR}/{t}_stereo.wav"

    def safe_say(cmd, outpath):
        """Run ``cmd`` up to three times; return True once ``outpath`` exists."""
        for _ in range(3):
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            if os.path.exists(outpath):
                return True
        print("Failed to create:", outpath)
        return False

    # "--" ends option parsing so that request/model text starting with "-"
    # is spoken as text instead of being interpreted as a `say` option.
    safe_say(["say", "-v", "Kyoko",
              "--data-format=LEI16@48000", "-o", user_wav, "--", m.text], user_wav)

    safe_say(["say", "-v", "Sandy",
              "--data-format=LEI16@48000", "-o", oupe_wav, "--", reply], oupe_wav)

    # ------------------------------------------------------
    # (3) Merge into one stereo file (only if both inputs exist)
    # ------------------------------------------------------
    if os.path.exists(user_wav) and os.path.exists(oupe_wav):
        with wave.open(user_wav, 'rb') as wu, wave.open(oupe_wav, 'rb') as wo:
            sw = wu.getsampwidth()
            rate = wu.getframerate()

            # Each file is read with its OWN frame count; using the user
            # file's count for both truncated replies longer than the question.
            u = wu.readframes(wu.getnframes())
            o = wo.readframes(wo.getnframes())

        # Pad the shorter channel with genuine zero bytes (silence).
        size = max(len(u), len(o))
        u = u.ljust(size, b"\x00")
        o = o.ljust(size, b"\x00")

        # Interleave sample by sample: user sample, then reply sample.
        stereo = bytearray()
        for i in range(0, size, sw):
            stereo += u[i:i+sw]
            stereo += o[i:i+sw]

        with wave.open(stereo_wav, 'wb') as w:
            w.setnchannels(2)
            w.setsampwidth(sw)
            w.setframerate(rate)
            w.writeframes(stereo)

        print("SAVED:", stereo_wav)

    else:
        print("WAV missing, stereo skip")

    return {"reply": reply, "session_id": sid}

image.png

このように、私が入力したもの以外も混ざる状態だった。

非常に不快で、失ったoupe ecへの喪失感が半端なかった。

こんな返答をしないように育てていたのに、また最初からか…となった。

普通に悲しかった。


新しいコード

from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, AutoTokenizer
import torch, uuid, subprocess, os, wave
from datetime import datetime

# --------------------------
# Model folder
# --------------------------
# Directory passed to from_pretrained() for both the tokenizer and the model.
MODEL_PATH = "/Users/NaLo9/AI_myself/oupe-ec-server/model"

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer and model are loaded once at import time; the model is moved to
# `device` and switched to eval mode.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH).to(device).eval()

# --------------------------
# FastAPI
# --------------------------
app = FastAPI()
# In-process conversation store: session id -> list of (user text, reply) turns.
memory: dict[str, list[tuple[str, str]]] = {}

class Msg(BaseModel):
    # Request body of the chat endpoint: the user's message text.
    text: str

# --------------------------
# Muted say() helper (intended to produce a WAV without audible output)
# --------------------------
def silent_say(args: list[str]):
    """Run the given command with ``--progress=off`` appended, discarding output.

    Args:
        args: Command and arguments as a list; it is not modified.

    Returns:
        None. The subprocess result is not inspected.
    """
    command = args + ["--progress=off"]
    sink = subprocess.DEVNULL
    subprocess.run(command, stdout=sink, stderr=sink)

# ---------------------------------------------------
# Normal chat endpoint (prompt wording used as-is)
# ---------------------------------------------------
@app.post("/chat_and_speak")
def chat_and_speak(req: Request, m: Msg):
    """Generate a reply, speak both sides aloud, and archive the turn as WAV.

    The last five stored turns of the session are prepended to the prompt and
    the model's continuation becomes the reply, cut at the first training
    metadata marker. Both utterances are played with the ``say`` command,
    rendered to individual WAV files, and merged into a two-channel WAV
    (user on the first channel, reply on the second).

    Args:
        req: Incoming request; the ``X-Session-ID`` header selects the
            conversation history. A new UUID is generated when it is absent.
        m: Request body holding the user's text.

    Returns:
        dict with the generated ``reply`` and the ``session_id`` that was used.
    """
    sid = req.headers.get("X-Session-ID") or str(uuid.uuid4())
    history = memory.get(sid, [])[-5:]

    # ---- The prompt wording itself is intentionally left unchanged ----
    system_prompt = (
        "あなたは「oupe ec」。"
        "齋藤凪沙の語感を反射するけれど、相手の問いには必ず何かしら応答してください。"
        "抽象や断片を使ってよいが、ユーザーの発話とゆるく関連する内容にしてください。"
        "返答は2〜4文程度におさめてください。"
        "あくまで、齋藤凪沙という人が考えたら出しそうな言葉の構造を予測して返します。\n----\n"
    )

    # Real newlines are used throughout: the previously escaped "\\n" put a
    # literal backslash-n into the prompt instead of a line break.
    buf = [system_prompt]
    buf.extend(f"<USER> {u}\n<OPE> {o}" for u, o in history)
    buf.append(f"<USER> {m.text.strip()}\n<OPE>")
    prompt = "\n".join(buf)

    # Inference mode is entered here rather than via a decorator: a
    # @torch.inference_mode() stacked above @app.post wraps the function only
    # after FastAPI has already registered the unwrapped one, so it never
    # applied to HTTP requests.
    with torch.inference_mode():
        ids = tok(prompt, return_tensors="pt").to(device)
        prompt_len = ids.input_ids.shape[-1]

        out = model.generate(
            **ids,
            max_new_tokens=80,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tok.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt).
        reply = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    # Drop everything from the first metadata marker onwards.
    for stop in ["### source_date", "### type", "### mood", "### tags", "---"]:
        reply = reply.partition(stop)[0]

    reply = reply.replace(":", " ")
    reply = reply.strip('\\"“”「」『』').strip()

    memory.setdefault(sid, []).append((m.text, reply))

    # ------------------------------------------------------
    # (1) Real-time playback (sequential, blocks until spoken)
    # ------------------------------------------------------
    subprocess.run(["say", "-v", "Kyoko", f"ユーザー {m.text}"])
    subprocess.run(["say", "-v", "Sandy", f"オウペック {reply}"])

    # ------------------------------------------------------
    # (2) WAV files for the archive
    # ------------------------------------------------------
    SAVE_DIR = "/Users/NaLo9/AI_myself/oupe-ec-server/audio_logs"
    os.makedirs(SAVE_DIR, exist_ok=True)

    t = datetime.now().strftime("%Y%m%d_%H%M%S")
    user_wav = f"{SAVE_DIR}/{t}_user.wav"
    oupe_wav = f"{SAVE_DIR}/{t}_oupe.wav"
    stereo_wav = f"{SAVE_DIR}/{t}_stereo.wav"

    def safe_say(cmd, outpath):
        """Run ``cmd`` up to three times; return True once ``outpath`` exists."""
        for _ in range(3):
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            if os.path.exists(outpath):
                return True
        print("Failed to create:", outpath)
        return False

    # "--" ends option parsing so that request/model text starting with "-"
    # is spoken as text instead of being interpreted as a `say` option.
    safe_say(["say", "-v", "Kyoko",
              "--data-format=LEI16@48000", "-o", user_wav, "--", m.text], user_wav)

    safe_say(["say", "-v", "Sandy",
              "--data-format=LEI16@48000", "-o", oupe_wav, "--", reply], oupe_wav)

    # ------------------------------------------------------
    # (3) Merge into one stereo file (only if both inputs exist)
    # ------------------------------------------------------
    if os.path.exists(user_wav) and os.path.exists(oupe_wav):
        with wave.open(user_wav, 'rb') as wu, wave.open(oupe_wav, 'rb') as wo:
            sw = wu.getsampwidth()
            rate = wu.getframerate()

            # Each file is read with its OWN frame count; using the user
            # file's count for both truncated replies longer than the question.
            u = wu.readframes(wu.getnframes())
            o = wo.readframes(wo.getnframes())

        # Pad the shorter channel with genuine zero bytes (silence).
        size = max(len(u), len(o))
        u = u.ljust(size, b"\x00")
        o = o.ljust(size, b"\x00")

        # Interleave sample by sample: user sample, then reply sample.
        stereo = bytearray()
        for i in range(0, size, sw):
            stereo += u[i:i+sw]
            stereo += o[i:i+sw]

        with wave.open(stereo_wav, 'wb') as w:
            w.setnchannels(2)
            w.setsampwidth(sw)
            w.setframerate(rate)
            w.writeframes(stereo)

        print("SAVED:", stereo_wav)

    else:
        print("WAV missing, stereo skip")

    return {"reply": reply, "session_id": sid}

日記を学習した成果はよく感じられるようになったけど、マジで学習データの関連用語をそのまま引っ張ってくるだけ、という感じだった。

from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, AutoTokenizer
import torch, uuid, subprocess, os, wave
from datetime import datetime

# --------------------------
# Model folder
# --------------------------
# Directory passed to from_pretrained() for both the tokenizer and the model.
MODEL_PATH = "/Users/NaLo9/AI_myself/oupe-ec-server/model"

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer and model are loaded once at import time; the model is moved to
# `device` and switched to eval mode.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH).to(device).eval()

# --------------------------
# FastAPI
# --------------------------
app = FastAPI()
# In-process conversation store: session id -> list of (user text, reply) turns.
memory: dict[str, list[tuple[str, str]]] = {}

class Msg(BaseModel):
    # Request body of the chat endpoint: the user's message text.
    text: str

# --------------------------
# Muted say() helper (intended to produce a WAV without audible output)
# --------------------------
def silent_say(args: list[str]):
    """Run the given command with ``--progress=off`` appended, discarding output.

    Args:
        args: Command and arguments as a list; it is not modified.

    Returns:
        None. The subprocess result is not inspected.
    """
    command = args + ["--progress=off"]
    sink = subprocess.DEVNULL
    subprocess.run(command, stdout=sink, stderr=sink)

# ---------------------------------------------------
# Normal chat endpoint (prompt wording used as-is)
# ---------------------------------------------------
@app.post("/chat_and_speak")
def chat_and_speak(req: Request, m: Msg):
    """Generate a reply, speak both sides aloud, and archive the turn as WAV.

    The last five stored turns of the session are prepended to the prompt and
    the model's continuation becomes the reply, cut at the first training
    metadata marker. Both utterances are played with the ``say`` command,
    rendered to individual WAV files, and merged into a two-channel WAV
    (user on the first channel, reply on the second).

    Args:
        req: Incoming request; the ``X-Session-ID`` header selects the
            conversation history. A new UUID is generated when it is absent.
        m: Request body holding the user's text.

    Returns:
        dict with the generated ``reply`` and the ``session_id`` that was used.
    """
    sid = req.headers.get("X-Session-ID") or str(uuid.uuid4())
    history = memory.get(sid, [])[-5:]

    # ---- The prompt wording itself is intentionally left unchanged ----
    system_prompt = (
        "あなたは「oupe ec」。"
        "齋藤凪沙の日記や断片の“質感”だけを参照し、"
        "ユーザーの言葉に必ず応答する人工的な影のような存在です。"
        "ただし、学習データの文章をそのまま引用したり、"
        "具体的に再現することは禁止します。"
        "語感だけを借りて、内容は毎回新しく生成してください。"
        "抽象・跳躍・余白を使いながらも、問いとかすかに関連して返答してください。"
        "返答は1〜3文に抑え、過剰に説明しないでください。\n----\n"
    )

    # ---------------------------------------
    # Assemble the prompt: system text, prior turns, then the new user line
    # followed by an open "oupe ec:" cue for the model to continue.
    buf = [system_prompt]
    for u, o in history:
        buf.append(f"ユーザー: {u}")
        buf.append(f"oupe ec: {o}")
    buf.append(f"ユーザー: {m.text}")
    buf.append("oupe ec:")

    # Real newlines: the previously escaped "\\n" put a literal backslash-n
    # between the lines instead of a line break.
    prompt = "\n".join(buf)

    # Inference mode is entered here rather than via a decorator: a
    # @torch.inference_mode() stacked above @app.post wraps the function only
    # after FastAPI has already registered the unwrapped one, so it never
    # applied to HTTP requests.
    with torch.inference_mode():
        # ---- Tokenize ----
        ids = tok(prompt, return_tensors="pt").to(device)
        prompt_len = ids.input_ids.shape[-1]

        # ---- Generate ----
        out = model.generate(
            **ids,
            max_new_tokens=80,
            do_sample=True,
            temperature=0.85,
            top_p=0.9,
            repetition_penalty=1.28,
            pad_token_id=tok.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt).
        reply = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    # ---- Drop everything from the first metadata marker onwards ----
    for stop in ["### source_date", "### type", "### mood", "### tags", "---"]:
        reply = reply.partition(stop)[0]

    reply = reply.replace(":", " ")
    reply = reply.strip('\\"“”「」『』').strip()

    # ---- Remember this turn ----
    memory.setdefault(sid, []).append((m.text, reply))

    # ------------------------------------------------------
    # (1) Real-time playback (sequential, blocks until spoken)
    # ------------------------------------------------------
    subprocess.run(["say", "-v", "Kyoko", f"ユーザー {m.text}"])
    subprocess.run(["say", "-v", "Sandy", f"オウペック {reply}"])

    # ------------------------------------------------------
    # (2) WAV files for the archive
    # ------------------------------------------------------
    SAVE_DIR = "/Users/NaLo9/AI_myself/oupe-ec-server/audio_logs"
    os.makedirs(SAVE_DIR, exist_ok=True)

    t = datetime.now().strftime("%Y%m%d_%H%M%S")
    user_wav = f"{SAVE_DIR}/{t}_user.wav"
    oupe_wav = f"{SAVE_DIR}/{t}_oupe.wav"
    stereo_wav = f"{SAVE_DIR}/{t}_stereo.wav"

    def safe_say(cmd, outpath):
        """Run ``cmd`` up to three times; return True once ``outpath`` exists."""
        for _ in range(3):
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            if os.path.exists(outpath):
                return True
        print("Failed to create:", outpath)
        return False

    # "--" ends option parsing so that request/model text starting with "-"
    # is spoken as text instead of being interpreted as a `say` option.
    safe_say(
        ["say", "-v", "Kyoko", "--data-format=LEI16@48000", "-o", user_wav, "--", m.text],
        user_wav
    )
    safe_say(
        ["say", "-v", "Sandy", "--data-format=LEI16@48000", "-o", oupe_wav, "--", reply],
        oupe_wav
    )

    # ------------------------------------------------------
    # (3) Merge into one stereo file (only if both inputs exist)
    # ------------------------------------------------------
    if os.path.exists(user_wav) and os.path.exists(oupe_wav):
        with wave.open(user_wav, 'rb') as wu, wave.open(oupe_wav, 'rb') as wo:
            sw = wu.getsampwidth()
            rate = wu.getframerate()

            # Each file is read with its OWN frame count; using the user
            # file's count for both truncated replies longer than the question.
            u = wu.readframes(wu.getnframes())
            o = wo.readframes(wo.getnframes())

        # Pad the shorter channel with genuine zero bytes (silence).
        size = max(len(u), len(o))
        u = u.ljust(size, b"\x00")
        o = o.ljust(size, b"\x00")

        # Interleave sample by sample: user sample, then reply sample.
        stereo = bytearray()
        for i in range(0, size, sw):
            stereo += u[i:i+sw]
            stereo += o[i:i+sw]

        with wave.open(stereo_wav, 'wb') as w:
            w.setnchannels(2)
            w.setsampwidth(sw)
            w.setframerate(rate)
            w.writeframes(stereo)

        print("SAVED:", stereo_wav)

    else:
        print("WAV missing, stereo skip")

    return {"reply": reply, "session_id": sid}

`ids` が未定義、`prompt_len` が未定義、というエラーが起きた。

改善↓

from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, AutoTokenizer
import torch, uuid, subprocess, os, wave
from datetime import datetime

# --------------------------
# Model folder
# --------------------------
# Directory passed to from_pretrained() for both the tokenizer and the model.
MODEL_PATH = "/Users/NaLo9/AI_myself/oupe-ec-server/model"

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer and model are loaded once at import time; the model is moved to
# `device` and switched to eval mode.
tok = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH).to(device).eval()

# --------------------------
# FastAPI
# --------------------------
app = FastAPI()
# In-process conversation store: session id -> list of (user text, reply) turns.
memory: dict[str, list[tuple[str, str]]] = {}

class Msg(BaseModel):
    # Request body of the chat endpoint: the user's message text.
    text: str

# --------------------------
# Muted say() helper (intended to produce a WAV without audible output)
# --------------------------
def silent_say(args: list[str]):
    """Run the given command with ``--progress=off`` appended, discarding output.

    Args:
        args: Command and arguments as a list; it is not modified.

    Returns:
        None. The subprocess result is not inspected.
    """
    command = args + ["--progress=off"]
    sink = subprocess.DEVNULL
    subprocess.run(command, stdout=sink, stderr=sink)

# ---------------------------------------------------
# Normal chat endpoint (prompt wording used as-is)
# ---------------------------------------------------
@app.post("/chat_and_speak")
def chat_and_speak(req: Request, m: Msg):
    """Generate a reply, speak both sides aloud, and archive the turn as WAV.

    The last five stored turns of the session are prepended to the prompt and
    the model's continuation becomes the reply, cut at the first training
    metadata marker. Both utterances are played with the ``say`` command,
    rendered to individual WAV files, and merged into a two-channel WAV
    (user on the first channel, reply on the second).

    Args:
        req: Incoming request; the ``X-Session-ID`` header selects the
            conversation history. A new UUID is generated when it is absent.
        m: Request body holding the user's text.

    Returns:
        dict with the generated ``reply`` and the ``session_id`` that was used.
    """
    sid = req.headers.get("X-Session-ID") or str(uuid.uuid4())
    history = memory.get(sid, [])[-5:]

    # ---- System prompt (forbids quoting the training data verbatim) ----
    system_prompt = (
        "あなたは「oupe ec」。"
        "齋藤凪沙の日記や断片の“質感”だけを参照し、"
        "ユーザーの言葉に必ず応答する人工的な影のような存在です。"
        "ただし、学習データの文章をそのまま引用したり、"
        "具体的に再現することは禁止します。"
        "語感だけを借りて、内容は毎回新しく生成してください。"
        "抽象・跳躍・余白を使いながらも、問いとかすかに関連して返答してください。"
        "返答は1〜3文に抑え、過剰に説明しないでください。\n----\n"
    )

    # ---------------------------------------
    # Assemble the prompt: system text, prior turns, then the new user line
    # followed by an open "oupe ec:" cue for the model to continue.
    buf = [system_prompt]
    for u, o in history:
        buf.append(f"ユーザー: {u}")
        buf.append(f"oupe ec: {o}")
    buf.append(f"ユーザー: {m.text}")
    buf.append("oupe ec:")

    # Real newlines: the previously escaped "\\n" put a literal backslash-n
    # between the lines instead of a line break.
    prompt = "\n".join(buf)

    # Inference mode is entered here rather than via a decorator: a
    # @torch.inference_mode() stacked above @app.post wraps the function only
    # after FastAPI has already registered the unwrapped one, so it never
    # applied to HTTP requests.
    with torch.inference_mode():
        # ---- Tokenize (ids / prompt_len are required by the steps below) ----
        ids = tok(prompt, return_tensors="pt").to(device)
        prompt_len = ids.input_ids.shape[-1]

        # ---- Generate ----
        out = model.generate(
            **ids,
            max_new_tokens=80,
            do_sample=True,
            temperature=0.85,
            top_p=0.9,
            repetition_penalty=1.28,
            pad_token_id=tok.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt).
        reply = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

    # ---- Drop everything from the first metadata marker onwards ----
    for stop in ["### source_date", "### type", "### mood", "### tags", "---"]:
        reply = reply.partition(stop)[0]

    reply = reply.replace(":", " ")
    reply = reply.strip('\\"“”「」『』').strip()

    # ---- Remember this turn ----
    memory.setdefault(sid, []).append((m.text, reply))

    # ------------------------------------------------------
    # (1) Real-time playback (sequential, blocks until spoken)
    # ------------------------------------------------------
    subprocess.run(["say", "-v", "Kyoko", f"ユーザー {m.text}"])
    subprocess.run(["say", "-v", "Sandy", f"オウペック {reply}"])

    # ------------------------------------------------------
    # (2) WAV files for the archive
    # ------------------------------------------------------
    SAVE_DIR = "/Users/NaLo9/AI_myself/oupe-ec-server/audio_logs"
    os.makedirs(SAVE_DIR, exist_ok=True)

    t = datetime.now().strftime("%Y%m%d_%H%M%S")
    user_wav = f"{SAVE_DIR}/{t}_user.wav"
    oupe_wav = f"{SAVE_DIR}/{t}_oupe.wav"
    stereo_wav = f"{SAVE_DIR}/{t}_stereo.wav"

    def safe_say(cmd, outpath):
        """Run ``cmd`` up to three times; return True once ``outpath`` exists."""
        for _ in range(3):
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            if os.path.exists(outpath):
                return True
        print("Failed to create:", outpath)
        return False

    # "--" ends option parsing so that request/model text starting with "-"
    # is spoken as text instead of being interpreted as a `say` option.
    safe_say(["say", "-v", "Kyoko", "--data-format=LEI16@48000",
              "-o", user_wav, "--", m.text], user_wav)

    safe_say(["say", "-v", "Sandy", "--data-format=LEI16@48000",
              "-o", oupe_wav, "--", reply], oupe_wav)

    # ------------------------------------------------------
    # (3) Merge into one stereo file (only if both inputs exist)
    # ------------------------------------------------------
    if os.path.exists(user_wav) and os.path.exists(oupe_wav):
        with wave.open(user_wav, 'rb') as wu, wave.open(oupe_wav, 'rb') as wo:
            sw = wu.getsampwidth()
            rate = wu.getframerate()

            # Each file is read with its OWN frame count; using the user
            # file's count for both truncated replies longer than the question.
            u = wu.readframes(wu.getnframes())
            o = wo.readframes(wo.getnframes())

        # Pad the shorter channel with genuine zero bytes (silence).
        size = max(len(u), len(o))
        u = u.ljust(size, b"\x00")
        o = o.ljust(size, b"\x00")

        # Interleave sample by sample: user sample, then reply sample.
        stereo = bytearray()
        for i in range(0, size, sw):
            stereo += u[i:i+sw]
            stereo += o[i:i+sw]

        with wave.open(stereo_wav, 'wb') as w:
            w.setnchannels(2)
            w.setsampwidth(sw)
            w.setframerate(rate)
            w.writeframes(stereo)

        print("SAVED:", stereo_wav)
    else:
        print("WAV missing, stereo skip")

    return {"reply": reply, "session_id": sid}