ERD | Notion

https://dbdiagram.io/d/68a722401e7a611967fd31a1
// PaperMind ERD - 모든 설명 포함
// 이 코드를 <https://dbdiagram.io/d> 에 붙여넣어서 확인하세요

// ================================
// 1. 핵심 메타데이터 (PostgreSQL)
// ================================

// papers: one row per paper; per the table note, metadata is collected from the OpenAlex API.
// openalex_paper_id is declared `unique` on the column, which already yields a unique
// index, so it is intentionally not repeated in the indexes block (a second entry would
// create a duplicate, identical index).
Table papers {
  id bigint [pk, increment, note: '논문 고유 식별자']
  openalex_paper_id varchar(50) [unique, not null, note: 'OpenAlex API의 논문 ID (예: W2741809807)']
  doi varchar(255) [null, note: 'Digital Object Identifier (예: 10.1038/nature12373)']
  title varchar(1000) [not null, note: '논문 제목']

  keywords json [null, note: '키워드 정보 JSON']
  type varchar(255) [null, note: '논문의 타입 (예: article, review article)']

  abstract text [null, note: '논문 초록/요약문']
  // Nullable many-to-one link to journals.
  journal_id bigint [ref: > journals.id, null, note: '저널 외래키 참조']
  publisher varchar(255) [null, note: '출판사명 (예: Elsevier, Springer)']
  publication_date date [null, note: '논문 출간일']
  citation_count integer [default: 0, note: '인용 횟수 (OpenAlex 기준)']
  pdf_url varchar(500) [null, note: 'PDF 링크']
  issn varchar(20) [null, note: '실제 게재된 버전의 ISSN']
  is_open_access boolean [default: false, note: '오픈액세스 여부']
  created_at timestamp [default: `now()`, note: '데이터 생성 시각']
  updated_at timestamp [default: `now()`, note: '데이터 최종 수정 시각']

  indexes {
    // NOTE(review): a plain index on title serves equality/prefix lookups only; real
    // text search presumably needs a GIN/trigram index created outside DBML - confirm.
    title [note: '제목 기반 텍스트 검색']
    publication_date [note: '출간일 범위 검색']
    citation_count [note: '인용수 정렬?']
  }

  Note: '논문 메타데이터 저장 - OpenAlex API에서 수집'
}

// ================================
// 저널 테이블 (정규화된 설계)
// ================================

// journals: one row per publication source; per the table note, based on OpenAlex Sources,
// with impact factor coming from an external source.
// openalex_source_id is `unique` on the column, so no separate unique index is declared
// for it (that would create a duplicate index).
Table journals {
  id bigint [pk, increment, note: '저널 고유 식별자']
  openalex_source_id varchar(50) [unique, not null, note: 'OpenAlex Source ID (예: S137773608)']
  name varchar(255) [not null, note: '저널의 공식 이름 (예: Nature, Science)']
  issn_l varchar(20) [null, note: 'Linking ISSN - 모든 매체를 대표하는 주 식별자']
  issn json [null, note: '모든 ISSN 배열 (print, online 등의 모든 버전)']
  alternate_titles json [null, note: '저널의 다른 이름들, 축약형, 번역명 등']
  abbreviated_title varchar(255) [null, note: 'ISSN 센터에서 제공하는 공식 축약 제목']
  // decimal(7,3) rather than decimal(5,3): the narrower type tops out at 99.999, while
  // published impact factors can exceed 100 and would overflow on insert.
  impact_factor decimal(7,3) [null, note: '최신 임팩트 팩터 (예: 42.778) - 외부 데이터']
  sjr_ranking decimal(6,3) [null, note: 'SCImago Journal Rank 순위 - 외부 데이터']
  host_organization varchar(255) [null, note: '출판사/호스트 기관명 (예: Nature Portfolio, Springer)']
  is_oa boolean [default: false, note: '오픈 액세스 저널 여부 (Gold OA)']
  source_type varchar(50) [default: 'journal', note: '소스 타입 (journal, repository, conference)']
  created_at timestamp [default: `now()`, note: '데이터 생성 시각']
  updated_at timestamp [default: `now()`, note: '데이터 최종 수정 시각']

  indexes {
    name [note: '저널명으로 검색']
    issn_l [note: 'ISSN-L로 빠른 매칭']
    source_type [note: '소스 타입별 분류']
    impact_factor [note: '임팩트 팩터 순으로 정렬']
    sjr_ranking [note: 'SJR 순위로 정렬']
  }

  Note: '저널 메타데이터 - OpenAlex Sources 기반, 임팩트 팩터는 외부 소스 연동'
}

// authors: one row per researcher; per the table note, data is collected from OpenAlex.
// openalex_author_id is `unique` on the column, so it is not repeated as a unique index
// (that would create a duplicate index).
Table authors {
  id bigint [pk, increment, note: '저자 고유 식별자']
  name varchar(255) [not null, note: '저자 이름 (예: John Smith)']
  // Unique but nullable: an author row may exist without an OpenAlex ID.
  openalex_author_id varchar(50) [unique, null, note: 'OpenAlex 저자 ID (예: A2208157607)']
  orcid varchar(50) [null, note: 'ORCID 연구자 식별자 (예: 0000-0002-1825-0097)']
  affiliation varchar(500) [null, note: '소속 기관 (예: MIT, Stanford University)']
  h_index integer [default: 0, note: 'H-index 지수 (연구 영향력)']
  i10_index integer [default: 0, note: 'i10-index 지수 (10 이상의 피 인용수를 얻은 논문 수)']
  citation_count integer [default: 0, note: '총 인용 횟수']
  created_at timestamp [default: `now()`, note: '데이터 생성 시각']
  updated_at timestamp [default: `now()`, note: '데이터 최종 수정 시각']

  indexes {
    name [note: '저자명으로 검색']
    h_index [note: 'H-index 기준 정렬']
  }

  Note: '논문 저자 정보 - OpenAlex에서 수집한 연구자 데이터'
}

// paper_authors: junction table for the many-to-many relation between papers and authors.
// The surrogate `id` is the only primary key. The (paper_id, author_id) pair was previously
// also flagged `pk`, which declares two primary keys on one table (rejected by PostgreSQL);
// it is now a unique index, and both FK columns are `not null` to keep the guarantee the
// composite primary key implied.
Table paper_authors {
  id bigint [pk, increment, note: '저자 관계 고유 식별자']
  paper_id bigint [ref: > papers.id, not null, note: '논문 ID (외래키)']
  author_id bigint [ref: > authors.id, not null, note: '저자 ID (외래키)']
  author_order integer [not null, note: '저자 순서 (1=제1저자, 2=제2저자...)']
  is_corresponding boolean [default: false, note: '교신저자 여부']

  indexes {
    // An author appears at most once per paper.
    (paper_id, author_id) [unique, note: '복합 유니크 키 - 논문당 저자는 유일']
    // An author position is used at most once per paper.
    (paper_id, author_order) [unique, note: '논문 내에서 저자 순서는 중복 불가']
  }

  Note: '논문-저자 다대다 관계 테이블 - 한 논문에 여러 저자, 한 저자가 여러 논문'
}

// ================================
// 2. 개념 분류 시스템 (PostgreSQL)
// ================================

// concepts: hierarchical field-of-study taxonomy (OpenAlex Concept Taxonomy per the table note).
// name and openalex_concept_id are both `unique` on the column, which already yields
// unique indexes; they are not repeated in the indexes block to avoid duplicate indexes.
Table concepts {
  id bigint [pk, increment, note: '개념 고유 식별자']
  name varchar(255) [unique, not null, note: '학술 분야명 (예: Computer science, Machine learning)']
  openalex_concept_id varchar(50) [unique, not null, note: 'OpenAlex 개념 ID (예: C41008148)']
  // Hierarchy depth; the note describes 0 as the top level and 5 as the most specific.
  level integer [not null, note: '계층 레벨 (0=최상위 Economics, 5=최하위 세부분야)']
  works_count integer [default: 0, note: '이 개념이 태그된 논문 수']
  created_at timestamp [default: `now()`, note: '데이터 생성 시각']
  updated_at timestamp [default: `now()`, note: '데이터 최종 수정 시각']

  indexes {
    level [note: '계층 레벨별 조회']
    works_count [note: '인기 개념 순서 정렬']
  }

  Note: 'OpenAlex Concept Taxonomy - 학술 분야 계층적 분류 시스템'
}

// paper_concepts: junction table for the many-to-many relation between papers and
// concepts, carrying a per-pair relevance score.
Table "paper_concepts" {
  "paper_id" bigint [note: '논문 ID (외래키)', ref: > papers.id]
  "concept_id" bigint [note: '개념 ID (외래키)', ref: > concepts.id]
  "relevance_score" decimal(5,4) [note: 'OpenAlex 제공 관련도 점수 (0-1, 높을수록 관련성 높음)', not null]

  indexes {
    // Composite primary key: one row per (paper, concept) pair.
    (paper_id, concept_id) [note: '복합 기본키 - 논문당 개념은 유일', pk]
    // Composite index over concept and score.
    (concept_id, relevance_score) [note: '개념별 관련도 순으로 논문 조회']
  }

  Note: '논문-개념 다대다 관계 - 하나의 논문이 여러 학술 분야에 속할 수 있음'
}

// ================================
// 4. 요약 및 하이라이팅 (PostgreSQL)
// ================================

// paper_section_summaries: one AI-generated summary per section of a paper.
// paper_id is `not null`: a NULL paper_id would bypass the unique (paper_id, section_order)
// constraint, because NULLs never compare equal in a unique index.
// No standalone paper_id index is declared: the composite unique index below has paper_id
// as its leading column and already serves per-paper lookups.
Table paper_section_summaries {
  id bigint [pk, increment, note: '요약 고유 식별자']
  paper_id bigint [ref: > papers.id, not null, note: '논문 ID (외래키)']
  section_name varchar(100) [not null, note: '논문 섹션명 (Abstract, Introduction, Methods, Results 등)']
  section_order integer [not null, note: '섹션 순서 (1=첫번째, 2=두번째...)']
  // NOTE(review): `not null` here sits oddly with the 'pending' default status below -
  // confirm whether rows are created before the summary text exists.
  summary_content text [not null, note: 'AI가 생성한 섹션별 요약문']
  key_highlights json [null, note: '핵심 키워드 위치/하이라이트 정보 JSON']
  summary_status varchar(20) [default: 'pending', note: '요약 상태 (pending/processing/completed/failed)']
  summary_model varchar(50) [null, note: '요약 생성에 사용된 AI 모델명']
  summarized_at timestamp [null, note: '요약 완료 시각']

  indexes {
    // Also covers "all summaries for a paper" lookups via its leading column.
    (paper_id, section_order) [unique, note: '논문 내 섹션 순서는 유일']
    summary_status [note: '요약 진행 상태별 조회']
  }

  Note: 'AI 생성 논문 섹션별 요약 - 사용자에게 순차적으로 표시'
}

// ===================================
// 5. 벡터 검색 (PostgreSQL + pgvector)
// ===================================

// paper_embeddings: vector embeddings for a paper, one row per paper (1:1 with papers).
// paper_id is `not null` so the unique index below actually enforces the 1:1 relation
// (multiple NULL rows would otherwise be allowed).
Table paper_embeddings {
  id bigint [pk, increment, note: '임베딩 고유 식별자']
  paper_id bigint [ref: - papers.id, not null, note: '논문 ID (1:1 관계)']
  // Example model changed from all-MiniLM-L6-v2, which emits 384-dimensional vectors and
  // so cannot fill the vector(768) columns below, to a 768-dimensional model.
  model_name varchar(100) [not null, note: '사용된 임베딩 모델명 (예: all-mpnet-base-v2)']
  title_embedding vector(768) [null, note: '논문 제목의 벡터 임베딩 (768차원)']
  section_embeddings jsonb [null, note: '논문 섹션별 벡터 임베딩 (768차원)']
  full_text_embedding vector(768) [null, note: '논문 전문 임베딩 (768차원)']
  has_full_text boolean [null, note: '전문이 있는지']
  dimension integer [default: 768, note: '벡터 차원 수']
  created_at timestamp [default: `now()`, note: '임베딩 생성 시각']

  indexes {
    // Enforces at most one embedding row per paper.
    paper_id [unique, note: '논문별 임베딩은 유일 (1:1 관계)']
    model_name [note: '모델별 임베딩 조회']
  }

  Note: '논문의 벡터 임베딩 저장 - 유사도 검색과 추천에 핵심적 역할'
}

// Two self-references on authors (orcid < citation_count, affiliation < citation_count)
// were removed: they related an integer column to non-unique varchar columns of the same
// table, which cannot form a valid foreign key and matches no relationship in the schema.