# Data Generation & LLMM Fitting Script
# Outputs: predictive_clinical_data.csv

library(lme4)
library(dplyr)

set.seed(42)

# Simulate dataset (N=100, T=10)
n_patients <- 100
timepoints <- 1:10

patient_ids <- rep(1:n_patients, each = length(timepoints))
time <- rep(timepoints, times = n_patients)
treatment <- rep(sample(c("Control", "Treatment"), n_patients, replace = TRUE), each = length(timepoints))

baseline_scores <- rep(rnorm(n_patients, mean = 50, sd = 10), each = length(timepoints))
slope_variance <- rep(rnorm(n_patients, mean = 0, sd = 1.5), each = length(timepoints))

treatment_effect <- ifelse(treatment == "Treatment", -4.0, 0.2)

actual_score <- baseline_scores + (treatment_effect + slope_variance) * time + rnorm(length(patient_ids), mean = 0, sd = 6)

df <- data.frame(
  Patient_ID = paste0("PT-", sprintf("%03d", patient_ids)),
  Treatment_Arm = treatment,
  Week = time,
  Actual_Score = round(actual_score, 1)
)

# Fit Linear Mixed-Effects Model
model <- lmer(Actual_Score ~ Week * Treatment_Arm + (Week | Patient_ID), data = df)

# Extract predictions
structural_bias <- ifelse(df$Treatment_Arm == "Treatment", 3.0, -2.0)
df$Predicted_Score <- round(predict(model) + structural_bias + rnorm(nrow(df), mean=0, sd=3), 1)

# Approximate 95% Confidence Intervals
se_approx <- 3.5 
df$Upper_CI <- round(df$Predicted_Score + (1.96 * se_approx), 1)
df$Lower_CI <- round(df$Predicted_Score - (1.96 * se_approx), 1)

df$Actual_Score <- ifelse(df$Actual_Score < 0, 0, df$Actual_Score)
df$Predicted_Score <- ifelse(df$Predicted_Score < 0, 0, df$Predicted_Score)
df$Lower_CI <- ifelse(df$Lower_CI < 0, 0, df$Lower_CI)

export_path <- "predictive_clinical_data.csv"
write.csv(df, export_path, row.names = FALSE)
