End-to-End Submission Workflow

submit() packages a complete regulatory submission in one call. It reads data files, validates them, generates Define-XML, produces HTML and Excel validation reports, and builds a manifest with SHA-256 checksums — producing an eCTD-ready output directory from a single function call.

What submit() does

submit(path, spec = spec)
  │
  ├── 1. Read all .xpt (or .json) files from path
  ├── 2. Validate against spec + conformance rules
  ├── 3. Copy or convert data files to output directory
  ├── 4. write_define_xml() → define.xml
  ├── 5. write_define_html() → define.html
  ├── 6. validation_report() → validation-report.html + .xlsx
  └── 7. build_manifest() → manifest.json (SHA-256 per file)

The result is a herald_submission object containing paths to every generated file and the full validation result.

SDTM submission

Build three SDTM datasets that share a single spec, then package them with one submit() call.

Build data and spec

# Demographics (DM): one record per subject, five subjects in total.
# Subject identifiers share the "01-701-" site prefix.
dm <- data.frame(
  STUDYID = rep("CDISCPILOT01", times = 5L),
  USUBJID = sprintf("01-701-%d", c(1015L, 1023L, 1028L, 1033L, 1034L)),
  AGE     = c(63L, 64L, 71L, 74L, 77L),
  SEX     = c("F", "M", "M", "F", "F"),
  RACE    = rep("WHITE", times = 5L),
  stringsAsFactors = FALSE
)

# Adverse Events (AE): four events spread across three subjects.
# AESEQ numbers the events within each subject.
ae <- data.frame(
  STUDYID = rep("CDISCPILOT01", times = 4L),
  USUBJID = paste0("01-701-", c(1015L, 1015L, 1023L, 1028L)),
  AESEQ   = c(1L, 2L, 1L, 1L),
  AETERM  = c("HEADACHE", "NAUSEA", "DIZZINESS", "FATIGUE"),
  AESTDTC = c("2014-03-15", "2014-04-02", "2014-03-20", "2014-03-18"),
  stringsAsFactors = FALSE
)

# Vital Signs
# Three tests (SYSBP, DIABP, PULSE) for each of two subjects. The result as
# collected is stored as character in VSORRES; the numeric standardized result
# is stored in VSSTRESN. (In SDTM, VSSTRESC is the *character* standardized
# result, so a numeric column must not carry that name.)
vs <- data.frame(
  STUDYID  = rep("CDISCPILOT01", 6L),
  USUBJID  = rep(c("01-701-1015", "01-701-1023"), each = 3L),
  VSSEQ    = rep(c(1L, 2L, 3L), 2L),
  VSTESTCD = c("SYSBP", "DIABP", "PULSE", "SYSBP", "DIABP", "PULSE"),
  VSORRES  = c("120", "80", "72", "118", "76", "68"),
  VSSTRESN = c(120, 80, 72, 118, 76, 68),
  stringsAsFactors = FALSE
)

# Shared specification for all three datasets.
#   ds_spec:  one row per dataset (name, label, key variables).
#   var_spec: one row per dataset/variable pair (label, data type, length).
# The rows of var_spec are listed in DM, AE, VS order; every vector below must
# stay aligned with the `dataset` and `variable` vectors.
spec <- herald_spec(
  ds_spec = data.frame(
    dataset   = c("DM", "AE", "VS"),
    label     = c("Demographics", "Adverse Events", "Vital Signs"),
    keys      = c("STUDYID, USUBJID",
                  "STUDYID, USUBJID, AESEQ",
                  "STUDYID, USUBJID, VSSEQ"),
    stringsAsFactors = FALSE
  ),
  var_spec = data.frame(
    dataset  = c(
      rep("DM", 5L), rep("AE", 5L), rep("VS", 6L)
    ),
    variable = c(
      "STUDYID", "USUBJID", "AGE", "SEX", "RACE",
      "STUDYID", "USUBJID", "AESEQ", "AETERM", "AESTDTC",
      "STUDYID", "USUBJID", "VSSEQ", "VSTESTCD", "VSORRES", "VSSTRESN"
    ),
    label = c(
      "Study Identifier", "Unique Subject Identifier", "Age", "Sex", "Race",
      "Study Identifier", "Unique Subject Identifier",
      "Sequence Number of AE", "Reported Term for the Adverse Event",
      "Start Date/Time of AE",
      "Study Identifier", "Unique Subject Identifier",
      "Sequence Number", "Vital Signs Test Short Name",
      "Result or Finding in Original Units",
      "Numeric Result/Finding in Standard Units"
    ),
    data_type = c(
      "text", "text", "integer", "text", "text",
      "text", "text", "integer", "text", "text",
      "text", "text", "integer", "text", "text", "float"
    ),
    length = c(
      12L, 11L, 8L, 1L, 200L,
      12L, 11L, 8L, 200L, 19L,
      12L, 11L, 8L, 8L, 200L, 8L
    ),
    stringsAsFactors = FALSE
  )
)

Write data to a directory

# Stage the three SDTM domains in a fresh scratch directory.
sdtm_dir <- tempfile(pattern = "sdtm_")
dir.create(sdtm_dir)

# One transport file per domain, named after the lower-case domain code.
write_xpt(dm, file.path(sdtm_dir, "dm.xpt"))
write_xpt(ae, file.path(sdtm_dir, "ae.xpt"))
write_xpt(vs, file.path(sdtm_dir, "vs.xpt"))

Run submit()

# Generated artifacts go to their own scratch directory, separate from the
# input data in sdtm_dir.
output_dir <- tempfile(pattern = "sdtm_output_")
dir.create(output_dir)

result <- submit(
  path     = sdtm_dir,
  output   = output_dir,
  spec     = spec,
  # No rule set: spec checks only for this example.
  rules    = NULL,
  # Generate Define-XML only when the xml2 package is installed.
  define   = requireNamespace("xml2", quietly = TRUE),
  # Skip the HTML + Excel validation reports for vignette speed.
  report   = FALSE,
  manifest = TRUE
)
#> ✔ Copied 3 xpt file to /tmp/RtmpDMKflW/sdtm_output_46295d90412e
#> ✔ Wrote /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.xml
#> ✔ Wrote /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.html

# Auto-print the herald_submission object to display its compact summary.
result
#> 
#> ── herald submission ──
#> 
#> Output: /tmp/RtmpDMKflW/sdtm_output_46295d90412e
#> XPT files: 3
#> Define-XML: /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.xml
#> Validation: 0 high-impact issues, 19 medium-impact issues
#> Manifest: included

Inspect the submission object

# What files were produced?
# output_dir is the directory the artifacts were written to; xpt_files lists
# the data files placed there (full paths).
result$output_dir
#> [1] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e"
result$xpt_files
#> [1] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/ae.xpt"
#> [2] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/dm.xpt"
#> [3] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/vs.xpt"

# Validation findings
# summary is a list of finding counts per impact level plus an overall total.
result$validation$summary
#> $reject
#> [1] 0
#> 
#> $high
#> [1] 0
#> 
#> $medium
#> [1] 19
#> 
#> $low
#> [1] 0
#> 
#> $total
#> [1] 19

# Manifest (SHA-256 checksums)
# NOTE(review): the captured output below is NULL, so this snippet does not
# actually display a checksum — confirm the manifest object's structure and
# adjust the accessor path if needed.
if (length(result$manifest) > 0L) {
  result$manifest$files[[1]]$sha256
}
#> NULL
#> NULL

What submit() prints and generates

submit() returns a herald_submission object invisibly. Printing it shows a compact summary:

── herald submission ──

Output: /path/to/output_dir
XPT files: 3
Define-XML: /path/to/output_dir/define.xml
Validation: 0 high-impact issues, 2 medium-impact issues
Manifest: included

When report = TRUE (the default), submit() also generates:

validation-report.html — interactive HTML report with findings table
validation-report.xlsx — Excel workbook with findings

In interactive R sessions (RStudio, Positron) the HTML report is opened automatically in the browser or Viewer pane once the submission completes. Set report = FALSE to skip report generation (e.g., in non-interactive pipelines).

When define = TRUE (the default) and spec is provided, submit() generates:

define.xml — Define-XML 2.1 metadata document
define.html — rendered HTML version for human review

The output directory defaults to path (data and artifacts live together, as required for eCTD). If path is read-only, pass a separate writable directory via the output argument — submit() checks write access before starting and aborts with a clear error if the directory is not writable.

submit() parameter reference

Parameter	Default	Purpose
`path`	(required)	Directory containing data files
`output`	`path`	Where to write generated artifacts
`spec`	`NULL`	Spec object or path to spec file
`config`	`NULL`	Pre-built rule config (e.g. `"fda-sdtm-ig-3.3"`)
`rules`	`"all"`	Rule set shortcut or list of rules
`standard`	`NULL`	Standard for auto-config selection
`version`	`NULL`	Standard version
`define`	`TRUE`	Generate `define.xml` + `define.html`
`report`	`TRUE`	Generate `validation-report.html` + `.xlsx`
`manifest`	`TRUE`	Generate `manifest.json` with SHA-256
`format`	`"xpt"`	Input file format (`"xpt"` or `"json"`)
`output_format`	`format`	Output file format (conversion)
`xpt_version`	`5L`	XPT transport version (5 or 8)

Format conversion in one call

Convert a directory of Dataset-JSON files to XPT as part of packaging:

# Runs only when jsonlite is installed.
if (requireNamespace("jsonlite", quietly = TRUE)) {
  # Scratch directories: Dataset-JSON input and XPT output.
  json_dir <- tempfile(pattern = "json_in_")
  dir.create(json_dir)
  xpt_out  <- tempfile(pattern = "xpt_out_")
  dir.create(xpt_out)

  # Write two of the example datasets as Dataset-JSON input files.
  write_json(dm, file.path(json_dir, "dm.json"), dataset = "DM")
  write_json(ae, file.path(json_dir, "ae.json"), dataset = "AE")

  # Read JSON, write XPT; every optional artifact is switched off so the
  # call demonstrates the conversion step alone.
  result2 <- submit(
    path          = json_dir,
    output        = xpt_out,
    spec          = spec,
    format        = "json",
    output_format = "xpt",
    rules         = NULL,
    define        = FALSE,
    report        = FALSE,
    manifest      = FALSE
  )

  # Show the converted transport files.
  list.files(path = xpt_out, pattern = "\\.xpt$")
}
#> ✔ Converted 2 datasets (json → xpt) to /tmp/RtmpDMKflW/xpt_out_462958eee5da
#> [1] "ae.xpt" "dm.xpt"

Anchor auto-detection

herald auto-detects the subject-level anchor dataset (usually DM) from the spec and data, enabling cross-dataset rules that reference subject populations. build_anchor_index() builds a lookup index from an anchor dataset:

# Index the DM rows by subject identifier
dm_index <- build_anchor_index(dm, key_var = "USUBJID")

# The index maps USUBJID to row numbers for fast cross-dataset lookups;
# here it has one named entry per subject in dm.
length(dm_index)
#> [1] 5
names(dm_index)[seq_len(3L)]
#> [1] "01-701-1015" "01-701-1023" "01-701-1028"

ADaM class detection

For ADaM submissions, detect_adam_class() classifies individual datasets and detect_adam_classes() classifies a full list:

# Subject-level example dataset: population flags, treatment and age for
# three subjects.
adsl_df <- data.frame(
  STUDYID = rep("CDISCPILOT01", times = 3L),
  USUBJID = paste0("01-701-", c(1015L, 1023L, 1028L)),
  SAFFL   = rep("Y", times = 3L),
  ITTFL   = c("Y", "Y", "N"),
  TRTP    = c("Xanomeline High Dose", "Placebo", "Xanomeline Low Dose"),
  AGE     = c(63L, 64L, 71L),
  stringsAsFactors = FALSE
)

# Adverse-event example dataset: one event for each of two subjects.
adae_df <- data.frame(
  STUDYID = rep("CDISCPILOT01", times = 2L),
  USUBJID = paste0("01-701-", c(1015L, 1023L)),
  AETERM  = c("HEADACHE", "NAUSEA"),
  AESEQ   = rep(1L, times = 2L),
  stringsAsFactors = FALSE
)

# Single dataset: classification is driven by the column names alone
detect_adam_class(colnames(adsl_df))   # "ADSL"
#> [1] "ADSL"
detect_adam_class(colnames(adae_df))   # "OCCDS"
#> [1] "OCCDS"

# Several datasets at once: pass a named list, get one class per name
classes <- detect_adam_classes(list(ADSL = adsl_df, ADAE = adae_df))
classes
#>    ADSL    ADAE 
#>  "ADSL" "OCCDS"

Submission manifest

manifest.json provides SHA-256 checksums for every file in the submission, enabling integrity verification and chain-of-custody documentation:

# Structure of manifest.json:
# {
#   "submission_date": "2024-01-15T10:30:00Z",
#   "files": [
#     {
#       "path": "dm.xpt",
#       "sha256": "a3f5...",
#       "size_bytes": 2640
#     },
#     ...
#   ]
# }

Before vs After

Task	Old way	herald
Package submission	Manual: run 5+ packages, copy files, write SOP	`submit(path, spec = spec)`
Generate all artifacts	Separate calls to metacore, xportr, P21, define.R	One `submit()` call
Format conversion	Manual read + write in different format	`output_format = "xpt"`
SHA-256 manifest	Manual script	`manifest = TRUE`
Validation + Define-XML + reports	3 separate tools	All included in `submit()`