Skip to contents

submit() packages a complete regulatory submission in one call. It reads data files, validates them, generates Define-XML, produces HTML and Excel validation reports, and builds a manifest with SHA-256 checksums — producing an eCTD-ready output directory from a single function call.

What submit() does

submit(path, spec = spec)
  │
  ├── 1. Read all .xpt (or .json) files from path
  ├── 2. Validate against spec + conformance rules
  ├── 3. Copy or convert data files to output directory
  ├── 4. write_define_xml() → define.xml
  ├── 5. write_define_html() → define.html
  ├── 6. validation_report() → validation-report.html + .xlsx
  └── 7. build_manifest() → manifest.json (SHA-256 per file)

The result is a herald_submission object containing paths to every generated file and the full validation result.

SDTM submission

Build three SDTM datasets and a shared spec, then package them with a single submit() call.

Build data and spec

# Demographics: one row per subject
dm <- data.frame(
  STUDYID = "CDISCPILOT01",  # length-one value is recycled to every row
  USUBJID = c(
    "01-701-1015", "01-701-1023", "01-701-1028",
    "01-701-1033", "01-701-1034"
  ),
  AGE     = c(63L, 64L, 71L, 74L, 77L),
  SEX     = c("F", "M", "M", "F", "F"),
  RACE    = "WHITE",
  stringsAsFactors = FALSE
)

# Adverse Events: one row per reported event (subject 1015 has two)
ae <- data.frame(
  STUDYID = "CDISCPILOT01",  # length-one value is recycled to every row
  USUBJID = paste0("01-701-", c(1015L, 1015L, 1023L, 1028L)),
  AESEQ   = c(1L, 2L, 1L, 1L),
  AETERM  = c("HEADACHE", "NAUSEA", "DIZZINESS", "FATIGUE"),
  AESTDTC = c("2014-03-15", "2014-04-02", "2014-03-20", "2014-03-18"),
  stringsAsFactors = FALSE
)

# Vital Signs
# VSORRES holds the result as collected (character). The standardized result
# is stored as a number, so it uses the numeric variable name VSSTRESN (the
# character counterpart in SDTM is VSSTRESC) and is typed "float" in the spec.
vs <- data.frame(
  STUDYID  = rep("CDISCPILOT01", 6L),
  USUBJID  = rep(c("01-701-1015", "01-701-1023"), each = 3L),
  VSSEQ    = rep(c(1L, 2L, 3L), 2L),
  VSTESTCD = c("SYSBP", "DIABP", "PULSE", "SYSBP", "DIABP", "PULSE"),
  VSORRES  = c("120", "80", "72", "118", "76", "68"),
  VSSTRESN = c(120, 80, 72, 118, 76, 68),
  stringsAsFactors = FALSE
)

# Shared spec for all three datasets.
# ds_spec has one row per dataset; var_spec has one row per variable. The
# var_spec vectors are parallel and listed in DM, AE, VS order, so every
# vector must stay aligned with `variable` (5 + 5 + 6 = 16 entries).
spec <- herald_spec(
  ds_spec = data.frame(
    dataset   = c("DM", "AE", "VS"),
    label     = c("Demographics", "Adverse Events", "Vital Signs"),
    keys      = c("STUDYID, USUBJID",
                  "STUDYID, USUBJID, AESEQ",
                  "STUDYID, USUBJID, VSSEQ"),
    stringsAsFactors = FALSE
  ),
  var_spec = data.frame(
    dataset  = c(
      rep("DM", 5L), rep("AE", 5L), rep("VS", 6L)
    ),
    variable = c(
      "STUDYID", "USUBJID", "AGE", "SEX", "RACE",
      "STUDYID", "USUBJID", "AESEQ", "AETERM", "AESTDTC",
      "STUDYID", "USUBJID", "VSSEQ", "VSTESTCD", "VSORRES", "VSSTRESN"
    ),
    label = c(
      "Study Identifier", "Unique Subject Identifier", "Age", "Sex", "Race",
      "Study Identifier", "Unique Subject Identifier",
      "Sequence Number of AE", "Reported Term for the Adverse Event",
      "Start Date/Time of AE",
      "Study Identifier", "Unique Subject Identifier",
      "Sequence Number", "Vital Signs Test Short Name",
      "Result or Finding in Original Units",
      "Numeric Result/Finding in Standard Units"
    ),
    data_type = c(
      "text", "text", "integer", "text", "text",
      "text", "text", "integer", "text", "text",
      "text", "text", "integer", "text", "text", "float"
    ),
    length = c(
      12L, 11L, 8L, 1L, 200L,
      12L, 11L, 8L, 200L, 19L,
      12L, 11L, 8L, 8L, 200L, 8L
    ),
    stringsAsFactors = FALSE
  )
)

Write data to a directory

# Create a fresh temporary directory to act as the submission input folder
sdtm_dir <- tempfile("sdtm_")
dir.create(sdtm_dir)

# Write each dataset to its own .xpt file; submit() reads every .xpt in path
write_xpt(dm, file.path(sdtm_dir, "dm.xpt"))
write_xpt(ae, file.path(sdtm_dir, "ae.xpt"))
write_xpt(vs, file.path(sdtm_dir, "vs.xpt"))

Run submit()

# Write generated artifacts to a separate temporary directory instead of
# alongside the input files
output_dir <- tempfile("sdtm_output_")
dir.create(output_dir)

# Package the datasets in sdtm_dir; the returned object is kept in `result`
result <- submit(
  path     = sdtm_dir,
  output   = output_dir,
  spec     = spec,
  rules    = NULL,       # spec checks only for this example
  # generate Define-XML only when the optional xml2 package is installed
  define   = requireNamespace("xml2", quietly = TRUE),
  report   = FALSE,      # skip validation reports (HTML + Excel) for vignette speed
  manifest = TRUE
)
#>  Copied 3 xpt file to /tmp/RtmpDMKflW/sdtm_output_46295d90412e
#>  Wrote /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.xml
#>  Wrote /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.html

result
#> 
#> ── herald submission ──
#> 
#> Output: /tmp/RtmpDMKflW/sdtm_output_46295d90412e
#> XPT files: 3
#> Define-XML: /tmp/RtmpDMKflW/sdtm_output_46295d90412e/define.xml
#> Validation: 0 high-impact issues, 19 medium-impact issues
#> Manifest: included

Inspect the submission object

# What files were produced?
result$output_dir
#> [1] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e"
result$xpt_files
#> [1] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/ae.xpt"
#> [2] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/dm.xpt"
#> [3] "/tmp/RtmpDMKflW/sdtm_output_46295d90412e/vs.xpt"

# Validation findings
result$validation$summary
#> $reject
#> [1] 0
#> 
#> $high
#> [1] 0
#> 
#> $medium
#> [1] 19
#> 
#> $low
#> [1] 0
#> 
#> $total
#> [1] 19

# Manifest (SHA-256 checksums): look up the sha256 entry of the first file,
# guarded so nothing is evaluated when the manifest is empty.
# NOTE(review): the rendered result below is a visible NULL, so this accessor
# does not appear to reach a checksum — confirm the structure of
# result$manifest against build_manifest().
if (length(result$manifest) > 0L) {
  result$manifest$files[[1]]$sha256
}
#> NULL

What submit() prints and generates

submit() returns a herald_submission object invisibly. Printing it shows a compact summary:

── herald submission ──

Output: /path/to/output_dir
XPT files: 3
Define-XML: /path/to/output_dir/define.xml
Validation: 0 high-impact issues, 2 medium-impact issues
Manifest: included

When report = TRUE (the default), submit() also generates:

  • validation-report.html — interactive HTML report with findings table
  • validation-report.xlsx — Excel workbook with findings

In interactive R sessions (RStudio, Positron) the HTML report is opened automatically in the browser or Viewer pane once the submission completes. Set report = FALSE to skip report generation (e.g., in non-interactive pipelines).

When define = TRUE (the default) and spec is provided, submit() generates:

  • define.xml — Define-XML 2.1 metadata document
  • define.html — rendered HTML version for human review

The output directory defaults to path (data and artifacts live together, as required for eCTD). If path is read-only, specify a separate writable output directory — submit() checks write access before starting and aborts with a clear error if the directory is not writable.

submit() parameter reference

| Parameter | Default | Purpose |
|---|---|---|
| path | (required) | Directory containing data files |
| output | path | Where to write generated artifacts |
| spec | NULL | Spec object or path to spec file |
| config | NULL | Pre-built rule config (e.g. "fda-sdtm-ig-3.3") |
| rules | "all" | Rule set shortcut or list of rules |
| standard | NULL | Standard for auto-config selection |
| version | NULL | Standard version |
| define | TRUE | Generate define.xml + define.html |
| report | TRUE | Generate validation-report.html + .xlsx |
| manifest | TRUE | Generate manifest.json with SHA-256 |
| format | "xpt" | Input file format ("xpt" or "json") |
| output_format | format | Output file format (conversion) |
| xpt_version | 5L | XPT transport version (5 or 8) |

Format conversion in one call

Convert a directory of Dataset-JSON files to XPT as part of packaging:

# Run the conversion example only when the optional jsonlite package is installed
if (requireNamespace("jsonlite", quietly = TRUE)) {
  # Separate temporary directories for the JSON input and the XPT output
  json_dir <- tempfile("json_in_")
  dir.create(json_dir)
  xpt_out  <- tempfile("xpt_out_")
  dir.create(xpt_out)

  # Stage two of the datasets as Dataset-JSON input files
  write_json(dm, file.path(json_dir, "dm.json"), dataset = "DM")
  write_json(ae, file.path(json_dir, "ae.json"), dataset = "AE")

  # Read JSON, write XPT; define, report and manifest generation are all
  # switched off so only the converted data files are produced
  result2 <- submit(
    path          = json_dir,
    output        = xpt_out,
    spec          = spec,
    format        = "json",
    output_format = "xpt",
    define        = FALSE,
    report        = FALSE,
    manifest      = FALSE,
    rules         = NULL
  )

  # List the .xpt files now present in the output directory
  list.files(xpt_out, pattern = "\\.xpt$")
}
#>  Converted 2 datasets (json → xpt) to /tmp/RtmpDMKflW/xpt_out_462958eee5da
#> [1] "ae.xpt" "dm.xpt"

Anchor auto-detection

herald auto-detects the subject-level anchor dataset (usually DM) from the spec and data, enabling cross-dataset rules that reference subject populations. build_anchor_index() builds a lookup index from an anchor dataset:

# Build a subject-level lookup index from DM
dm_index <- build_anchor_index(dm, key_var = "USUBJID")

# The index maps USUBJID to row numbers for fast cross-dataset lookups
length(dm_index)
#> [1] 5
names(dm_index)[1:3]
#> [1] "01-701-1015" "01-701-1023" "01-701-1028"

ADaM class detection

For ADaM submissions, detect_adam_class() classifies individual datasets and detect_adam_classes() classifies a full list:

# Example data frame carrying population flags and a treatment column
adsl_df <- data.frame(
  STUDYID = "CDISCPILOT01",  # length-one value is recycled to every row
  USUBJID = paste0("01-701-", c(1015L, 1023L, 1028L)),
  SAFFL   = rep("Y", 3L),
  ITTFL   = c("Y", "Y", "N"),
  TRTP    = c("Xanomeline High Dose", "Placebo", "Xanomeline Low Dose"),
  AGE     = c(63L, 64L, 71L),
  stringsAsFactors = FALSE
)

# Example data frame carrying adverse-event term and sequence columns
adae_df <- data.frame(
  STUDYID = "CDISCPILOT01",
  USUBJID = paste0("01-701-", c(1015L, 1023L)),
  AETERM  = c("HEADACHE", "NAUSEA"),
  AESEQ   = c(1L, 1L),
  stringsAsFactors = FALSE
)

# Classify a single dataset by its column names
detect_adam_class(names(adsl_df))   # "ADSL"
#> [1] "ADSL"
detect_adam_class(names(adae_df))   # "OCCDS"
#> [1] "OCCDS"

# Classify a named list of datasets
classes <- detect_adam_classes(list(ADSL = adsl_df, ADAE = adae_df))
classes
#>    ADSL    ADAE 
#>  "ADSL" "OCCDS"

Submission manifest

manifest.json provides SHA-256 checksums for every file in the submission, enabling integrity verification and chain-of-custody documentation:

# Structure of manifest.json:
# {
#   "submission_date": "2024-01-15T10:30:00Z",
#   "files": [
#     {
#       "path": "dm.xpt",
#       "sha256": "a3f5...",
#       "size_bytes": 2640
#     },
#     ...
#   ]
# }

Before vs After

| Task | Old way | herald |
|---|---|---|
| Package submission | Manual: run 5+ packages, copy files, write SOP | submit(path, spec = spec) |
| Generate all artifacts | Separate calls to metacore, xportr, P21, define.R | One submit() call |
| Format conversion | Manual read + write in different format | output_format = "xpt" |
| SHA-256 manifest | Manual script | manifest = TRUE |
| Validation + Define-XML + reports | 3 separate tools | All included in submit() |