{
  "_id": "6a494c46f6c47c4419499520",
  "Package": "wordpiece",
  "Type": "Package",
  "Title": "R Implementation of Wordpiece Tokenization",
  "Version": "2.1.3",
  "Authors@R": "c(\nperson(given = \"Jonathan\",\nfamily = \"Bratt\",\nrole = c(\"aut\", \"cre\"),\nemail = \"jonathan.bratt@macmillan.com\",\ncomment = c(ORCID = \"0000-0003-2859-0076\")),\nperson(given = \"Jon\",\nfamily = \"Harmon\",\nrole = c(\"aut\"),\nemail = \"jonthegeek@gmail.com\",\ncomment = c(ORCID = \"0000-0003-4781-4346\")),\nperson(given = \"Bedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning\",\nrole = c(\"cph\"))\n)",
  "Description": "Apply 'Wordpiece' (<arXiv:1609.08144>) tokenization to\ninput text, given an appropriate vocabulary. The 'BERT'\n(<arXiv:1810.04805>) tokenization conventions are used by\ndefault.",
  "Encoding": "UTF-8",
  "URL": "https://github.com/macmillancontentscience/wordpiece",
  "BugReports": "https://github.com/macmillancontentscience/wordpiece/issues",
  "License": "Apache License (>= 2)",
  "RoxygenNote": "7.1.2",
  "Roxygen": "list(markdown = TRUE)",
  "VignetteBuilder": "knitr",
  "Config/testthat/edition": "3",
  "Config/pak/sysreqs": "cmake make libicu-dev libuv1-dev",
  "Repository": "https://jonthegeek.r-universe.dev",
  "Date/Publication": "2022-03-03 14:09:42 UTC",
  "RemoteUrl": "https://github.com/macmillancontentscience/wordpiece",
  "RemoteRef": "HEAD",
  "RemoteSha": "3eb92c759556e89d235202c45decb2dc859e661d",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-07-04 18:06:10 UTC",
    "User": "root"
  },
  "Author": "Jonathan Bratt [aut, cre] (ORCID:\n<https://orcid.org/0000-0003-2859-0076>),\nJon Harmon [aut] (ORCID: <https://orcid.org/0000-0003-4781-4346>),\nBedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning [cph]",
  "Maintainer": "Jonathan Bratt <jonathan.bratt@macmillan.com>",
  "_user": "jonthegeek",
  "_type": "src",
  "_file": "wordpiece_2.1.3.tar.gz",
  "_fileid": "https://r2.ropensci.org/6f91359ed890267e35a43fed901d32fdfcfdc2f9d268150ac4a6cad5d09ba322",
  "_filesize": 109314,
  "_sha256": "6f91359ed890267e35a43fed901d32fdfcfdc2f9d268150ac4a6cad5d09ba322",
  "_expires": "2026-10-12T18:09:08.000Z",
  "_created": "2026-07-04T18:06:10.000Z",
  "_published": "2026-07-04T18:09:10.382Z",
  "_jobs": [
    {
      "job": 85154845026,
      "time": 138,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "8084158473"
    },
    {
      "job": 85154845027,
      "time": 147,
      "config": "linux-release-x86_64",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8084159462"
    },
    {
      "job": 85154845049,
      "time": 72,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "8084150713"
    },
    {
      "job": 85154845036,
      "time": 85,
      "config": "macos-release-arm64",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8084152223"
    },
    {
      "job": 85154622091,
      "time": 192,
      "config": "source",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8084142001"
    },
    {
      "job": 85154845014,
      "time": 119,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "8084156152"
    },
    {
      "job": 85154845034,
      "time": 67,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "8084150369"
    },
    {
      "job": 85154845048,
      "time": 128,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "8084157286"
    },
    {
      "job": 85154845032,
      "time": 69,
      "config": "windows-release",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8084150461"
    }
  ],
  "_host": "GitHub-Actions",
  "_buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773",
  "_status": "success",
  "_upstream": "https://github.com/macmillancontentscience/wordpiece",
  "_commit": {
    "id": "3eb92c759556e89d235202c45decb2dc859e661d",
    "author": "Jonathan Bratt <33073024+jonathanbratt@users.noreply.github.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "Cran prep (#32)\n\n* update vnum\r\n\r\n* update comments\r\n\r\n* Update cran-comments.md\r\n\r\nCo-authored-by: Jon Harmon <jonthegeek@gmail.com>\r\n\r\nCo-authored-by: Jon Harmon <jonthegeek@gmail.com>",
    "time": 1646316582
  },
  "_maintainer": {
    "name": "Jonathan Bratt",
    "email": "jonathan.bratt@macmillan.com",
    "login": "jonathanbratt",
    "description": "",
    "uuid": 33073024,
    "orcid": "0000-0003-2859-0076"
  },
  "_distro": "resolute",
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.3.0",
      "role": "Depends"
    },
    {
      "package": "dlr",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "fastmatch",
      "version": ">= 1.1",
      "role": "Imports"
    },
    {
      "package": "memoise",
      "version": ">= 2.0.0",
      "role": "Imports"
    },
    {
      "package": "piecemaker",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "rlang",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "version": ">= 1.0",
      "role": "Imports"
    },
    {
      "package": "wordpiece.data",
      "version": ">= 1.0.2",
      "role": "Imports"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    }
  ],
  "_owner": "macmillancontentscience",
  "_selfowned": false,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_stars": 8,
  "_contributors": [
    {
      "user": "jonathanbratt",
      "count": 14,
      "uuid": 33073024
    },
    {
      "user": "jonthegeek",
      "count": 7,
      "uuid": 33983824
    }
  ],
  "_userbio": {
    "uuid": 33983824,
    "type": "user",
    "name": "Jon Harmon",
    "followers": 361,
    "description": "Executive Director at DSLC.io (fka @r4ds) | Principal Developer (@atorus-research)"
  },
  "_downloads": {
    "count": 342,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/wordpiece"
  },
  "_devurl": "https://github.com/macmillancontentscience/wordpiece",
  "_searchresults": 8,
  "_rbuild": "4.6.1",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/wordpiece.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/macmillancontentscience/wordpiece",
  "_realowner": "macmillancontentscience",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.2",
      "date": "2021-02-11"
    },
    {
      "version": "2.0.1",
      "date": "2021-10-18"
    },
    {
      "version": "2.1.3",
      "date": "2022-03-03"
    }
  ],
  "_exports": [
    "load_or_retrieve_vocab",
    "load_vocab",
    "prepare_vocab",
    "set_wordpiece_cache_dir",
    "wordpiece_cache_dir",
    "wordpiece_tokenize",
    "wordpiece_vocab"
  ],
  "_help": [
    {
      "page": "load_or_retrieve_vocab",
      "title": "Load a vocabulary file, or retrieve from cache",
      "topics": [
        "load_or_retrieve_vocab"
      ]
    },
    {
      "page": "load_vocab",
      "title": "Load a vocabulary file",
      "topics": [
        "load_vocab"
      ]
    },
    {
      "page": "prepare_vocab",
      "title": "Format a Token List as a Vocabulary",
      "topics": [
        "prepare_vocab"
      ]
    },
    {
      "page": "set_wordpiece_cache_dir",
      "title": "Set a Cache Directory for wordpiece",
      "topics": [
        "set_wordpiece_cache_dir"
      ]
    },
    {
      "page": "wordpiece_cache_dir",
      "title": "Retrieve Directory for wordpiece Cache",
      "topics": [
        "wordpiece_cache_dir"
      ]
    },
    {
      "page": "wordpiece_tokenize",
      "title": "Tokenize Sequence with Word Pieces",
      "topics": [
        "wordpiece_tokenize"
      ]
    }
  ],
  "_readme": "https://github.com/macmillancontentscience/wordpiece/raw/HEAD/README.md",
  "_rundeps": [
    "cachem",
    "cli",
    "digest",
    "dlr",
    "fastmap",
    "fastmatch",
    "fs",
    "glue",
    "lifecycle",
    "magrittr",
    "memoise",
    "piecemaker",
    "rappdirs",
    "rlang",
    "stringi",
    "stringr",
    "vctrs",
    "wordpiece.data"
  ],
  "_vignettes": [
    {
      "source": "basic_usage.Rmd",
      "filename": "basic_usage.html",
      "title": "Using wordpiece",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Provided Vocabularies",
        "Loading a Vocabulary",
        "Tokenizing Text",
        "Vocabulary Case",
        "Representing \"Unknown\" Tokens"
      ],
      "created": "2021-01-12 21:26:23",
      "modified": "2021-09-27 14:37:17",
      "commits": 2
    }
  ],
  "_score": 4.6020599913279625,
  "_indexed": false,
  "_nocasepkg": "wordpiece",
  "_universes": [
    "jonthegeek"
  ],
  "_indexurl": "https://macmillancontentscience.r-universe.dev/wordpiece",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "2.1.3",
      "date": "2026-07-04T18:08:30.000Z",
      "distro": "resolute",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/4edc1b0497d65d839d17476f25c1d80cf111c1f79e16c5ab30a909ad54ec579a",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.6.1",
      "os": "linux",
      "version": "2.1.3",
      "date": "2026-07-04T18:08:42.000Z",
      "distro": "resolute",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/2c10aa00f1665d1494816e313285a9c33f0cc87ad5369144bf11556db99dfca4",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "2.1.3",
      "date": "2026-07-04T18:07:28.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/1efa5a177420e1ea6a857404b6f55c727bc31d3a4ff8e88073c2e47fdbefb37e",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.6.1",
      "os": "mac",
      "version": "2.1.3",
      "date": "2026-07-04T18:07:39.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/e365f7ab4e919e69116d66a293c71cc6bb511ae472c38d5fc3741421ffddd8b0",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "2.1.3",
      "date": "2026-07-04T18:08:24.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/9328e1cecb495549aa8481c5a333a167ae0402327acf34d86186daabe2d0fd47",
      "status": "success",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "2.1.3",
      "date": "2026-07-04T18:07:16.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/fe7c61753317df027d7045ce6503e6154700a846e2d523f67ac4f39667a11864",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "2.1.3",
      "date": "2026-07-04T18:08:15.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/59d87c1d88a8eed5a183b13826ffe4bf7762797e5734ba5988f3030c21bc7d8f",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    },
    {
      "r": "4.6.1",
      "os": "win",
      "version": "2.1.3",
      "date": "2026-07-04T18:07:16.000Z",
      "commit": "3eb92c759556e89d235202c45decb2dc859e661d",
      "fileid": "https://r2.ropensci.org/85e5812667c7a6e08bd05bdcac878aac1c4add0ef229d090cbb577b2e8aba60f",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28714954773"
    }
  ]
}