{
  "_id": "6a486580e06c18f0a6538b5e",
  "Package": "morphemepiece",
  "Type": "Package",
  "Title": "Morpheme Tokenization",
  "Version": "1.2.3",
  "Authors@R": "c(\nperson(given = \"Jonathan\",\nfamily = \"Bratt\",\nrole = c(\"aut\", \"cre\"),\nemail = \"jonathan.bratt@macmillan.com\",\ncomment = c(ORCID = \"0000-0003-2859-0076\")),\nperson(given = \"Jon\",\nfamily = \"Harmon\",\nrole = c(\"aut\"),\nemail = \"jonthegeek@gmail.com\",\ncomment = c(ORCID = \"0000-0003-4781-4346\")),\nperson(given = \"Bedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning\",\nrole = c(\"cph\"))\n)",
  "Description": "Tokenize text into morphemes. The morphemepiece algorithm\nuses a lookup table to determine the morpheme breakdown of\nwords, and falls back on a modified wordpiece tokenization\nalgorithm for words not found in the lookup table.",
  "URL": "https://github.com/macmillancontentscience/morphemepiece",
  "BugReports": "https://github.com/macmillancontentscience/morphemepiece/issues",
  "License": "Apache License (>= 2)",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.1.2",
  "Roxygen": "list(markdown = TRUE)",
  "VignetteBuilder": "knitr",
  "Config/testthat/edition": "3",
  "Config/pak/sysreqs": "cmake make libicu-dev libuv1-dev libx11-dev",
  "Repository": "https://jonthegeek.r-universe.dev",
  "Date/Publication": "2022-04-15 21:02:22 UTC",
  "RemoteUrl": "https://github.com/macmillancontentscience/morphemepiece",
  "RemoteRef": "HEAD",
  "RemoteSha": "bc071b1a03226b2441c431d263982f862e4dc7fd",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-07-04 01:41:01 UTC",
    "User": "root"
  },
  "Author": "Jonathan Bratt [aut, cre] (ORCID:\n<https://orcid.org/0000-0003-2859-0076>),\nJon Harmon [aut] (ORCID: <https://orcid.org/0000-0003-4781-4346>),\nBedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning [cph]",
  "Maintainer": "Jonathan Bratt <jonathan.bratt@macmillan.com>",
  "_user": "jonthegeek",
  "_type": "src",
  "_file": "morphemepiece_1.2.3.tar.gz",
  "_fileid": "https://r2.ropensci.org/fc09e47a3092319ea2f00ebbb9d7cd73258ee764ee740f36aa246f81579c62c8",
  "_filesize": 142974,
  "_sha256": "fc09e47a3092319ea2f00ebbb9d7cd73258ee764ee740f36aa246f81579c62c8",
  "_expires": "2026-10-12T01:44:30.000Z",
  "_created": "2026-07-04T01:41:01.000Z",
  "_published": "2026-07-04T01:44:32.506Z",
  "_jobs": [
    {
      "job": 85092363548,
      "time": 173,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "8077170533"
    },
    {
      "job": 85092363540,
      "time": 150,
      "config": "linux-release-x86_64",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8077167637"
    },
    {
      "job": 85092363541,
      "time": 88,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "8077159513"
    },
    {
      "job": 85092363547,
      "time": 87,
      "config": "macos-release-arm64",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8077159340"
    },
    {
      "job": 85091990642,
      "time": 339,
      "config": "source",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8077145219"
    },
    {
      "job": 85092363551,
      "time": 132,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "8077165167"
    },
    {
      "job": 85092363559,
      "time": 69,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "8077156958"
    },
    {
      "job": 85092363545,
      "time": 66,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "8077156536"
    },
    {
      "job": 85092363549,
      "time": 98,
      "config": "windows-release",
      "r": "4.6.1",
      "check": "NOTE",
      "artifact": "8077160746"
    }
  ],
  "_host": "GitHub-Actions",
  "_buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662",
  "_status": "success",
  "_upstream": "https://github.com/macmillancontentscience/morphemepiece",
  "_commit": {
    "id": "bc071b1a03226b2441c431d263982f862e4dc7fd",
    "author": "Jonathan Bratt <33073024+jonathanbratt@users.noreply.github.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "prep for CRAN (#30)\n\n",
    "time": 1650056542
  },
  "_maintainer": {
    "name": "Jonathan Bratt",
    "email": "jonathan.bratt@macmillan.com",
    "login": "jonathanbratt",
    "description": "",
    "uuid": 33073024,
    "orcid": "0000-0003-2859-0076"
  },
  "_distro": "resolute",
  "_registered": true,
  "_dependencies": [
    {
      "package": "dlr",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "fastmatch",
      "role": "Imports"
    },
    {
      "package": "magrittr",
      "role": "Imports"
    },
    {
      "package": "memoise",
      "version": ">= 2.0.0",
      "role": "Imports"
    },
    {
      "package": "morphemepiece.data",
      "role": "Imports"
    },
    {
      "package": "piecemaker",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "purrr",
      "version": ">= 0.3.4",
      "role": "Imports"
    },
    {
      "package": "readr",
      "role": "Imports"
    },
    {
      "package": "rlang",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "version": ">= 1.4.0",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "role": "Suggests"
    },
    {
      "package": "fs",
      "role": "Suggests"
    },
    {
      "package": "ggplot2",
      "role": "Suggests"
    },
    {
      "package": "here",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "remotes",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "utils",
      "role": "Suggests"
    }
  ],
  "_owner": "macmillancontentscience",
  "_selfowned": false,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_stars": 11,
  "_contributors": [
    {
      "user": "jonathanbratt",
      "count": 16,
      "uuid": 33073024
    },
    {
      "user": "jonthegeek",
      "count": 11,
      "uuid": 33983824
    }
  ],
  "_userbio": {
    "uuid": 33983824,
    "type": "user",
    "name": "Jon Harmon",
    "followers": 361,
    "description": "Executive Director at DSLC.io (fka @r4ds) | Principal Developer (@atorus-research)"
  },
  "_downloads": {
    "count": 340,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/morphemepiece"
  },
  "_devurl": "https://github.com/macmillancontentscience/morphemepiece",
  "_searchresults": 11,
  "_rbuild": "4.6.1",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/morphemepiece.html",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/macmillancontentscience/morphemepiece",
  "_realowner": "macmillancontentscience",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2021-09-09"
    },
    {
      "version": "1.0.1",
      "date": "2021-09-17"
    },
    {
      "version": "1.1.0",
      "date": "2021-10-27"
    },
    {
      "version": "1.1.2",
      "date": "2021-12-06"
    },
    {
      "version": "1.2.1",
      "date": "2022-04-08"
    },
    {
      "version": "1.2.3",
      "date": "2022-04-16"
    }
  ],
  "_exports": [
    "load_lookup",
    "load_or_retrieve_lookup",
    "load_or_retrieve_vocab",
    "load_vocab",
    "morphemepiece_cache_dir",
    "morphemepiece_lookup",
    "morphemepiece_tokenize",
    "morphemepiece_vocab",
    "prepare_vocab",
    "set_morphemepiece_cache_dir"
  ],
  "_help": [
    {
      "page": "morphemepiece-package",
      "title": "morphemepiece: Morpheme Tokenization",
      "topics": [
        "morphemepiece-package"
      ]
    },
    {
      "page": "load_lookup",
      "title": "Load a morphemepiece lookup file",
      "topics": [
        "load_lookup"
      ]
    },
    {
      "page": "load_or_retrieve_lookup",
      "title": "Load a lookup file, or retrieve from cache",
      "topics": [
        "load_or_retrieve_lookup"
      ]
    },
    {
      "page": "load_or_retrieve_vocab",
      "title": "Load a vocabulary file, or retrieve from cache",
      "topics": [
        "load_or_retrieve_vocab"
      ]
    },
    {
      "page": "load_vocab",
      "title": "Load a vocabulary file",
      "topics": [
        "load_vocab"
      ]
    },
    {
      "page": "morphemepiece_cache_dir",
      "title": "Retrieve Directory for Morphemepiece Cache",
      "topics": [
        "morphemepiece_cache_dir"
      ]
    },
    {
      "page": "morphemepiece_tokenize",
      "title": "Tokenize Sequence with Morpheme Pieces",
      "topics": [
        "morphemepiece_tokenize"
      ]
    },
    {
      "page": "prepare_vocab",
      "title": "Format a Token List as a Vocabulary",
      "topics": [
        "prepare_vocab"
      ]
    },
    {
      "page": "set_morphemepiece_cache_dir",
      "title": "Set a Cache Directory for Morphemepiece",
      "topics": [
        "set_morphemepiece_cache_dir"
      ]
    }
  ],
  "_readme": "https://github.com/macmillancontentscience/morphemepiece/raw/HEAD/README.md",
  "_rundeps": [
    "bit",
    "bit64",
    "cachem",
    "cli",
    "clipr",
    "cpp11",
    "crayon",
    "digest",
    "dlr",
    "fastmap",
    "fastmatch",
    "fs",
    "glue",
    "hms",
    "lifecycle",
    "magrittr",
    "memoise",
    "morphemepiece.data",
    "piecemaker",
    "pillar",
    "pkgconfig",
    "prettyunits",
    "progress",
    "purrr",
    "R6",
    "rappdirs",
    "readr",
    "rlang",
    "stringi",
    "stringr",
    "tibble",
    "tidyselect",
    "tzdb",
    "utf8",
    "vctrs",
    "vroom",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "generating_vocab.Rmd",
      "filename": "generating_vocab.html",
      "title": "Generating a Vocabulary and Lookup",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Check coverage",
        "Other measures of coverage quality"
      ],
      "created": "2021-07-29 19:56:28",
      "modified": "2021-10-26 20:24:03",
      "commits": 5
    },
    {
      "source": "algorithm_test.Rmd",
      "filename": "algorithm_test.html",
      "title": "Testing the fall-through algorithm",
      "engine": "knitr::rmarkdown",
      "headings": [],
      "created": "2021-07-29 19:56:28",
      "modified": "2021-09-06 19:26:32",
      "commits": 3
    }
  ],
  "_score": 5.08278537031645,
  "_indexed": false,
  "_nocasepkg": "morphemepiece",
  "_universes": [
    "jonthegeek"
  ],
  "_indexurl": "https://macmillancontentscience.r-universe.dev/morphemepiece",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.2.3",
      "date": "2026-07-04T01:43:53.000Z",
      "distro": "resolute",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/8a4fda93f2f4b64d5c6954868b7de8bd49ce37e580260b0a804b90d70e7a4ed1",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.6.1",
      "os": "linux",
      "version": "1.2.3",
      "date": "2026-07-04T01:43:30.000Z",
      "distro": "resolute",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/18cc717b5362fe600ec61f37bc5d3530ee34b30317f4d6c7f057b0c32cfd645c",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.2.3",
      "date": "2026-07-04T01:42:33.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/84c0d3528de1b866ddfac233790c7a62604d28d478e132c90e5d9b53e57111b6",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.6.1",
      "os": "mac",
      "version": "1.2.3",
      "date": "2026-07-04T01:42:32.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/7992047579f97a85990f828cdbec530f93dd25952ae9ca687510b2d0fad99203",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.2.3",
      "date": "2026-07-04T01:43:26.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/f9d00963374b5f95b20d91e0bd58588caa5503202adb4e7d5ee138bc29ecec32",
      "status": "success",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.2.3",
      "date": "2026-07-04T01:42:08.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/45999f89a0cfc3ba8ff65e1c184d497806cea2b472446f09a92b0b16400991f9",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.2.3",
      "date": "2026-07-04T01:42:05.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/e9e115f0b0816e1f10ae8af7c46665f9b9825c7d75b49db315e87552ed23c0c6",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    },
    {
      "r": "4.6.1",
      "os": "win",
      "version": "1.2.3",
      "date": "2026-07-04T01:42:10.000Z",
      "commit": "bc071b1a03226b2441c431d263982f862e4dc7fd",
      "fileid": "https://r2.ropensci.org/ee2e5b6a877be3143d314d4a5e51d600caeac447c826d58a9eaf760f78c157ed",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/28690935662"
    }
  ]
}