{
  "_id": "6a156416acfb0bcc41d61663",
  "Package": "piecemaker",
  "Title": "Tools for Preparing Text for Tokenizers",
  "Version": "1.0.2.9000",
  "Authors@R": "c(\nperson(\"Jon\", \"Harmon\", , \"jonthegeek@gmail.com\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0003-4781-4346\")),\nperson(\"Jonathan\", \"Bratt\", , \"jonathan.bratt@macmillan.com\", role = \"aut\",\ncomment = c(ORCID = \"0000-0003-2859-0076\")),\nperson(\"Bedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning\", role = \"cph\")\n)",
  "Description": "Tokenizers break text into pieces that are more usable by\nmachine learning models. Many tokenizers share some preparation\nsteps. This package provides those shared steps, along with a\nsimple tokenizer.",
  "License": "Apache License (>= 2)",
  "URL": "https://github.com/macmillancontentscience/piecemaker,\nhttps://macmillancontentscience.github.io/piecemaker/",
  "BugReports": "https://github.com/macmillancontentscience/piecemaker/issues",
  "Config/testthat/edition": "3",
  "Encoding": "UTF-8",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.2.3",
  "Config/pak/sysreqs": "libicu-dev",
  "Repository": "https://jonthegeek.r-universe.dev",
  "Date/Publication": "2023-06-02 19:46:08 UTC",
  "RemoteUrl": "https://github.com/macmillancontentscience/piecemaker",
  "RemoteRef": "HEAD",
  "RemoteSha": "b02c1a74923301545366805680e54091675305c6",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-26 09:10:15 UTC",
    "User": "root"
  },
  "Author": "Jon Harmon [aut, cre] (ORCID: <https://orcid.org/0000-0003-4781-4346>),\nJonathan Bratt [aut] (ORCID: <https://orcid.org/0000-0003-2859-0076>),\nBedford Freeman & Worth Pub Grp LLC DBA Macmillan Learning [cph]",
  "Maintainer": "Jon Harmon <jonthegeek@gmail.com>",
  "MD5sum": "a758a674aa068c414e6c53bc5bdd467f",
  "_user": "jonthegeek",
  "_type": "src",
  "_file": "piecemaker_1.0.2.9000.tar.gz",
  "_fileid": "bb3ea7cd85f9966b7ea90efa1e5c12448e1d4be83798da08e67afd101b4d6b3f",
  "_filesize": 112471,
  "_sha256": "bb3ea7cd85f9966b7ea90efa1e5c12448e1d4be83798da08e67afd101b4d6b3f",
  "_created": "2026-05-26T09:10:15.000Z",
  "_published": "2026-05-26T09:12:54.822Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77842796843,
      "time": 124,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7213172476"
    },
    {
      "job": 77842797048,
      "time": 110,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213167382"
    },
    {
      "job": 77842796786,
      "time": 94,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7213161012"
    },
    {
      "job": 77842796851,
      "time": 96,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213162451"
    },
    {
      "job": 77842323706,
      "time": 173,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213126981"
    },
    {
      "job": 77842796747,
      "time": 114,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213168673"
    },
    {
      "job": 77842796799,
      "time": 84,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7213158201"
    },
    {
      "job": 77842796812,
      "time": 55,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7213147574"
    },
    {
      "job": 77842796819,
      "time": 73,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213153611"
    }
  ],
  "_buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/macmillancontentscience/piecemaker",
  "_commit": {
    "id": "b02c1a74923301545366805680e54091675305c6",
    "author": "Jon Harmon <jonthegeek@gmail.com>",
    "committer": "Jon Harmon <jonthegeek@gmail.com>",
    "message": "Increment version number to 1.0.2.9000\n",
    "time": 1685735168
  },
  "_maintainer": {
    "name": "Jon Harmon",
    "email": "jonthegeek@gmail.com",
    "login": "jonthegeek",
    "mastodon": "@jonthegeek@fosstodon.org",
    "bluesky": "@jonthegeek.com",
    "linkedin": "in/jonthegeek",
    "description": "Executive Director at DSLC.io (fka @r4ds) | Principal Developer (@atorus-research)",
    "uuid": 33983824,
    "orcid": "0000-0003-4781-4346"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 2.10",
      "role": "Depends"
    },
    {
      "package": "cli",
      "role": "Imports"
    },
    {
      "package": "glue",
      "role": "Imports"
    },
    {
      "package": "rlang",
      "version": ">= 0.4.2",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    }
  ],
  "_owner": "macmillancontentscience",
  "_selfowned": true,
  "_usedby": 2,
  "_updates": [],
  "_tags": [],
  "_stars": 0,
  "_contributors": [
    {
      "user": "jonthegeek",
      "count": 20,
      "uuid": 33983824
    },
    {
      "user": "jonathanbratt",
      "count": 1,
      "uuid": 33073024
    }
  ],
  "_userbio": {
    "uuid": 33983824,
    "type": "user",
    "name": "Jon Harmon",
    "description": "Executive Director at DSLC.io (fka @r4ds) | Principal Developer (@atorus-research)"
  },
  "_downloads": {
    "count": 278,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/piecemaker"
  },
  "_devurl": "https://github.com/macmillancontentscience/piecemaker",
  "_pkgdown": "https://macmillancontentscience.github.io/piecemaker/",
  "_searchresults": 7,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/piecemaker.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/macmillancontentscience/piecemaker",
  "_realowner": "macmillancontentscience",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2021-08-06"
    },
    {
      "version": "1.0.1",
      "date": "2022-03-03"
    },
    {
      "version": "1.0.2",
      "date": "2023-06-02"
    }
  ],
  "_exports": [
    "prepare_and_tokenize",
    "prepare_text",
    "remove_control_characters",
    "remove_diacritics",
    "remove_replacement_characters",
    "space_cjk",
    "space_punctuation",
    "squish_whitespace",
    "tokenize_space",
    "validate_utf8"
  ],
  "_help": [
    {
      "page": "prepare_and_tokenize",
      "title": "Split Text on Spaces",
      "topics": [
        "prepare_and_tokenize"
      ]
    },
    {
      "page": "prepare_text",
      "title": "Prepare Text for Tokenization",
      "topics": [
        "prepare_text"
      ]
    },
    {
      "page": "remove_control_characters",
      "title": "Remove Non-Character Characters",
      "topics": [
        "remove_control_characters"
      ]
    },
    {
      "page": "remove_diacritics",
      "title": "Remove Diacritical Marks on Characters",
      "topics": [
        "remove_diacritics"
      ]
    },
    {
      "page": "remove_replacement_characters",
      "title": "Remove the Unicode Replacement Character",
      "topics": [
        "remove_replacement_characters"
      ]
    },
    {
      "page": "space_cjk",
      "title": "Add Spaces Around CJK Ideographs",
      "topics": [
        "space_cjk"
      ]
    },
    {
      "page": "space_punctuation",
      "title": "Add Spaces Around Punctuation",
      "topics": [
        "space_punctuation"
      ]
    },
    {
      "page": "squish_whitespace",
      "title": "Remove Extra Whitespace",
      "topics": [
        "squish_whitespace"
      ]
    },
    {
      "page": "tokenize_space",
      "title": "Break Text at Spaces",
      "topics": [
        "tokenize_space"
      ]
    },
    {
      "page": "validate_utf8",
      "title": "Clean Up Text to UTF-8",
      "topics": [
        "validate_utf8"
      ]
    }
  ],
  "_readme": "https://github.com/macmillancontentscience/piecemaker/raw/HEAD/README.md",
  "_rundeps": [
    "cli",
    "glue",
    "lifecycle",
    "magrittr",
    "rlang",
    "stringi",
    "stringr",
    "vctrs"
  ],
  "_score": 3.4771212547196626,
  "_indexed": false,
  "_nocasepkg": "piecemaker",
  "_universes": [
    "jonthegeek"
  ],
  "_indexurl": "https://macmillancontentscience.r-universe.dev/piecemaker",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:12:25.000Z",
      "distro": "noble",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "8a9158d150a2a7ce21b6baa4f82cf752031df5130a74d3ec23262fb8d411475a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:12:13.000Z",
      "distro": "noble",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "3296536feb196b88473d72a92b580a2edadc226c04873d41eb847b5b5d7c9acb",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:11:56.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "56acf31033531255dba9472d165840d92b58a57c80fd4de3c19bc21ec7a25790",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:12:00.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "3a778b95938d39d2656b76d0dd798d6a609bc7e9d9ec0a734034ae17d512c23d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:12:27.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "23d92e41c203390fd2c264ec4c80ffd1d1b170732a821079b5ce04911ea72bab",
      "status": "success",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:11:45.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "0bbebe51bacce5add24a4925b48dce2a6b45d6deefd8f453bfacf6fbcdfeb40a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:11:16.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "c408d0f5f2361e1553cd5b075cf53fbe6c47b07de3031342aa472f840697407d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.0.2.9000",
      "date": "2026-05-26T09:11:32.000Z",
      "commit": "b02c1a74923301545366805680e54091675305c6",
      "fileid": "faa3a8b80d0a7c24cff25eaa9205e91685e1bb62df76633d512acda075cd4e9f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/jonthegeek/actions/runs/26443086680"
    }
  ]
}