strunicode

Swift-like unicode string handling. Most (all?) API operations take linear time, but in exchange they take constant space.

Beware, a sequence of Character may take 10 times as much space as a utf-8 string, thus seq[Character] should be avoided. This library does not use seq[Character] in any of its APIs.

Types

Unicode = UnicodeImpl
A unicode string
Character {...}{.shallow.} = object
  s: string
  b: Slice[int]
A unicode grapheme cluster

Funcs

func initCharacter(s: Unicode; b: Slice[int]): Character {...}{.inline, raises: [], tags: [].}
Slice a unicode grapheme cluster out of a string. This does not create a copy of the string, but in exchange, the passed string must never change (i.e., grow, shrink, or be modified) while the returned Character lives.
func `$`(c: Character): string {...}{.inline, raises: [], tags: [].}
func `==`(a, b: Character): bool {...}{.inline, raises: [], tags: [].}
Check the characters are canonically equivalent

Examples:

const
  cafeA = "Café".Unicode
const
  cafeB = "Café".Unicode
doAssert cafeA.at(3) == cafeB.at(3)
func `==`(a: openArray[char]; b: Character): bool {...}{.inline, raises: [], tags: [].}
func `==`(a: Character; b: openArray[char]): bool {...}{.inline, raises: [], tags: [].}
func `==`(a: Unicode; b: Character): bool {...}{.inline, raises: [], tags: [].}
func `==`(a: Character; b: Unicode): bool {...}{.inline, raises: [], tags: [].}
func `[]`(c: Character; i: int): char {...}{.inline, raises: [IndexError], tags: [].}
Return byte of c at position i as char
func len(c: Character): int {...}{.inline, raises: [], tags: [].}
Return number of bytes that the character takes
func `==`(a, b: Unicode): bool {...}{.inline, raises: [], tags: [].}
Check strings are canonically equivalent

Examples:

const
  cafeA = "Café".Unicode
const
  cafeB = "Café".Unicode
doAssert cafeA == cafeB
func `==`(a: openArray[char]; b: Unicode): bool {...}{.inline, raises: [], tags: [].}
func `==`(a: Unicode; b: openArray[char]): bool {...}{.inline, raises: [], tags: [].}
func count(s: Unicode): int {...}{.inline, raises: [], tags: [].}
Return the number of characters (grapheme clusters) in the string

Examples:

doAssert "🇦🇷🇺🇾🇨🇱".Unicode.count == 3
func at(s: Unicode; i: int): Character {...}{.raises: [IndexError], tags: [].}
Return the character at the given position

Examples:

doAssert "🇦🇷🇺🇾🇨🇱".Unicode.at(1) == "🇺🇾"
func at(s: Unicode; i: BackwardsIndex): Character {...}{.raises: [IndexError], tags: [].}
func atByte(s: Unicode; i: int): Character {...}{.inline, raises: [], tags: [].}
Returns the character at the given byte index. Returns an empty character if the index is out of bounds
func atByte(s: Unicode; i: BackwardsIndex): Character {...}{.inline, raises: [], tags: [].}
func lastCharacter(s: Unicode): Character {...}{.inline, raises: [], tags: [].}
Return the last character in the string. It can be used to remove the last character as well.

Examples:

doAssert "🇦🇷🇺🇾🇨🇱".Unicode.lastCharacter == "🇨🇱"
func reverse(s: var Unicode) {...}{.inline, raises: [], tags: [].}
Reverse unicode string s in-place

Examples:

var s = "🇦🇷🇺🇾🇨🇱".Unicode
s.reverse
doAssert s == "🇨🇱🇺🇾🇦🇷"
func reversed(s: Unicode): Unicode {...}{.inline, raises: [], tags: [].}
Return the reverse of s

Examples:

doAssert "🇦🇷🇺🇾🇨🇱".Unicode.reversed ==
    "🇨🇱🇺🇾🇦🇷"

Iterators

iterator items(c: Character): char {...}{.inline, raises: [], tags: [].}
Iterate over chars/bytes of a Character
iterator runes(c: Character): Rune {...}{.inline, raises: [], tags: [].}
Iterate over runes of a character
iterator items(s: Unicode): Character {...}{.inline, raises: [], tags: [].}
Return characters of s
iterator reversed(s: Unicode): Character {...}{.inline, raises: [], tags: [].}

Templates

template toOpenArray(c: Character): untyped
template toOpenArray(s: Unicode): untyped