Dokumentation för denna modul finns på /dok (redigera), /test


Användning

redigera

Enbart via {{ipa}}.

Parametrar

redigera
  • (namngiven och frivillig) word= behövs enbart för dokumentation och testande, eller på uppslag för vissa ord på finska då modulen behöver "hjälp" för att producera rätt resultat

Självtest

redigera
perkele -> /ˈperkele/, [ˈpe̞rke̞le̞]
pistenäyttö -> /ˈpistenæytːø/, [ˈpis̠t̪e̞næˌyt̪ːø̞]
pistenäyttö (piste*-näyttö) -> /ˈpisteˣˌnæy̯tːø/, [ˈpis̠t̪e̞nˌnæy̯t̪ːø̞]
perustelemattomalta -> /ˈperustelemɑtːomɑltɑ/, [ˈpe̞rus̠t̪e̞le̞ˌmɑt̪ːo̞ˌmɑlt̪ɑ]

en.wiktionary

redigera
-- based on https://en.wiktionary.org/wiki/Module:fi-IPA
-- and https://en.wiktionary.org/wiki/Module:fi-hyphenation
-- this module must NOT be "updated" by blindly copying from those pages
-- authors: "Surjection" et al
-- modification for non-en wiktionaries: "Taylor 49"

-- Table returned at the end of the module; the public entry point is attached to it.
local export = {}

-- Spelling-level (orthographic) letter classes used by generate_hyphenation.
-- The unusual "woowel"/"coonsonant" spelling keeps these apart from the
-- IPA-level `vowels`/`consonants` classes defined further down in this file.
local woowels = "aeiouyåäö"
local woowel = "[" .. woowels .. "]"
-- "*" is listed as a consonant; letters_phonemes later maps "*" to "ˣ".
local coonsonants = "bcdfghjklmnpqrstvwxzšžʔ*"
local coonsonant = "[" .. coonsonants .. "]"

-- orthographic symbols that signify separation of syllables
-- (the leading "-" is literal because it is the first character when this
-- string is spliced into a "[...]" pattern set)
local sep_symbols = "-'’./ "
-- these signify that the next syllable is an "initial" syllable in a word
-- all symbols from here should also be in sep_symbols
local stressed_symbols = "-/ "

-- diphthongs and long woowels
-- in initial syllables
-- (each entry is a two-letter pattern fragment; generate_hyphenation anchors
-- it with "^" at the current position)
local woowel_sequences_initial = {
  "[aeouyäö]i",
  "[aoei]u",
  "[eiäö]y",
  "uo",
  "ie",
  "yö",
  "aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}

-- in non-initial syllables
-- further, diphthongs ending _u or _y are diphthongs only
-- in non-initial syllables if the syllable is open
local woowel_sequences_noninitial = {
  "[aeouyäö]i",
  "aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö"
}

-- in non-initial *open* syllables, in addition to above
-- (generate_hyphenation only accepts these when the pair is followed by a
-- non-consonant, or by a consonant plus a vowel letter)
local woowel_sequences_noninitial_open = {
  "[aoei]u",
  "[eiäö]y"
}

-- Splits `word` into a list of orthographic syllables.
--
-- Parameters:
--   word                        string to hyphenate (case is preserved in the
--                               output; matching is done on a lowercased copy)
--   keep_sep_symbols            true: every separator symbol is kept at the
--                               start of the syllable that follows it;
--                               string: only separators contained in that
--                               string are kept; anything else: separators
--                               are dropped
--   allow_diphthongs_everywhere only for backwards compatibility, assume
--                               false; when truthy the initial-syllable
--                               vowel sequences are accepted in any syllable
--
-- Returns: a sequence of syllable strings. Empty strings can occur in it
-- (for example before a leading separator, or for an empty word); this is
-- kept as-is because callers index into the list by position.
--
-- Intentionally left global, as in the original, so that existing references
-- to the name keep working.
function generate_hyphenation(word, keep_sep_symbols, allow_diphthongs_everywhere)
  local res = {}
  local syllable = ""
  local pos = 1
  local found_woowel = false
  local initial_syllable = true

  -- Loop-invariant values: the lowercased copy used for matching, and the
  -- length in *characters*. `pos` is a character index for mw.ustring, so
  -- the byte length (#word) would overshoot on non-ASCII words.
  local lowered = mw.ustring.lower(word)
  local length = mw.ustring.len(word) or #word

  while pos <= length do
    if mw.ustring.find(lowered, "^" .. coonsonant .. woowel, pos) then
      -- CV: end current syllable if we have found a woowel
      if found_woowel then
        table.insert(res, syllable)
        initial_syllable = false
        found_woowel = false
        syllable = ""
      end
      syllable = syllable .. mw.ustring.sub(word, pos, pos)
      pos = pos + 1
    elseif mw.ustring.find(lowered, "^" .. coonsonant, pos) then
      -- C: continue
      syllable = syllable .. mw.ustring.sub(word, pos, pos)
      pos = pos + 1
    elseif mw.ustring.find(lowered, "^" .. woowel, pos) then
      if found_woowel then
        -- already found a woowel, end current syllable
        table.insert(res, syllable)
        initial_syllable = false
        syllable = ""
      end
      found_woowel = true

      -- check for diphthongs or long woowels
      local woowel_sequences = (allow_diphthongs_everywhere or initial_syllable) and woowel_sequences_initial or woowel_sequences_noninitial
      local seq_ok = false
      for _, v in ipairs(woowel_sequences) do
        if mw.ustring.find(lowered, "^" .. v, pos) then
          seq_ok = true
          break
        end
      end

      -- _u / _y diphthongs are additionally allowed in non-initial syllables
      -- when the syllable is open: followed by a non-consonant, or by a
      -- consonant that is itself followed by a vowel letter
      if not seq_ok and not initial_syllable then
        for _, v in ipairs(woowel_sequences_noninitial_open) do
          if mw.ustring.find(lowered, "^" .. v .. "[^" .. coonsonants .. "]", pos) or mw.ustring.find(lowered, "^" .. v .. "[" .. coonsonants .. "][" .. woowels .. "]", pos) then
            seq_ok = true
            break
          end
        end
      end

      if seq_ok then
        syllable = syllable .. mw.ustring.sub(word, pos, pos + 1)
        pos = pos + 2
      else
        syllable = syllable .. mw.ustring.sub(word, pos, pos)
        pos = pos + 1
      end
    elseif mw.ustring.find(lowered, "^[" .. sep_symbols .. "]", pos) then
      -- separates syllables
      table.insert(res, syllable)

      local sepchar = mw.ustring.sub(word, pos, pos)
      -- truthy (a match position) when the separator starts a new "initial" syllable
      initial_syllable = mw.ustring.find(sepchar, "^[" .. stressed_symbols .. "]")
      -- Plain (non-pattern) search: with a pattern search the separator "."
      -- would act as a wildcard and be kept for any non-empty string.
      local keep = keep_sep_symbols == true or (type(keep_sep_symbols) == "string" and keep_sep_symbols:find(sepchar, 1, true))
      syllable = keep and sepchar or ""
      pos = pos + 1
      found_woowel = false
    else
      -- ?: continue
      syllable = syllable .. mw.ustring.sub(word, pos, pos)
      pos = pos + 1
    end
  end

  -- flush the last syllable (always done, even when it is empty)
  table.insert(res, syllable)

  return res
end--function generate_hyphenation

-- Short alias for the Unicode-aware gsub (used in IPA_wordparts).
local gsub = mw.ustring.gsub

-- Builds a string from code points.
local U = mw.ustring.char
local nonsyllabic = U(0x32F)  -- inverted breve below
local unreleased = U(0x31A)   -- appended after plosives in IPA_wordparts when "ˣ" precedes a stress mark or space
local long = "ː"              -- length mark

-- Spelling -> phoneme map. Keys may be longer than one letter; IPA_word picks
-- the longest key that matches at the current position. Single letters that
-- map to themselves are added by a loop further down; letters missing from
-- the table are copied through unchanged by IPA_word.
local letters_phonemes = {
  ["a"] = "ɑ",
  ["ä"] = "æ",
  ["ö"] = "ø",
  ["å"] = "o",

  ["g"] = "ɡ",
  ["q"] = "k",
  ["v"] = "ʋ",
  ["š"] = "ʃ",
  ["ž"] = "ʒ",

  ["x"] = "ks",
  ["zz"] = "ts",
  ["ng"] = "ŋː",
  ["nk"] = "ŋk",
  ["nkk"] = "ŋkː",
  ["qu"] = "kʋ",
  ["*"] = "ˣ",
  ["’"] = ".",
}

local lookahead = 3 -- how many unstressed syllables at most in a single unit, thus max consecutive unstressed syllables

-- IPA-level character classes (as opposed to the spelling-level
-- woowels/coonsonants used for hyphenation).
local vowels = "ɑeiouyæø"
local vowel = "[" .. vowels .. "]"
-- NOTE(review): "r" occurs twice in this string; harmless inside a "[...]" set.
local consonants = "kptɡgbdfʔsnmŋlrhʋʃʒrjçɦx"
local consonant = "[" .. consonants .. "]"
-- combining diacritics that may follow a phoneme in the narrow transcription
local diacritics = "̝̞̠̪"
local diacritic = "[" .. diacritics .. "]"

-- Classes that accept both IPA symbols and not-yet-converted spelling
-- letters; used by is_light_syllable, which runs before IPA conversion.
local spelled_consonants = "cšvwxzž"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "aäö"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"

local tertiary = "ˌ" -- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?

-- Characters that mark the following syllable as stressed / word-initial.
-- Since `tertiary` currently equals "ˌ", that character appears twice in the
-- sets below, which has no effect on matching.
local stress_indicator = "[ ˈˌ" .. tertiary .. "/-]"
local plosives = "kptbdɡ"

local stress_p = "[ˈˌ" .. tertiary .. "]"   -- any stress mark
local stress_s = "[ˌ" .. tertiary .. "]"    -- non-primary stress marks only

-- Phoneme -> narrow-transcription replacement (adds a combining diacritic).
-- Applied in IPA_word when is_narrow is set.
local replacements_narrow = {
  ["e"] = "e̞",
  ["ø"] = "ø̞",
  ["o"] = "o̞",
  ["t"] = "t̪",
  ["s"] = "s̠"
}

--  This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
--  ("u" is listed twice in the string; the second assignment is a no-op.)
for letter in mw.ustring.gmatch("eiouydhfjklmnprstu", ".") do
  letters_phonemes[letter] = letter
end

--[[ These patterns find the diphthongs in the IPA transcription,
    so that the nonsyllabic diacritic can be added
    (see handle_diphthongs). ]]
-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
  "[ɑeouyæø]i"
}
-- /_U/ diphthongs can appear in the initial syllable or later open syllables (no consonantal coda)
local diphthongs_u = {
  "[ɑoei]u",
  "[eiæø]y",
}
-- rising diphthongs can only appear in the initial syllable (of a word, compound word part, etc.)
local diphthongs_rising = {
  "uo",
  "ie",
  "yø",
}

-- Pattern -> replacement pairs applied by IPA_wordparts to both the broad
-- and the narrow transcription.
-- NOTE(review): IPA_wordparts walks this table with pairs(), so the order in
-- which the rules run is not defined by the language; confirm that no rule
-- depends on another having run first before reordering or adding entries.
local post_fixes = {
  ["t̪s̠"] = "ts̠",         -- t is alveolar in /ts/
  ["nt̪"] = "n̪t̪",         -- n is dental in /nt/
  ["ˈŋn"] = "ˈɡn",       -- initial <gn> is /gn/
               -- ŋ is short before consonant (by default)
  ["ŋ"..long.."("..consonant..")"] = "ŋ%1",
               -- dissimilation of vowels by sandhi
               -- (same vowel on both sides of a non-primary stress mark
               -- gets an optional glottal stop in between)
  ["("..vowel..diacritic.."*"..long.."?)("..stress_s..")%1"] = "%1%2(ʔ)%1"
}

-- Additional pattern -> replacement pairs applied by IPA_wordparts to the
-- narrow transcription only. The same pairs() ordering caveat applies.
local post_fixes_narrow = {
               -- long j, v after i, u diphthong
  ["(i"..nonsyllabic..")j("..vowel..")"] = "%1j("..long..")%2",
               -- /ʋ/ after /u/ usually realized as /w/
               -- (see Suomi, Toivanen and Ylitalo 2008)
  ["(u"..nonsyllabic..")ʋ("..vowel..")"] = "%1w("..long..")%2",
               -- cleanup
               -- (drops a "." that directly follows a non-primary stress mark)
  ["("..stress_s..")%."] = "%1",
               -- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
  ["nm"] = "m" .. long,
  ["n([pb])"] = "m%1",
  ["n("..stress_p.."%s*)([ɡk])"] = "ŋ%1%2",
  ["n("..stress_p.."%s*)([mpb])"] = "m%1%2",
               -- handle potentially long consonants over secondary stresses
  ["("..stress_s..")("..consonant..diacritic.."*)%("..long.."%)"] = "(%2)%1%2",
  ["("..consonant..diacritic.."*)%("..long.."%)("..stress_s..")"] = "%2%1("..long..")",
               -- ŋ + tertiary stress + ɡ becomes ŋ + tertiary stress + ŋ
    ["(ŋ"..diacritic.."*)"..tertiary.."ɡ"] = "%1"..tertiary.."ŋ"
}

-- Tells whether a (not yet IPA-converted) syllable is light: shorter than
-- four characters and shaped as an optional separator symbol, an optional
-- consonant and exactly one vowel.
-- Returns false for syllables of four or more characters; otherwise the
-- match position (truthy) or nil.
function is_light_syllable(syllable)
  if mw.ustring.len(syllable) >= 4 then
    return false
  end

  local light_shape = "^[" .. "-'’./ " .. "]?" .. spelled_consonant .. "?" .. spelled_vowel .. "$"
  local match_start = mw.ustring.find(mw.ustring.lower(syllable), light_shape)
  return match_start
end

-- Looks ahead from syllable index `start` for a syllable that is not light.
-- The scan covers at most `lookahead` further syllables and never includes
-- the last syllable of `hyph`.
-- Returns true as soon as a non-light syllable is found, false otherwise.
function has_later_heavy_syllable(hyph, start)
  local last_checked = math.min(start + lookahead, #hyph - 1)
  local cursor = start

  while cursor <= last_checked do
    if not is_light_syllable(hyph[cursor]) then
      return true
    end
    cursor = cursor + 1
  end

  return false
end

-- applied *before* IPA conversion
--
-- Hyphenates `word` (keeping separator symbols) and inserts rhythmic stress
-- marks between syllables.
--
-- Returns two values:
--   1. the word with each syllable that received rhythmic stress prefixed by
--      the `tertiary` stress character
--   2. a table indexed by hyphen-separated component number (starting at 2);
--      the value is true when that component was introduced by a rhythmic
--      stress mark rather than by a hyphen already present in the text
local function add_secondary_stress(word)
  local hyph = generate_hyphenation(word, true)
  local res = ""
  local last_index = #hyph
  -- Stress state of the previous syllable. Kept local so that nothing leaks
  -- into the global environment; it is always written at the end of an
  -- iteration before the next iteration reads it.
  local prev_stress = false

  -- find stressed syllables and add secondary stress before each syllable
  for index, syllable in ipairs(hyph) do
    local stressed = false
    local has_symbol = mw.ustring.find(syllable, "^[" .. "-'’./ " .. "ˈˌ" .. tertiary .. "]")

    if has_symbol then
      -- check if symbol indicates stress
      stressed = mw.ustring.find(syllable, "^" .. stress_indicator)
      has_symbol = stressed
    end

    if not stressed then
      if index == 1 then
        stressed = true
      elseif not prev_stress and index < last_index then
        -- shift stress if current syllable light and a heavy syllable occurs later (except as the last syllable)
        stressed = index == last_index - 1 or not is_light_syllable(syllable) or not has_later_heavy_syllable(hyph, index + 1)
      end
    end

    -- check if next syllable already stressed
    -- if is, do not stress this syllable
    if stressed and index < last_index then
      stressed = stressed and not mw.ustring.find(hyph[index + 1], "^" .. stress_indicator)
    end

    -- "-$" is a temporary marker for "rhythmic stress goes here"; it is
    -- rewritten below
    if index > 1 and stressed and not has_symbol then
      res = res .. "-$"
    end
    res = res .. syllable

    prev_stress = stressed
  end

  -- Walk over every hyphen: "-$" becomes the tertiary stress character and is
  -- recorded as a non-initial component, a plain "-" is left unchanged.
  local noninitial = {}
  local index = 1
  res = mw.ustring.gsub(res, "-([$]?)",
    function (dollar)
      index = index + 1
      noninitial[index] = #dollar > 0
      return #dollar > 0 and tertiary or "-"
    end)

  return res, noninitial
end

-- Marks diphthongs in an IPA string by appending the nonsyllabic diacritic
-- after the second vowel.
--   IPA            transcription to process
--   strict_initial when truthy, "initial syllable" means the very start of
--                  the string; otherwise it means right after any stress
--                  indicator character
-- Returns the processed string.
local function handle_diphthongs(IPA, strict_initial)
  local ufind = mw.ustring.find
  local ugsub = mw.ustring.gsub
  local with_mark = "%0" .. nonsyllabic

  -- /_i/ diphthongs: allowed in every syllable
  for _, pattern in ipairs(diphthongs_i) do
    IPA = ugsub(IPA, pattern, with_mark)
  end

  -- prefix that pins a pattern to the first vowel of an initial syllable
  local initial_prefix
  if strict_initial then
    initial_prefix = "^[^" .. vowels .. "]*"
  else
    initial_prefix = stress_indicator .. "[^" .. vowels .. "]*"
  end

  -- rising diphthongs: initial syllables only
  for _, pattern in ipairs(diphthongs_rising) do
    IPA = ugsub(IPA, initial_prefix .. pattern, with_mark)
  end

  -- Decides for one /_u/ or /_y/ candidate whether its syllable is open.
  -- It is closed only when a consonant follows that is not in turn followed
  -- (diacritics aside) by a vowel; in that case the text is left unmarked.
  local function mark_if_open(diphthong, after)
    local consonant_follows = ufind(after, "^" .. consonant)
    local onset_follows = ufind(after, "^" .. consonant .. diacritic .. "*" .. vowel)
    if consonant_follows and not onset_follows then
      return diphthong .. after
    end
    return diphthong .. nonsyllabic .. after
  end

  for _, pattern in ipairs(diphthongs_u) do
    -- initial syllables
    IPA = ugsub(IPA, initial_prefix .. pattern, with_mark)

    -- open non-initial syllables: first candidates followed by more text,
    -- then a candidate at the very end of the string
    IPA = ugsub(IPA, "(" .. pattern .. ")([^" .. nonsyllabic .. "].+)", mark_if_open)
    IPA = ugsub(IPA, "(" .. pattern .. ")($)", mark_if_open)
  end

  return IPA
end

-- Converts one word component (no hyphens) from spelling to IPA.
--   term        lowercased spelling of the component
--   is_narrow   when truthy, also applies the narrow-transcription steps
--               (allophones of h, length marks, diphthongs, diacritics)
--   has_initial in narrow mode only: when truthy, diphthongs are marked with
--               the start of the component counted as the initial syllable
-- Returns the transcription string, without any stress mark.
local function IPA_word(term, is_narrow, has_initial)
  local ulen, usub, ugsub = mw.ustring.len, mw.ustring.sub, mw.ustring.gsub

  local pieces = {}
  local remaining = term

  -- Greedy conversion: at each position take the longest spelling listed in
  -- letters_phonemes; characters that are not listed are copied unchanged.
  while ulen(remaining) > 0 do
    local best = ""
    for spelling in pairs(letters_phonemes) do
      local width = ulen(spelling)
      if width > ulen(best) and usub(remaining, 1, width) == spelling then
        best = spelling
      end
    end

    if ulen(best) > 0 then
      pieces[#pieces + 1] = letters_phonemes[best]
      remaining = usub(remaining, ulen(best) + 1)
    else
      pieces[#pieces + 1] = usub(remaining, 1, 1)
      remaining = usub(remaining, 2)
    end
  end

  local result = table.concat(pieces)
  if not is_narrow then
    return result
  end

  -- articulation of h, chosen from the characters on either side of it.
  -- Returning nothing leaves the matched text untouched.
  -- NOTE(review): the :find calls below are pattern searches, so a "."
  -- neighbour matches any character; kept as in the original.
  local function articulate_h(before, after)
    if after == "" or before == "" or not vowels:find(before) then
      return
    end

    local h
    if consonants:find(after) then
      if before == "i" or before == "y" then
        -- vihma, yhtiö
        h = "ç"
      elseif before == "ɑ" or before == "o" or before == "u" then
        -- mahti, kohme, tuhka
        h = "x"
      end
    elseif vowels:find(after) then
      -- maha
      h = "ɦ"
    end

    if h then
      return before .. h .. after
    end
  end
  result = ugsub(result, "(.?)h(.?)", articulate_h)

  -- double letter replacement and diphthongs must be handled earlier here
  result = ugsub(result, "(%a)%1", "%1" .. long)
  if has_initial then
    result = handle_diphthongs(result, true)
  end

  -- add the narrow-transcription diacritics
  for plain, narrowed in pairs(replacements_narrow) do
    result = ugsub(result, plain, narrowed)
  end

  return result
end

-- Transcribes a whole term, which may consist of several components joined
-- by "-" or "/", and adds stress marks.
--   term      spelling (any case); a leading "-" marks a suffix and a
--             trailing "-" marks a prefix, and is put back on the result
--   is_narrow truthy for the narrow (phonetic) transcription, falsy for the
--             broad (phonemic) one
-- Returns the IPA string without the surrounding slashes or brackets.
--
-- Intentionally left global, as in the original, so that existing references
-- to the name keep working.
function IPA_wordparts(term, is_narrow)
  term = mw.ustring.lower(term)
  local notinitial = {} -- true if the component is not an initial component
  local hyphenstress = "ˌ" -- secondary by default
  local is_prefix = false
  local is_suffix = false

  if mw.ustring.find(term, "%/") then
    hyphenstress = tertiary -- tertiary if we have slashes
  end

  if is_narrow then
    term, notinitial = add_secondary_stress(term)
  end

  local found
  term, found = mw.ustring.gsub(term, "^%-+", "")
  is_suffix = found > 0
  term, found = mw.ustring.gsub(term, "%-+$", "")
  is_prefix = found > 0

  -- make sure we keep slashes to figure out if secondary or tertiary
  term = mw.ustring.gsub(term, "%/", "-%1")
  local wordparts = mw.text.split(term, "-", true)

  for key, val in ipairs(wordparts) do
    local stress = key > 1 and hyphenstress or "ˈ"
    local part = val

    if mw.ustring.find(part, "^%/") then
      stress = "ˌ" -- always secondary
      part = part:sub(2)
    end

    wordparts[key] = stress .. IPA_word(part, is_narrow, not notinitial[key])
  end

  -- Local on purpose: a bare assignment here would create a global variable.
  local IPA = table.concat(wordparts, "")

  if is_narrow then
    -- handle * in narrow transcription
    -- captures: whitespace, an optional stress mark, then the next character
    -- together with its diacritics (`after`) and that character alone
    IPA = mw.ustring.gsub(IPA, "ˣ(%s*)("..stress_p.."?)((.?)" .. diacritic .. "*)",
      function (space, stress, after, potential_consonant)
        if potential_consonant == "" then
          -- nothing follows: optional glottal stop
          return space .. stress .. "(ʔ)" .. after
        elseif consonants:find(potential_consonant) then
          if #space > 0 or #stress > 0 then
            -- consonant across a space or stress mark: repeat it before the
            -- boundary, marked unreleased when it is a plosive
            local amark = ""
            if plosives:find(mw.ustring.sub(after, 1, 1)) then
              amark = unreleased
            end
            return after .. amark .. space .. stress .. after
          else
            -- consonant directly after: lengthen it
            return space .. after .. long
          end
        else
          -- anything else follows: glottal stop
          return space .. stress .. "ʔ" .. after
        end
      end)
  else
    --  Replace double letters (vowels or consonants) with single letter plus length sign.
    IPA = gsub(IPA, "(%a)%1", "%1" .. long)
    IPA = handle_diphthongs(IPA, false)
  end

  -- NOTE(review): both tables are traversed with pairs(), so the order in
  -- which the rules are applied is not defined by the language.
  for letter, phoneme in pairs(post_fixes) do
    IPA = mw.ustring.gsub(IPA, letter, phoneme)
  end

  if is_narrow then
    for letter, phoneme in pairs(post_fixes_narrow) do
      IPA = mw.ustring.gsub(IPA, letter, phoneme)
    end
  end

  if is_prefix then
    IPA = IPA .. "-"
  end
  if is_suffix then
    IPA = "-" .. IPA
  end

  return IPA
end

-- Entry point for #invoke.
--   arxframent  frame object; only arxframent.args["word"] is read
-- The term to transcribe is the current page title, unless the "word"
-- argument is a non-empty string, in which case that string is used instead.
-- Returns "/broad/, [narrow]".
function export.IPA (arxframent)
  local IPA_input = mw.title.getCurrentTitle().text -- {{PAGENAME}}

  -- Kept local; a bare assignment would create a global variable.
  local strover = arxframent.args["word"] -- use only if needed
  if type(strover) == "string" and strover ~= "" then
    IPA_input = strover -- override only if parameter is non-empty (due to forwarding)
  end

  return "/" .. IPA_wordparts(IPA_input, false) .. "/, [" .. IPA_wordparts(IPA_input, true) .. "]"
end

return export