Pen Settings

HTML

CSS

CSS Base

Vendor Prefixing

Add External Stylesheets/Pens

Any URLs added here will be added as <link>s in order, and before the CSS in the editor. You can use the CSS from another Pen by using its URL and the proper URL extension.

+ add another resource

JavaScript

Babel includes JSX processing.

Add External Scripts/Pens

Any URLs added here will be added as <script>s in order, and run before the JavaScript in the editor. You can use the URL of any other Pen and it will include the JavaScript from that Pen.

+ add another resource

Packages

Add Packages

Search for and use JavaScript packages from npm here. By selecting a package, an import statement will be added to the top of the JavaScript editor for this package.

Behavior

Auto Save

If active, Pens will autosave every 30 seconds after being saved once.

Auto-Updating Preview

If enabled, the preview panel updates automatically as you code. If disabled, use the "Run" button to update.

Format on Save

If enabled, your code will be formatted when you actively save your Pen. Note: your code becomes un-folded during formatting.

Editor Settings

Code Indentation

Want to change your Syntax Highlighting theme, Fonts and more?

Visit your global Editor Settings.

HTML

              
                <!-- Placeholder field: the script reads this element by its id (identifierElm); the pattern restricts it to one printable ASCII character. -->
                <p>Missing character identifier: <input type="text" id="identifierElm" value="?" required pattern="[ -~]" />(← This represents one missing byte)</p>
<!-- Kanji level toggles: the script listens for "change" on jis1cb..jis4cb and re-solves. -->
<p lang="ja">
  漢字:
  <input type="checkbox" name="jis1cb" id="jis1cb" checked />
  <label for="jis1cb">JIS 第一水準漢字</label>
  <input type="checkbox" name="jis2cb" id="jis2cb" checked />
  <label for="jis2cb">JIS 第二水準漢字</label>
  <input type="checkbox" name="jis3cb" id="jis3cb" checked />
  <label for="jis3cb">JIS 第三水準漢字</label>
  <input type="checkbox" name="jis4cb" id="jis4cb" checked />
  <label for="jis4cb">JIS 第四水準漢字</label>
</p>
<!-- Validation message: hidden by default and revealed by the stylesheet while identifierElm is :invalid. -->
<p id="identifierElmIssue">Missing character identifier has to be one single ASCII character.</p>
<!-- conv1 / conv2 replace every U+FFFD in the textarea with one or two placeholder characters. -->
<p>Input: (Convert one &#xfffd; into <button id="conv1">one missing byte</button> or <button id="conv2">two missing bytes</button>) </p>
<!-- Mojibake input; the script re-solves on every "input" event. -->
<textarea name="inputElm" id="inputElm" cols="30" rows="10">譁?蟄怜喧縺代@縺滓律譛ャ隱槭?ョ繝?繧ュ繧ケ繝医?ョ萓?</textarea>
<p>Output:</p>
<!-- Result area written by solve(); parenthesised groups list the candidate characters for one damaged character. -->
<pre id="outputElm" lang="ja">(斂斃文斈斉斌斎斐斑斗料斛斜斟)字化けした日本語(の)(ダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミム)キスト(の)(侃來侈侊例侍侏侑侒侔侖侘侚供依侠)</pre>
              
            
!

CSS

              
                /* Let long output lines wrap while keeping their line breaks. */
                pre {
  white-space: pre-wrap;
}
/* The validation message stays hidden while the identifier field is valid. */
#identifierElmIssue {
  display: none;
}

/* Show the message in red whenever #identifierElm fails its required/pattern constraints. */
:root:has(#identifierElm:invalid) #identifierElmIssue {
  display: block;
  color: red;
}

              
            
!

JS

              
                import {
  TextEncoder,
  TextDecoder,
  EncodingIndexes,
  getEncoding
} from "https://esm.sh/text-decoding";

/**
 * Encodes a string into bytes using the named codec.
 *
 * @param {string} text - Text to encode.
 * @param {string} codec - Encoding label handed to the TextEncoder constructor.
 * @returns {Uint8Array} The encoded bytes.
 */
function encode(text, codec) {
  // The option name indicates that non-UTF-8 (legacy) codecs are opt-in for this encoder.
  const encoderOptions = { NONSTANDARD_allowLegacyEncoding: true };
  const encoder = new TextEncoder(codec, encoderOptions);
  return encoder.encode(text);
}
/**
 * Decodes bytes into a string using the named codec.
 *
 * @param {Uint8Array} byteArr - Bytes to decode.
 * @param {string} codec - Encoding label handed to the TextDecoder constructor.
 * @returns {string} The decoded text.
 */
function decode(byteArr, codec) {
  const decoder = new TextDecoder(codec);
  return decoder.decode(byteArr);
}

// Placeholder character that stands for one missing byte in the input, and the
// byte value it encodes to (63 is "?").
let identifier = "?";
let identifierByte = 63;
identifierElm.addEventListener("input", (e) => {
  // Skip values rejected by the field's own required/pattern constraints (empty,
  // several characters, non-ASCII): they would leave identifierByte undefined or
  // pointing at a multi-byte lead byte. The last valid placeholder stays active.
  if (!e.target.validity.valid) return;
  identifier = e.target.value;
  identifierByte = encode(identifier, "utf8")[0];
  // The displayed result depends on the placeholder, so refresh it right away.
  solve();
});

// Whether candidates from each JIS kanji level may appear among the guesses;
// toggled by the matching checkboxes.
let jis1 = true;
let jis2 = true;
let jis3 = true;
let jis4 = true;

/**
 * Guesses how many bytes the UTF-8 character at the start of `byteArr` spans.
 *
 * When the first byte is intact, the size comes from its UTF-8 lead-byte
 * pattern. When the first byte is the missing-byte placeholder
 * (`identifierByte`), the character is taken to cover that byte plus every
 * directly following placeholder or continuation byte (10xxxxxx).
 *
 * @param {Uint8Array} byteArr - Remaining bytes; must not be empty.
 * @returns {number} Guessed length of the first character in bytes.
 * @throws {Error} If `byteArr` is empty, starts with a continuation byte, or
 *   starts with a byte that matches no UTF-8 lead pattern.
 */
function guessSize(byteArr) {
  if (!byteArr.length) throw new Error("bytearr is empty!");

  const first = byteArr[0];
  if (first !== identifierByte) {
    // ASCII byte
    if ((first & 0b10000000) === 0) return 1;
    if ((first & 0b11000000) === 0b10000000) {
      throw new Error(`Starting from mid byte! ${first.toString(16)}`);
    }
    if ((first & 0b11100000) === 0b11000000) return 2;
    if ((first & 0b11110000) === 0b11100000) return 3;
    if ((first & 0b11111000) === 0b11110000) return 4;
    throw new Error(`Unidentified byte pattern ${first}`);
  }

  // Lead byte is missing: extend over the run of placeholder/continuation bytes.
  // The bound is checked against the array length itself so that a run reaching
  // the very end of the input is counted in full.
  let ptr = 1;
  while (
    ptr < byteArr.length &&
    (byteArr[ptr] === identifierByte ||
      (byteArr[ptr] & 0b11000000) === 0b10000000)
  ) {
    ptr++;
  }
  return ptr;
}

/**
 * Enumerates every byte sequence obtainable by replacing each placeholder byte
 * (`identifierByte`) in `byteArr` with a byte that is plausible at its position.
 *
 * A placeholder at position 0 of the character is replaced by the lead-byte
 * range matching the sequence length (0x20–0x7F for one byte, 0xC0–0xDF for
 * two, 0xE0–0xEF for three, 0xF0–0xF7 for four). A placeholder at any later
 * position is replaced by the continuation range 0x80–0xBF. Intact bytes are
 * kept as they are. Candidates are not validated here.
 *
 * @param {Uint8Array} byteArr - Bytes of one (partially missing) character.
 * @param {number} offset - Position of `byteArr[0]` inside the character;
 *   callers pass 0, recursion increments it.
 * @returns {Uint8Array[]} All candidate sequences, each as long as `byteArr`.
 * @throws {Error} If `byteArr` is empty, or if it starts with a placeholder at
 *   offset 0 and is longer than four bytes.
 */
function possibleChoices(byteArr, offset) {
  if (!byteArr.length) {
    // Without this guard an empty input would recurse until the stack overflows.
    throw new Error("cannot enumerate choices for an empty byte array");
  }

  let candidates = [byteArr[0]];
  if (byteArr[0] === identifierByte) {
    // Default: continuation byte 10xxxxxx.
    let start = 0b10000000;
    let end = 0b10111111;
    if (offset === 0) {
      switch (byteArr.length) {
        case 1:
          start = 0b00100000;
          end = 0b01111111;
          break;
        case 2:
          start = 0b11000000;
          end = 0b11011111;
          break;
        case 3:
          start = 0b11100000;
          end = 0b11101111;
          break;
        case 4:
          start = 0b11110000;
          end = 0b11110111;
          break;
        default:
          throw new Error(
            `invalid permute length (${byteArr.length}): ${byteArr}`
          );
      }
    }
    candidates = Array.from({ length: end - start + 1 }, (_, idx) => start + idx);
  }

  if (byteArr.length === 1) return candidates.map((v) => new Uint8Array([v]));

  // The tail does not depend on the chosen first byte, so compute it once
  // instead of once per candidate.
  const tails = possibleChoices(byteArr.slice(1), offset + 1);
  return candidates.flatMap((head) =>
    tails.map((tail) => new Uint8Array([head, ...tail]))
  );
}

/**
 * Lists the characters that one damaged UTF-8 character could have been.
 *
 * Candidates come from `possibleChoices` and are narrowed down in three steps:
 *   1. the candidate must decode (as UTF-8) to a string of length 1;
 *   2. if it re-encodes to two Shift-JIS bytes, it must be encodable and its
 *      kanji level must be enabled (`jis1`..`jis4`);
 *   3. unless the preceding byte was itself missing, the candidate bytes,
 *      prefixed with `floatingBytes`, must still fail to decode cleanly as
 *      Shift-JIS, since otherwise the original mojibake would not have had a hole.
 *
 * @param {Uint8Array} byteArr - Bytes of the damaged character (placeholders included).
 * @param {Uint8Array} floatingBytes - Bytes before this character that did not
 *   form a complete Shift-JIS character.
 * @param {boolean} prevMissing - Whether the byte just before `byteArr` is a placeholder.
 * @returns {string} The surviving candidates wrapped in parentheses, or an
 *   `(error: …)` string if any step throws.
 */
function solveBytes(byteArr, floatingBytes, prevMissing) {
  try {
    let choices = possibleChoices(byteArr, 0);

    // decoded UTF-8 must be one char
    choices = choices.filter((ba) => decode(ba, "utf8").length === 1);

    choices = choices.filter((ba) => {
      const d = encode(decode(ba, "utf8"), "shift_jis");

      // Only two-byte Shift-JIS results are subject to the kanji-level filters.
      if (d.length !== 2) return true;

      // keep choices where encode_shiftjis(decode_utf8(i)) is valid
      // i.e. char is encoded in Shift-JIS
      if (d[0] === 128 && d[1] === 63) return false; // [128, 63] represents unencoded char

      // JIS level 1 kanji: 0x889F–0x9872. The last row stops at 0x72 so that
      // 0x9873–0x989E falls through to the level 3 check below instead of
      // being classified as level 1.
      if (
        (d[0] === 0x88 && 0x9f <= d[1] && d[1] <= 0xfc) ||
        (0x89 <= d[0] && d[0] <= 0x97 && 0x40 <= d[1] && d[1] <= 0xfc) ||
        (d[0] === 0x98 && 0x40 <= d[1] && d[1] <= 0x72)
      ) {
        return jis1;
      }

      // JIS level 2 kanji: 0x989F–0xEAA4
      if (
        (d[0] === 0x98 && 0x9f <= d[1] && d[1] <= 0xfc) ||
        (0x99 <= d[0] && d[0] <= 0xe9 && 0x40 <= d[1] && d[1] <= 0xfc) ||
        (d[0] === 0xea && 0x40 <= d[1] && d[1] <= 0xa4)
      ) {
        return jis2;
      }

      const twoByte = (d[0] << 8) + d[1];

      // JIS level 3 kanji
      if (
        (0x879f <= twoByte && twoByte <= 0x889e) ||
        (0x9873 <= twoByte && twoByte <= 0x989e) ||
        (0xeaa5 <= twoByte && twoByte <= 0xeffc)
      ) {
        return jis3;
      }

      // JIS level 4 kanji
      if (0xf040 <= twoByte && twoByte <= 0xfcf4) return jis4;

      // Any other two-byte character (kana, symbols, ...) is always allowed.
      return true;
    });

    // This is only valid if previous done byte is a confirmed byte
    if (!prevMissing) {
      // When re-encoded with prepending floating bytes, the outcome should have \ufffd
      // i.e. resulting mojibake should not be conclusive
      choices = choices.filter((ba) =>
        decode(new Uint8Array([...floatingBytes, ...ba]), "shift_jis")
          // text-decoding decodes the byte 0x80 to U+0080 although 0x80 is not a
          // valid Shift-JIS byte, so treat it as a decoding failure as well
          .replaceAll("\x80", "\ufffd")
          .includes("\ufffd")
      );
    }
    return "(" + choices.map((ba) => decode(ba, "utf8")).join("") + ")";
  } catch (e) {
    return `(error: ${e}, ${decode(byteArr, "utf8")})`;
  }
}

/**
 * Returns the tail of `byteArr` that is left over after the part that
 * round-trips through Shift-JIS.
 *
 * The bytes are decoded as Shift-JIS, every U+FFFD is dropped, and the rest is
 * re-encoded; the length of that re-encoding is taken as the number of leading
 * "confirmed" bytes.
 *
 * @param {Uint8Array} byteArr - Bytes processed so far.
 * @returns {Uint8Array} The bytes following the confirmed prefix.
 */
function getFloatingBytes(byteArr) {
  const cleanText = decode(byteArr, "shift_jis").split("\ufffd").join("");
  const confirmedLength = encode(cleanText, "shift_jis").length;
  return byteArr.slice(confirmedLength);
}

/**
 * Recovers the text of one line of mojibake (UTF-8 bytes that were displayed
 * as Shift-JIS), guessing the characters whose bytes were lost.
 *
 * The line is re-encoded as Shift-JIS to get the UTF-8 bytes back. If those
 * decode cleanly, the result is returned as is. Otherwise the bytes are
 * consumed left to right: cleanly decodable text is copied through, and each
 * damaged character is replaced by the candidate list from `solveBytes`.
 *
 * @param {string} input - One line of mojibake, with lost bytes marked by the
 *   placeholder character.
 * @returns {string} The recovered text; damaged characters appear as
 *   parenthesised candidate lists.
 * @throws {Error} Whatever `guessSize` throws for byte patterns it cannot size.
 */
function mainDecode(input) {
  const inputBytes = encode(input, "shift_jis");

  // try plain decode
  const plainDecode = decode(inputBytes, "utf8");
  if (!plainDecode.includes("\ufffd")) {
    return plainDecode;
  }

  const isContinuation = (byte) => (byte & 0b11000000) === 0b10000000;

  // need to guess bytes
  let output = "";
  let pendingBytes = inputBytes;
  let doneBytes = new Uint8Array([]);
  while (pendingBytes.length) {
    // purge decodable bytes first
    const decoded = decode(pendingBytes, "utf8");
    // Compare bytes rather than a hard-coded "?" so that a user-selected
    // placeholder is honoured, consistent with guessSize/possibleChoices.
    const failurePos =
      pendingBytes[0] === identifierByte ? 0 : decoded.indexOf("\ufffd");
    if (failurePos < 0) {
      // all remaining text is safe
      output += decoded;
      break;
    }

    let safeBytes = encode(decoded.substring(0, failurePos), "utf8").length;
    // If decoding failed on a continuation byte, its lead byte is missing.
    // Placeholder bytes directly in front of it then belong to the damaged
    // character, not to the safe text. A character has at most four bytes, so
    // at most three placeholders can precede a surviving continuation byte.
    if (isContinuation(pendingBytes[safeBytes])) {
      let reclaimed = 0;
      while (
        reclaimed < 3 &&
        safeBytes > 0 &&
        pendingBytes[safeBytes - 1] === identifierByte
      ) {
        safeBytes--;
        reclaimed++;
      }
    }

    const safeChunk = pendingBytes.slice(0, safeBytes);
    output += decode(safeChunk, "utf8");
    doneBytes = new Uint8Array([...doneBytes, ...safeChunk]);
    pendingBytes = pendingBytes.slice(safeBytes);

    // guess next char size
    const size = guessSize(pendingBytes);
    const damagedChunk = pendingBytes.slice(0, size);
    const floatingBytes = getFloatingBytes(doneBytes);
    const prevMissing =
      doneBytes.length > 0 &&
      doneBytes[doneBytes.length - 1] === identifierByte;
    output += solveBytes(damagedChunk, floatingBytes, prevMissing);
    doneBytes = new Uint8Array([...doneBytes, ...damagedChunk]);
    pendingBytes = pendingBytes.slice(size);
  }
  return output;
}

// Current mojibake text; kept in sync with the textarea by the listeners.
let input = "譁?蟄怜喧縺代@縺滓律譛ャ隱槭?ョ繝?繧ュ繧ケ繝医?ョ萓?";

/**
 * Decodes `input` line by line and writes the result into the output element.
 * A line that cannot be processed is replaced by an error message containing
 * the original line and its plain UTF-8 reinterpretation.
 */
function solve() {
  outputElm.innerText = input
    .split("\n")
    .map((line) => {
      try {
        return mainDecode(line);
      } catch (e) {
        // Re-encode as Shift-JIS, then decode as UTF-8; the codec arguments
        // now go to the functions they belong to.
        const plain = decode(encode(line, "shift_jis"), "utf8");
        return `error: ${e} (${line}) (${plain})`;
      }
    })
    .join("\n");
}
// Re-solve whenever the mojibake text is edited.
inputElm.addEventListener("input", ({ target }) => {
  input = target.value;
  solve();
});

// Builds a click handler that replaces every U+FFFD in the textarea with
// `count` placeholder characters and then re-solves.
const fillMissingBytes = (count) => () => {
  const replacement = identifier.repeat(count);
  inputElm.value = inputElm.value.replaceAll("\ufffd", replacement);
  input = inputElm.value;
  solve();
};

conv1.addEventListener("click", fillMissingBytes(1));

conv2.addEventListener("click", fillMissingBytes(2));

// Builds a change handler that stores the checkbox state through `assign`
// and then re-solves.
const onLevelToggle = (assign) => (evt) => {
  assign(evt.target.checked);
  solve();
};

jis1cb.addEventListener(
  "change",
  onLevelToggle((enabled) => {
    jis1 = enabled;
  })
);
jis2cb.addEventListener(
  "change",
  onLevelToggle((enabled) => {
    jis2 = enabled;
  })
);
jis3cb.addEventListener(
  "change",
  onLevelToggle((enabled) => {
    jis3 = enabled;
  })
);
jis4cb.addEventListener(
  "change",
  onLevelToggle((enabled) => {
    jis4 = enabled;
  })
);

              
            
!
999px

Console