Pen Settings

HTML

CSS

CSS Base

Vendor Prefixing

Add External Stylesheets/Pens

Any URLs added here will be added as <link>s in order, and before the CSS in the editor. If you link to another Pen, it will include the CSS from that Pen. If the preprocessor matches, it will attempt to combine them before processing.

+ add another resource

JavaScript

Babel includes JSX processing.

Add External Scripts/Pens

Any URLs added here will be added as <script>s in order, and run before the JavaScript in the editor. You can use the URL of any other Pen and it will include the JavaScript from that Pen.

+ add another resource

Packages

Add Packages

Search for and use JavaScript packages from npm here. By selecting a package, an import statement will be added to the top of the JavaScript editor for this package.

Behavior

Save Automatically?

If active, Pens will autosave every 30 seconds after being saved once.

Auto-Updating Preview

If enabled, the preview panel updates automatically as you code. If disabled, use the "Run" button to update.

Format on Save

If enabled, your code will be formatted when you actively save your Pen. Note: your code becomes un-folded during formatting.

Editor Settings

Code Indentation

Want to change your Syntax Highlighting theme, Fonts and more?

Visit your global Editor Settings.

HTML

              
                <!DOCTYPE html>
<!-- Browser-side OCR page: the user picks an image or a PDF and the recognised text is written to #log. -->
<html lang="fr">

<head>
  <meta charset="UTF-8">
  <title>Lecteur OCR sur navigateur</title>

</head>

<body>
  <!-- OCR language; the script reads the selected value from #langsel -->
  <select id="langsel">
    <option value="eng">Anglais</option>
    <option value="fra" selected>Français</option>
  </select>

  <!-- File picker; the script listens for "change" on #pdf-upload -->
  <input type="file" id="pdf-upload">
  <!-- Container the script fills with a <progress> bar -->
  <div id="progresTotal"></div>
  <hr class="hidden-print" />
  <!-- One line per OCR status, newest first; also receives the recognised text -->
  <div id="log"></div>
  <div id="apercu"></div>
  <!-- Summary messages are appended to #liste as <li> items -->
  <div id="recap" class="hidden-print">
    <ul id="liste"></ul>
  </div>

  <div id="liensPages"></div>

  <!-- One <canvas> per rendered PDF page is appended here -->
  <div id="pdfPages" class="">
    <h3 class="hidden-print">Aperçu du document</h3>
  </div>

  <!-- Libraries are loaded inside <body>: markup placed after </body> is invalid HTML. -->
  <!-- NOTE(review): cdn.rawgit.com was announced as sunset; confirm this URL still resolves or re-host the file. -->
  <script src="https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.550/pdf.min.js"></script>
</body>

</html>
              
            
!

CSS

              
                    /* Each direct child of #log is one status line laid out as a flex row. */
    #log > div {
      display: flex;
      padding: 9px;
      border-top: 1px solid #dadada;
      color: #313131;
    }

    /* The first line has no separator above it. */
    #log > div:first-child {
      border: 0;
    }

    /* Keeps the element's box in the layout but makes it invisible. */
    .hidden {
      visibility: hidden;
    }

    /* Status label column inside a log line. */
    .status {
      min-width: 250px;
    }

    /* Framed log panel. */
    #log {
      min-height: 100px;
      margin-top: 20px;
      padding: 10px;
      border: 1px solid #dadada;
    }

    body {
      margin: 30px;
      font-family: sans-serif;
    }

    /* Progress bars take the full available width and fade via opacity. */
    progress {
      display: block;
      width: 100%;
      transition: opacity 0.5s linear;
    }

    /* A bar whose value attribute is exactly "1" is dimmed. */
    progress[value="1"] {
      opacity: 0.5;
    }
              
            
!

JS

              
                
  let progresTotal = 0

  /**
   * Replaces the content of #progresTotal with a single fresh <progress> bar.
   *
   * @param {number} progres - multiplied by 100 to obtain the bar value and
   *   its text (presumably a 0..1 fraction; confirm against callers).
   */
  const afficheProgres = (progres) => {
    const percent = progres * 100
    const container = document.querySelector('#progresTotal')
    const bar = document.createElement('progress')
    // Drop whatever bar was displayed before.
    container.innerHTML = ""
    Object.assign(bar, {
      value: percent,
      max: 100,
      innerText: `${percent} %`,
    })
    container.appendChild(bar)
  }

  /**
   * Mirrors one OCR status packet in the #log panel (newest line first).
   *
   * A packet whose status equals that of the newest line updates that line's
   * <progress> bar in place; any other packet inserts a new line at the top.
   * A 'done' packet always gets its own line holding the recognised text, so
   * consecutive results (one per page) are never merged or dropped.
   *
   * @param {{status: string, progress?: number, data?: {text: string}}} packet
   *   `progress` is written to a bar whose max is 1; `data.text` is read only
   *   when `status` is 'done'.
   */
  function progressUpdate(packet) {
    const log = document.getElementById('log')
    const latest = log.firstChild
    const isDone = packet.status === 'done'

    if (!isDone && latest && latest.status === packet.status) {
      if ('progress' in packet) {
        let bar = latest.querySelector('progress')
        if (!bar) {
          // The line was created by a packet that carried no progress yet.
          bar = document.createElement('progress')
          bar.max = 1
          latest.appendChild(bar)
        }
        bar.value = packet.progress
      }
      return
    }

    const line = document.createElement('div')
    // Remembered on the element so the next packet can be matched against it.
    line.status = packet.status

    if (isDone) {
      // A result line shows only the recognised text, as plain text.
      const pre = document.createElement('pre')
      pre.appendChild(document.createTextNode(packet.data.text))
      line.appendChild(pre)
    } else {
      const label = document.createElement('div')
      label.className = 'status'
      label.appendChild(document.createTextNode(packet.status))
      line.appendChild(label)
      if ('progress' in packet) {
        const bar = document.createElement('progress')
        bar.value = packet.progress
        bar.max = 1
        line.appendChild(bar)
      }
    }

    log.insertBefore(line, log.firstChild)
  }
  /**
   * Runs Tesseract on an image source and reports the outcome in the log.
   *
   * Two jobs are started: `Tesseract.detect`, whose result is only printed to
   * the console, and `Tesseract.recognize`, whose progress packets and final
   * result are forwarded to `progressUpdate`. Failures of either job are
   * reported on the console instead of being lost.
   *
   * @param {*} file - handed unchanged to Tesseract; callers in this file pass
   *   a File or a canvas 2D context.
   * @param {number} [pages=1] - accepted for caller compatibility; not used.
   */
  const recognizeFile = (file, pages = 1) => {
    const reportFailure = (step) => (err) => {
      console.error(`Tesseract ${step} failed`, err)
    }

    Tesseract.detect(file)
      .then(res => console.log(res))
      .catch(reportFailure('detect'))

    Tesseract.recognize(file, {
        // OCR language chosen by the user in the #langsel drop-down.
        lang: document.querySelector('#langsel').value,
        textord_min_xheight: 8,
        tessedit_dump_pageseg_images: 1,
      })
      .progress(packet => progressUpdate(packet))
      .then(data => {
        console.log("Data", data)
        progressUpdate({
          status: 'done',
          data: data
        })
      })
      .catch(reportFailure('recognize'))
  }
  let refered = new Array()

  /**
   * Handles a file chosen in #pdf-upload.
   *
   * JPEG and TIFF images go straight to `recognizeFile`. A PDF is read into
   * memory, every page (10 at most) is rendered on its own canvas inside
   * #pdfPages, and each rendered canvas context is passed to `recognizeFile`.
   * Any other file type is rejected with a console error.
   */
  document.getElementById("pdf-upload").addEventListener("change", function(e) {
    // Reset the preview area to its heading only.
    document.querySelector("#pdfPages").innerHTML = '<h3 class="hidden-print">Aperçu du document</h3>'
    const file = e.target.files[0]
    // Cancelling the picker can fire "change" with an empty file list.
    if (!file) return
    if (file.type === "image/jpeg" || file.type === "image/tiff") {
      recognizeFile(file)
      return
    }
    if (file.type !== "application/pdf") {
      console.error('Incorrect file type', file.type)
      return
    }

    // Renders one page on a new canvas, then starts OCR on it. The promise is
    // returned so that a failure reaches the caller's error handler.
    const renderAndRecognize = (pdf, pageNumber) => pdf.getPage(pageNumber).then(page => {
      console.log(page)
      const vp = page.getViewport(4)
      const canvas = document.createElement("canvas")
      canvas.id = "pdfPage_" + pageNumber
      canvas.height = vp.height
      canvas.width = vp.width
      const ctx = canvas.getContext('2d')
      const rendered = page.render({
        canvasContext: ctx,
        viewport: vp
      }).promise.then(() => {
        recognizeFile(ctx, pdf.numPages)
      })
      // The canvas is attached right away; it fills in as rendering proceeds.
      document.querySelector("#pdfPages").appendChild(canvas)
      return rendered
    })

    const fileReader = new FileReader()
    fileReader.onload = function() {
      const typedarray = new Uint8Array(fileReader.result)
      pdfjsLib.getDocument(typedarray).promise.then(pdf => {
        addToDiv("Le fichier contient " + pdf.numPages + " page(s).")
        if (pdf.numPages > 10) throw new Error('La détection des PDF sont limités à 10 pages. Merci de tronquer le document')
        const pages = []
        for (let i = 1; i <= pdf.numPages; i++) {
          pages.push(renderAndRecognize(pdf, i))
        }
        return Promise.all(pages)
      }).catch(err => {
        if (err) alert(err)
      })
    }
    // Surface read failures the same way as PDF failures.
    fileReader.onerror = function() {
      alert(fileReader.error)
    }
    fileReader.readAsArrayBuffer(file)
  })
  /**
   * Appends one <li> item to the #liste summary list.
   *
   * @param {string} content - inserted as HTML markup inside the new <li>;
   *   callers must not pass untrusted text.
   */
  const addToDiv = (content) => {
    // insertAdjacentHTML parses only the new fragment, whereas `innerHTML +=`
    // re-serialises the list and rebuilds every existing item on each call.
    document.querySelector("#liste").insertAdjacentHTML('beforeend', "<li>" + content + "</li>")
  }
              
            
!
999px

Console