<h1>SRT to TranScript Markers</h1>

<p>See the <a href="https://www.extremraym.com/en/wavesurfer-wp-markers/">Interactive Marker and Transcripts for WaveSurfer-WP</a> article for usage instructions. Also works with YouTube and SoundCloud.</p>

<hr>

<h2>Input</h2>

<p>
  <label for="fileInput">SRT File:</label>
  <input type="file" id="fileInput" accept=".srt">
</p>

<p>
  <label for="id">ID:</label>
  <input id="id" type="number" min="1" step="1" value="1">
</p>

<!-- Marker class written on every generated <span>; each id matches its value. -->
<p>Class name:<br>
  <input id="youtube-marker" type="radio" name="class" value="youtube-marker" checked>
  <label for="youtube-marker">youtube-marker</label><br>
  <input id="wavesurfer-marker" type="radio" name="class" value="wavesurfer-marker">
  <label for="wavesurfer-marker">wavesurfer-marker</label>
</p>

<!-- Autoplay choice; the script reads the checked state of #autoplay-false. -->
<p>Autoplay:<br>
  <input id="autoplay-true" type="radio" name="autoplay" value="true" checked>
  <label for="autoplay-true">True</label><br>
  <input id="autoplay-false" type="radio" name="autoplay" value="false">
  <label for="autoplay-false">False</label>
</p>

<!-- Merge choice; the script reads the checked state of #merge-false. -->
<p>Subtitles Merging:<br>
  <input id="merge-true" type="radio" name="merge" value="true" checked>
  <label for="merge-true">True</label> (Transcript style: merge subtitles from the same sentence into one paragraph).<br>
  <input id="merge-false" type="radio" name="merge" value="false">
  <label for="merge-false">False</label> (Lyrics style: create one paragraph per subtitle).
</p>

<button id="generate">Generate Output</button>

<hr>

<h2>Output</h2>

<!-- Copy button: matched by the '.copy-button' selector passed to Clipboard in the script; data-clipboard-target points at #output. -->
<button class="copy-button" data-clipboard-target="#output">Copy Output</button>

<div class="output">
  <div class="output-code">
    <!-- #output: the script writes the Prism-highlighted generated markup (or an error message) here. -->
    <pre><code id="output" class="language-markup">Output: Empty</code></pre>
  </div>
  <div class="output-text-section">
    <!-- #output-text: the script injects the generated markup here as HTML, giving a rendered preview. -->
    <div id="output-text">Output Text</div>
  </div>
</div>

<hr>

<div class="changelog">
  
  <h2>Changelog</h2>
  
  <ul>
    <li>v2017-01-20: YouTube Markers. Text output. Generate Button.</li>
    <li>v2016-12-20: Initial Release</li>
  </ul>

</div>
html {
  font-family: Helvetica, Arial, sans-serif;
  line-height: 1.5em;
  background: #EEE;
  padding: 0em 1em;
  margin: 1em auto;
  min-height: 300px;
}

h1 {
  margin-top: 0;
}

img {
  max-width: 100%;
}

#fileDisplayArea {
  margin-top: 2em;
}

button {
  margin-top: 1em;
}

button:hover,
input:hover,
label:hover {
  cursor: pointer;
}

/* Two-column layout for the code view and the rendered preview. */
.output {
  display: -ms-flexbox; /* IE10 keyword; "-ms-flex" is a property name, not a display value */
  display: -webkit-flex;
  display: flex;
}

.output > div {
  width: 50%;
  padding: 10px;
}

* {
  box-sizing: border-box;
}

div.output-text-section {
  padding: 1em;
}

#output-text {
  border: 1px solid black;
  padding: 1em;
  background: white;
}

.wavesurfer-marker:hover,
.youtube-marker:hover {
  background-color: yellow;
  cursor: pointer;
}

button {
  padding: 0.5em;
}

#output span.punctuation {
  color: #e2777a;
}

/* Dark, monospace look for the file picker. */
#fileInput {
  background: #2D2D2D;
  color: #ccc; /* the former "color: white" was immediately overridden by this value */
  font-family: Consolas,Monaco,'Andale Mono','Ubuntu Mono',monospace;
  text-align: left;
  white-space: pre;
  word-spacing: normal;
  word-break: normal;
  word-wrap: normal;
  line-height: 1.5;
  -moz-tab-size: 4;
}
/* COPY OUTPUT BUTTON */
// clipboard.js instance bound to every element matching '.copy-button'.
var clipboard = new Clipboard('.copy-button');

/* UPLOAD FILE FUNCTIONS */
// Wire up the controls once the page has loaded. addEventListener is used
// (instead of assigning window.onload) so this handler neither replaces nor
// gets replaced by another script's load handler.
window.addEventListener('load', function() {
  var fileInput = document.getElementById('fileInput');
  var fileDisplayArea = document.getElementById('output');
  var generateButton = document.getElementById('generate');

  // Both picking a file and pressing "Generate Output" rebuild the output,
  // so option changes can be applied without re-selecting the file.
  function regenerate() {
    GenerateOutput(fileInput, fileDisplayArea);
  }

  fileInput.addEventListener('change', regenerate);
  generateButton.addEventListener('click', regenerate);
});

/**
 * Read the selected .srt file, convert its subtitles into marker markup and
 * display the result twice: syntax-highlighted in `fileDisplayArea` and
 * rendered as HTML in #output-text.
 *
 * Options are read from the page: #id, the checked input[name="class"],
 * #autoplay-false and #merge-false.
 *
 * @param {HTMLInputElement} fileInput - File input holding the chosen file.
 * @param {HTMLElement} fileDisplayArea - Element receiving the highlighted
 *   markup, or an error message when there is nothing to convert.
 */
function GenerateOutput(fileInput, fileDisplayArea) {
  var file = fileInput.files && fileInput.files[0];

  // The Generate button can be pressed (or the file dialog cancelled) with
  // no file chosen; report it instead of throwing on `file.name`.
  if (!file) {
    fileDisplayArea.innerText = "No file selected!";
    return;
  }

  // Extension = text after the last dot; compared case-insensitively so
  // "MOVIE.SRT" is accepted too.
  var ext = /(?:\.([^.]+))?$/.exec(file.name)[1];
  if (!ext || ext.toLowerCase() !== 'srt') {
    fileDisplayArea.innerText = "File not supported!";
    return;
  }

  var reader = new FileReader();

  reader.onload = function() {
    var id = document.getElementById('id').value;
    var name = document.querySelector('input[name="class"]:checked').value;
    var autoplayOff = document.getElementById('autoplay-false').checked;
    var mergeOff = document.getElementById('merge-false').checked;

    // Attributes shared by every marker. data-id is only emitted for ids
    // above 1, data-autoplay only when autoplay is switched off.
    var data_id = id > 1 ? ' data-id="' + id + '"' : '';
    var data_autoplay = autoplayOff ? ' data-autoplay="false"' : '';

    var srt = new Srt(reader.result);
    var lines = srt.lines || []; // empty content may leave `lines` unset
    var output = '';
    var paragraphOpen = false; // merge mode: is a <p> currently open?

    lines.forEach(function(line) {
      // SRT uses a comma before the milliseconds; markers use a dot.
      var start = line.start.text.replace(',', '.');
      var end = line.end.text.replace(',', '.');

      // Join multi-line subtitles with a space, then drop leftover breaks.
      // NOTE(review): the text is inserted unescaped, so markup inside the
      // .srt (e.g. <i>) is kept and will be rendered by the preview below.
      var text = line.subtitle
        .replace(/(\n)(.+)/gm, " $2")
        .replace(/(\r\n|\n|\r)/gm, "");

      var subtitle = '<span class="' + name + '"' + data_id + data_autoplay +
        ' data-start="' + start + '"' + ' data-end="' + end + '"' +
        '>' + text + '</span>';

      if (mergeOff) {
        // Lyrics style: one paragraph per subtitle.
        output += '<p>' + subtitle + '</p>\n';
        return;
      }

      // Transcript style: keep appending to the current paragraph until a
      // subtitle ends a sentence.
      output += (paragraphOpen ? ' ' : '<p>') + subtitle;
      paragraphOpen = true;

      // Ignore trailing whitespace when looking for the sentence end.
      var lastChar = text.trim().slice(-1);
      if (lastChar === '.' || lastChar === '?' || lastChar === '!' || lastChar === '"' || lastChar === ')') {
        output += '</p>\n';
        paragraphOpen = false;
      }
    });

    // Close a paragraph left open because the last subtitle did not end
    // with sentence punctuation.
    if (paragraphOpen) {
      output += '</p>\n';
    }

    var type = (name === 'wavesurfer-marker') ? 'wavesurfer' : 'youtube';
    output = '<div id="' + type + '-transcript-#' + id + '" class="' + type + '-transcript">\n' + output + '</div>';

    // Highlighted source view.
    fileDisplayArea.innerHTML = Prism.highlight(output, Prism.languages.markup);

    // Rendered preview.
    document.getElementById('output-text').innerHTML = output;
  };

  reader.readAsText(file, 'UTF-8'); //or ISO-8859-1
}

/* SRT PARSING */
// thanks https://github.com/fuermosi777/srtjs !!

/**
 * Parse SubRip (.srt) text into a list of cues.
 *
 * After construction `this.lines` is always an array (empty when
 * `srtContent` is empty or holds no valid cue). Each entry has:
 *   counter  - the cue number line, as a string
 *   subtitle - the cue text, lines joined by '\n' with a trailing '\n'
 *   start    - object built by dateToObject() for the start time
 *   end      - object built by dateToObject() for the end time
 *
 * @param {string} srtContent - Raw content of the .srt file.
 */
var Srt = function(srtContent) {
  this.lines = [];
  if (!srtContent) return;
  this.srtContent = srtContent;
  this.init();
};

Srt.prototype = {
  init: function() {
    this.parse();
  },
  /**
   * Split the content into cues and fill `this.lines`.
   * Blocks with fewer than three lines or without a valid time line are
   * skipped rather than aborting the whole parse.
   */
  parse: function() {
    // Normalize CRLF / CR line endings and drop surrounding blank lines so
    // the same splitting rule works wherever the file was written.
    var text = this.srtContent.replace(/\r\n?/g, '\n').trim();
    // Cues are separated by one or more blank (or whitespace-only) lines.
    var blocks = text.split(/\n(?:[ \t]*\n)+/);
    // hh:mm:ss with optional ",mmm" or ".mmm" fraction.
    var startPattern = /^\s*[0-9]+:[0-9]+:[0-9]+(?:[,.][0-9]+)?/;
    var endPattern = /\s[0-9]+:[0-9]+:[0-9]+(?:[,.][0-9]+)?/;

    for (var i = 0; i < blocks.length; i++) {
      var origin = blocks[i].split('\n');
      if (origin.length < 3) continue;

      // counter
      var counter = origin[0];
      // time
      var timeLine = origin[1];
      var startMatch = timeLine.match(startPattern);
      var endMatch = timeLine.match(endPattern);
      if (!startMatch || !endMatch) continue; // malformed time line

      var startDate = this.stringToDate(startMatch[0].trim());
      var endDate = this.stringToDate(endMatch[0].trim());
      // subtitle: every remaining line, each terminated by '\n'
      var subtitle = origin.slice(2).join('\n') + '\n';

      // push to list
      this.lines.push({
        counter: counter,
        subtitle: subtitle,
        start: this.dateToObject(startDate),
        end: this.dateToObject(endDate)
      });
    }
  },
  // helper functions
  // not main methods
  // used to make everything simple
  /**
   * Turn a time string like "00:12:42,321" (or "00:12:42.321", or
   * "00:12:42" with no fraction) into a Date on a fixed 1970 day; only
   * the time-of-day fields are meaningful.
   */
  stringToDate: function(string) {
    var firstColonIndex = string.indexOf(':');
    var secondColonIndex = this.nthChar(string, ':', 2);
    // The millisecond separator may be a comma or a dot.
    var separatorIndex = string.search(/[,.]/);
    var hasFraction = separatorIndex !== -1;
    var hour = string.substring(0, firstColonIndex);
    var minute = string.substring(firstColonIndex + 1, secondColonIndex);
    var second = hasFraction ?
      string.substring(secondColonIndex + 1, separatorIndex) :
      string.substring(secondColonIndex + 1);
    var msecond = hasFraction ? string.substring(separatorIndex + 1) : 0;
    return new Date(1970, 1, 1, hour, minute, second, msecond);
  },
  /**
   * Describe a Date as { text, time, hours, minutes, seconds, milliseconds },
   * where `text` is the zero-padded "hh:mm:ss,mmm" form.
   */
  dateToObject: function(date) {
    // Left-pad with zeros up to `width` characters.
    var pad = function(value, width) {
      var padded = String(value);
      while (padded.length < width) {
        padded = '0' + padded;
      }
      return padded;
    };
    var hours = date.getHours();
    var minutes = date.getMinutes();
    var seconds = date.getSeconds();
    var milliseconds = date.getMilliseconds();
    return {
      text: pad(hours, 2) + ':' + pad(minutes, 2) + ':' + pad(seconds, 2) + ',' + pad(milliseconds, 3),
      time: date,
      hours: hours,
      minutes: minutes,
      seconds: seconds,
      milliseconds: milliseconds
    };
  },
  /**
   * Index of the n-th occurrence of `character` in `string`, or NaN when
   * there are fewer than n occurrences.
   */
  nthChar: function(string, character, n) {
    var count = 0,
      i = 0;
    // indexOf() + 1 is 0 (falsy) once no further occurrence exists.
    while (count < n && (i = string.indexOf(character, i) + 1)) {
      count++;
    }
    if (count == n) return i - 1;
    return NaN;
  }
};

// Text Sample
/*
var text = `1
00:20:41,150 --> 00:20:45,109
- How did he do that?
- Made him an offer he couldn't refuse.

2
00:20:45,150 --> 00:20:50,109
This is the second subtitle.`;
*/

External CSS

  1. https://cdnjs.cloudflare.com/ajax/libs/prism/1.5.1/themes/prism-tomorrow.min.css

External JavaScript

  1. https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/1.5.16/clipboard.min.js
  2. https://cdnjs.cloudflare.com/ajax/libs/prism/1.5.1/prism.min.js