Last active
July 3, 2022 12:45
-
-
Save hai-vr/b340f9a46952640f81efe7f02da6bdf6 to your computer and use it in GitHub Desktop.
SubtitleManager.cs - Shared to twitter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// MIT License
//
// Copyright (c) 2022 Haï~ (@vr_hai github.com/hai-vr)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
using JetBrains.Annotations; | |
using TMPro; | |
using UdonSharp; | |
using UnityEngine; | |
using UnityEngine.UI; | |
using VRC.SDK3.Video.Components; | |
using VRC.SDKBase; | |
using VRC.Udon.Common; | |
/// <summary>
/// Parses SRT-style subtitles pasted by a user, displays them in sync with a video player,
/// and (in "global" mode) transmits the raw subtitle text to other players in fixed-size
/// chunks through manually synced Udon variables.
/// </summary>
[UdonBehaviourSyncMode(BehaviourSyncMode.Manual)]
public class SubtitleManager : UdonSharpBehaviour
{
    // Number of characters sent per serialization. The value itself is arbitrary.
    private const int ChunkSize = 8192;

    // Delay before re-requesting serialization after a failed send.
    private const float RetryDelaySeconds = 1f;

    // Chunk of the raw subtitle text carried by the current serialization.
    [UdonSynced]
    private string syncedSubtitlesChunk;

    // Identifier of the subtitle set being transmitted (server time at injection).
    [UdonSynced]
    private int syncIdentifier;

    // Full raw subtitle text; on receivers it is rebuilt chunk by chunk.
    private string _syncedSubtitlesRaw = "";

    // Parsed subtitles: start/end in seconds and text, valid for indices below _subtitleCount.
    private float[] _start = new float[0];
    private float[] _end = new float[0];
    private string[] _subtitleText = new string[0];
    private int _subtitleCount;

    private bool enableSubtitleInjection = true;

    // Index of the earliest subtitle that has not been permanently passed yet.
    private int _trackingIndex;

    // Video time observed during the previous evaluated frame.
    private float _trackingTime;

    // Text currently shown, used to avoid rewriting the outputs every frame.
    private string _previousText;

    public Text syncField;
    public Text proxyField;
    public Toggle toggleGlobal;
    public Toggle toggleSubtitles;
    public Toggle toggleDyslexia;
    public TextMeshProUGUI[] subtitleOutputs;
    public TextMeshProUGUI[] markOutputs;
    public GameObject[] subtitleGameObjects;
    public VRCUnityVideoPlayer videoPlayer;
    public GameObject inputField;
    public GameObject inputFieldTemplate;

    private bool _global = true;

    public GameObject normalSubtitles;
    public GameObject dyslexiaSubtitles;

    // Total number of chunks in the current transmission.
    [UdonSynced]
    private int _chunkCount;

    // Index of the chunk carried by the current serialization.
    [UdonSynced]
    private int _chunkSync;

    // Index of the last chunk accepted locally; -1 until a chunk 0 has been received,
    // so that a stray chunk 1 can never be accepted as a continuation of nothing.
    private int _localChunkSync = -1;

    // Identifier of the subtitle set that was last applied from the network.
    private int _lastSyncIdentifier;

    private bool _dyslexia;

    // Identifier that accompanied the chunk 0 currently being accumulated.
    private int _receivingIdentifier;

    /// <summary>
    /// Starts (or restarts) the transmission of the raw subtitle text from chunk 0.
    /// </summary>
    private void _TransmitSubtitles()
    {
        Debug.Log($"Transmitting subtitles...");
        if (_syncedSubtitlesRaw == null)
        {
            _syncedSubtitlesRaw = "";
        }

        // Ceiling division; at least one chunk is always sent so that an empty text is transmitted too.
        _chunkCount = Mathf.Max(1, (_syncedSubtitlesRaw.Length + ChunkSize - 1) / ChunkSize);
        _chunkSync = 0;
        syncField.text = $"Sync 0 / {_chunkCount}";
        RequestSerialization();
    }

    /// <summary>
    /// When the local player owns this object, retransmits the subtitles whenever a player joins.
    /// </summary>
    public override void OnPlayerJoined(VRCPlayerApi player)
    {
        if (Networking.IsOwner(gameObject))
        {
            _TransmitSubtitles();
        }
    }

    /// <summary>
    /// Fills the synced chunk variable with the slice matching the current chunk index.
    /// </summary>
    public override void OnPreSerialization()
    {
        Debug.Log($"About to send chunk {_chunkSync}, total {_chunkCount}");
        _RedefineChunk();
    }

    /// <summary>
    /// Advances to the next chunk after a successful send, or schedules a retry of the
    /// same chunk when the send failed.
    /// </summary>
    public override void OnPostSerialization(SerializationResult result)
    {
        if (!result.success)
        {
            // Do not advance: receivers only accept consecutive chunks, so skipping one
            // would make them reject the remainder of the transmission.
            Debug.LogWarning($"Failed to send chunk {_chunkSync}, total {_chunkCount}; will retry");
            SendCustomEventDelayedSeconds(nameof(_SendNextChunk), RetryDelaySeconds);
            return;
        }

        if (_chunkSync >= _chunkCount)
        {
            // A serialization happened after the last chunk was already sent
            // (e.g. overlapping requests); there is nothing left to advance.
            return;
        }

        Debug.Log($"Sent chunk {_chunkSync}, total {_chunkCount}");
        syncField.text = $"Sync {_chunkSync + 1} / {_chunkCount}";
        _chunkSync++;
        if (_chunkSync < _chunkCount)
        {
            Debug.Log("Will send another chunk...");
            SendCustomEventDelayedFrames(nameof(_SendNextChunk), 1);
        }
    }

    /// <summary>
    /// Delayed-event target: requests serialization of the current chunk if there is one left to send.
    /// </summary>
    public void _SendNextChunk()
    {
        if (_chunkSync < _chunkCount && Networking.IsOwner(gameObject))
        {
            RequestSerialization();
        }
    }

    /// <summary>
    /// Accumulates received chunks in order and applies the subtitles once the last chunk
    /// of a new subtitle set has been accepted.
    /// </summary>
    public override void OnDeserialization()
    {
        syncField.text = $"Sync {_chunkSync + 1} / {_chunkCount}";
        Debug.Log($"Received chunk {_chunkSync}, total {_chunkCount}");

        var chunk = syncedSubtitlesChunk == null ? "" : syncedSubtitlesChunk;
        var accepted = false;
        if (_chunkSync == 0)
        {
            Debug.Log($"Acquired chunk {_chunkSync}");
            _localChunkSync = 0;
            _receivingIdentifier = syncIdentifier;
            _syncedSubtitlesRaw = chunk;
            accepted = true;
        }
        else if (_chunkSync < _chunkCount
                 && _localChunkSync == _chunkSync - 1
                 && _receivingIdentifier == syncIdentifier)
        {
            Debug.Log($"Acquired chunk {_chunkSync}");
            _localChunkSync++;
            _syncedSubtitlesRaw += chunk;
            accepted = true;
        }
        else
        {
            Debug.Log($"Rejected chunk {_chunkSync} because local chunk is {_localChunkSync}");
        }

        // Only an accepted chunk can complete a transmission. Checking after a rejection
        // could apply stale text under the new identifier and block the real reload.
        if (!accepted)
        {
            return;
        }

        if (_localChunkSync == _chunkCount - 1)
        {
            Debug.Log($"Received all chunks");
            if (_global)
            {
                if (_lastSyncIdentifier == syncIdentifier)
                {
                    // We don't want a hitch in the middle of watching a movie just because a new player joined and transmission was finished
                    Debug.Log("Not reloading subtitles because it has the same identifier as the previously loaded one");
                }
                else
                {
                    _ApplyGlobal();
                    _lastSyncIdentifier = syncIdentifier;
                }
            }
        }
    }

    /// <summary>
    /// Copies the slice of the raw text for the current chunk index into the synced variable.
    /// An out-of-range chunk index yields an empty chunk instead of throwing.
    /// </summary>
    private void _RedefineChunk()
    {
        var raw = _syncedSubtitlesRaw == null ? "" : _syncedSubtitlesRaw;
        var start = Mathf.Min(_chunkSync * ChunkSize, raw.Length);
        var length = Mathf.Clamp(raw.Length - start, 0, ChunkSize);
        syncedSubtitlesChunk = raw.Substring(start, length);
    }

    private void Start()
    {
        _ResetSubtitleTrackingState();
    }

    /// <summary>
    /// Rewinds subtitle tracking to the first subtitle and clears all text outputs.
    /// </summary>
    private void _ResetSubtitleTrackingState()
    {
        _trackingIndex = 0;
        _trackingTime = 0;
        foreach (var output in subtitleOutputs)
        {
            output.text = "";
        }
        foreach (var output in markOutputs)
        {
            output.text = "";
        }
        _previousText = "";
    }

    /// <summary>
    /// Shows or hides the subtitle objects according to the subtitles toggle.
    /// </summary>
    [UsedImplicitly] // UI Input
    public void _OnToggleShow()
    {
        enableSubtitleInjection = toggleSubtitles.isOn;
        foreach (var subtitleGameObject in subtitleGameObjects)
        {
            subtitleGameObject.SetActive(enableSubtitleInjection);
        }
    }

    /// <summary>
    /// Switches between global and local mode; entering global mode applies the synced text.
    /// </summary>
    [UsedImplicitly] // UI Input
    public void _OnToggleGlobal()
    {
        _global = toggleGlobal.isOn;
        if (_global)
        {
            _ApplyGlobal();
        }
    }

    /// <summary>
    /// Swaps between the normal and the dyslexia subtitle objects.
    /// </summary>
    [UsedImplicitly] // UI Input
    public void _OnToggleDyslexia()
    {
        _dyslexia = toggleDyslexia.isOn;
        normalSubtitles.SetActive(!_dyslexia);
        dyslexiaSubtitles.SetActive(_dyslexia);
    }

    /// <summary>
    /// Parses the synced raw text and restarts subtitle tracking.
    /// </summary>
    private void _ApplyGlobal()
    {
        if (_syncedSubtitlesRaw == null)
        {
            _syncedSubtitlesRaw = "";
        }
        Debug.Log($"Applying global subtitles of length {_syncedSubtitlesRaw.Length}");
        _ParseSubtitles(_syncedSubtitlesRaw);
        _ResetSubtitleTrackingState();
    }

    /// <summary>
    /// Parses the given text for local use only and restarts subtitle tracking.
    /// </summary>
    private void _ApplyLocal(string subtitles)
    {
        _ParseSubtitles(subtitles);
        _ResetSubtitleTrackingState();
    }

    /// <summary>
    /// Takes the text from the proxy field, replaces the input field with a fresh copy of
    /// the template, then applies the text globally (and transmits it) or locally.
    /// </summary>
    [UsedImplicitly] // UI Input
    public void _InjectSubtitles()
    {
        var toBeInjected = proxyField.text;
        if (toBeInjected == null)
        {
            toBeInjected = "";
        }
        proxyField.text = "";

        // TMP_InputField contents is a performance hog.
        // In order to clear the data, the way I've found so far is to copy the entire input field.
        // I haven't found a way yet to just clear the text using Unity events, if even possible.
        var vrcInstantiate = VRCInstantiate(inputFieldTemplate);
        vrcInstantiate.SetActive(true);
        vrcInstantiate.transform.parent = inputField.transform.parent;
        vrcInstantiate.transform.position = inputField.transform.position;
        vrcInstantiate.transform.rotation = inputField.transform.rotation;
        vrcInstantiate.transform.localScale = inputField.transform.localScale;
        var oriRect = inputField.GetComponent<RectTransform>();
        var insRect = vrcInstantiate.GetComponent<RectTransform>();
        insRect.anchorMin = oriRect.anchorMin;
        insRect.anchorMax = oriRect.anchorMax;
        insRect.anchoredPosition = oriRect.anchoredPosition;
        insRect.sizeDelta = oriRect.sizeDelta;
        Destroy(inputField);
        inputField = vrcInstantiate;

        Debug.Log("Injected subtitles from input of length " + toBeInjected.Length);
        if (_global)
        {
            // Take ownership before touching any synced variable.
            if (!Networking.IsOwner(gameObject))
            {
                Networking.SetOwner(Networking.LocalPlayer, gameObject);
            }
            _syncedSubtitlesRaw = toBeInjected;
            _ApplyGlobal();
            syncIdentifier = Networking.GetServerTimeInMilliseconds();
            _TransmitSubtitles();
        }
        else
        {
            _ApplyLocal(toBeInjected);
        }
    }

    /// <summary>
    /// Every frame, works out which subtitles cover the current video time and updates
    /// the text outputs when the resulting text changes.
    /// </summary>
    public void Update()
    {
        if (!enableSubtitleInjection) return;
        if (_subtitleCount == 0) return;

        var time = videoPlayer.GetTime();
        if (time == _trackingTime) return;

        if (time < _trackingTime)
        {
            // Rewinded, reevaluate the subtitle state
            _ResetSubtitleTrackingState();
        }
        _trackingTime = time;

        var hasAnyText = false;
        var buildingText = "";
        var forwardEvaluationIndex = _trackingIndex;
        var eject = false;

        // It is legal for subtitle timestamp ranges to overlap.
        // Assumption: subtitles are sorted by start time.
        // Starting from the earliest subtitle that is not permanently over, walk forward
        // and collect every subtitle covering the current time, stopping at the first
        // subtitle that has not started yet.
        while (!eject && forwardEvaluationIndex < _subtitleCount)
        {
            var start = _start[forwardEvaluationIndex];
            var end = _end[forwardEvaluationIndex];
            if (time > start && time < end)
            {
                // The current forwardEvaluationIndex should be displayed.
                if (!hasAnyText)
                {
                    hasAnyText = true;
                    buildingText = _subtitleText[forwardEvaluationIndex];
                }
                else
                {
                    buildingText = buildingText + "\n" + _subtitleText[forwardEvaluationIndex];
                }
            }
            else if (_trackingIndex == forwardEvaluationIndex && time > end)
            {
                // We are past the current tracking, permanently move on to the next.
                // This can go OOB, but the loop is based on forwardEvaluationIndex
                // which gates it within legal range (lower than _subtitleCount).
                _trackingIndex++;
            }
            else if (time >= end)
            {
                // This subtitle is over but sits behind one that is still active, so the
                // tracking index cannot move past it yet. Skip it and keep looking, since
                // a later subtitle may still overlap the current time.
            }
            else
            {
                // This subtitle has not started yet; with sorted starts, none after it has either.
                eject = true;
            }

            forwardEvaluationIndex++;
        }

        if (_previousText != buildingText)
        {
            _previousText = buildingText;
            foreach (var output in subtitleOutputs)
            {
                output.text = buildingText;
            }
            foreach (var output in markOutputs)
            {
                output.text = $"<mark=#000000>{buildingText}</mark>";
            }
        }
    }

    /// <summary>
    /// Parses subtitle text into the start/end/text arrays.
    /// A cue begins at a line containing " --> ", its text is every following non-blank
    /// line, and a blank line ends it. Lines outside a cue (such as index numbers) are ignored.
    /// </summary>
    /// <param name="source">Raw subtitle text; null is treated as empty.</param>
    private void _ParseSubtitles(string source)
    {
        if (source == null)
        {
            source = "";
        }

        var allLines = source
            .Replace("\r\n", "\n")
            .Split('\n');

        // Every cue consumes exactly one line containing the arrow, so the number of such
        // lines is a safe upper bound on the number of cues regardless of how many lines
        // each cue spans.
        var maxSubtitleCount = 0;
        foreach (var candidate in allLines)
        {
            if (candidate.Contains(" --> "))
            {
                maxSubtitleCount++;
            }
        }

        _start = new float[maxSubtitleCount];
        _end = new float[maxSubtitleCount];
        _subtitleText = new string[maxSubtitleCount];

        // 0: looking for a timestamp line, 1: expecting the first text line, 2: expecting more text lines.
        var parserState = 0;
        var currentSubtitleIndex = 0;
        var actualSubtitleCount = 0;
        foreach (var line in allLines)
        {
            // Whitespace-only lines count as separators too.
            var isBlank = line.Trim().Length == 0;

            if (parserState == 0 && line.Contains(" --> "))
            {
                var arrowPos = line.IndexOf(" --> ");
                var startStr = line.Substring(0, arrowPos);
                var endStr = line.Substring(arrowPos + 5);
                _start[currentSubtitleIndex] = _ParseSRTimestamp(startStr);
                _end[currentSubtitleIndex] = _ParseSRTimestamp(endStr);
                _subtitleText[currentSubtitleIndex] = "";
                actualSubtitleCount = currentSubtitleIndex + 1;
                parserState = 1;
            }
            else if (parserState == 1 && !isBlank)
            {
                _subtitleText[currentSubtitleIndex] = line;
                parserState = 2;
            }
            else if (parserState == 2 && !isBlank)
            {
                _subtitleText[currentSubtitleIndex] += "\n" + line;
            }
            else if (parserState != 0 && isBlank)
            {
                currentSubtitleIndex++;
                parserState = 0;
            }
        }

        _subtitleCount = actualSubtitleCount;
        Debug.Log("Parsed into " + _subtitleCount + " subtitle groups");
    }

    /// <summary>
    /// Converts a timestamp such as "01:02:03,456" into seconds.
    /// Also accepts "." as the millisecond separator, a missing hours part ("02:03,456"),
    /// a missing millisecond part, and ignores anything after the first space
    /// (for example cue settings following the end timestamp).
    /// </summary>
    /// <param name="timestampStr">Timestamp text; surrounding whitespace is ignored.</param>
    /// <returns>The time in seconds, or 0 when the text cannot be parsed.</returns>
    private float _ParseSRTimestamp(string timestampStr)
    {
        if (timestampStr == null) return 0;

        var cleaned = timestampStr.Trim();
        var spacePos = cleaned.IndexOf(" ");
        if (spacePos >= 0)
        {
            cleaned = cleaned.Substring(0, spacePos);
        }

        var timeComponents = cleaned.Replace(".", ",").Split(':');
        // Defensive, timestamps are not supposed to deviate from [hh:]mm:ss
        if (timeComponents.Length != 2 && timeComponents.Length != 3) return 0;

        var secondComponents = timeComponents[timeComponents.Length - 1].Split(',');

        // TryParse instead of Parse: the text comes straight from user input, and a
        // malformed timestamp should yield 0 rather than throw.
        int hours = 0;
        int minutes;
        int seconds;
        int milliseconds = 0;
        if (timeComponents.Length == 3 && !int.TryParse(timeComponents[0], out hours)) return 0;
        if (!int.TryParse(timeComponents[timeComponents.Length - 2], out minutes)) return 0;
        if (!int.TryParse(secondComponents[0], out seconds)) return 0;
        // Split always yields at least one element, so the millisecond part is present
        // only when there are two or more.
        if (secondComponents.Length > 1 && !int.TryParse(secondComponents[1], out milliseconds)) return 0;

        return hours * 60 * 60
               + minutes * 60
               + seconds
               + milliseconds * 0.001f;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment