More robust regex for extracting chapters from the description

This should reduce the number of falsely matched chapters, e.g., when time-code-like numbers such as 16:9 or sports results appear in the middle of the text.

It also checks for chapters that have an end time and omits the end time code from the title.

Track lists in music videos are now also properly displayed as chapters.
This commit is contained in:
Toni Förster 2023-12-27 22:36:52 +01:00
parent 39f6319043
commit ecba91f35d
No known key found for this signature in database
GPG Key ID: 292F3E5086C83FC7

View File

@ -144,24 +144,37 @@ extension VideosAPI {
} }
func extractChapters(from description: String) -> [Chapter] { func extractChapters(from description: String) -> [Chapter] {
guard let chaptersRegularExpression = try? NSRegularExpression( /*
pattern: "(?<start>(?:[0-9]+:){1,}(?:[0-9]+))(?:\\s)+(?:- ?)?(?<title>.*)", The following chapter patterns are covered:
options: .caseInsensitive
) else { return [] }
let chapterLines = chaptersRegularExpression.matches( start - end - title / start - end: Title / start - end title
in: description, start - title / start: title / start title / [start] - title / [start]: title / [start] title
range: NSRange(description.startIndex..., in: description) index. title - start / index. title start
) title: (start)
The order is important!
*/
let patterns = [
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)"
]
for pattern in patterns {
guard let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else { continue }
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
if !chapterLines.isEmpty {
return chapterLines.compactMap { line in return chapterLines.compactMap { line in
let titleRange = line.range(withName: "title") let titleRange = line.range(withName: "title")
let startRange = line.range(withName: "start") let startRange = line.range(withName: "start")
guard let titleSubstringRange = Range(titleRange, in: description), guard let titleSubstringRange = Range(titleRange, in: description),
let startSubstringRange = Range(startRange, in: description) else { return nil } let startSubstringRange = Range(startRange, in: description)
else {
let titleCapture = String(description[titleSubstringRange]) return nil
}
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
let startCapture = String(description[startSubstringRange]) let startCapture = String(description[startSubstringRange])
let startComponents = startCapture.components(separatedBy: ":") let startComponents = startCapture.components(separatedBy: ":")
guard startComponents.count <= 3 else { return nil } guard startComponents.count <= 3 else { return nil }
@ -181,15 +194,13 @@ extension VideosAPI {
guard var startSeconds = seconds else { return nil } guard var startSeconds = seconds else { return nil }
if let minutes { startSeconds += (minutes ?? 0) * 60
startSeconds += 60 * minutes startSeconds += (hours ?? 0) * 60 * 60
}
if let hours {
startSeconds += 60 * 60 * hours
}
return .init(title: titleCapture, start: startSeconds) return .init(title: titleCapture, start: startSeconds)
} }
} }
} }
return []
}
}