Merge pull request #597 from stonerl/regex-for-chapters

more robust regex for chapters from description
This commit is contained in:
Arkadiusz Fal 2024-01-09 16:55:16 +01:00 committed by GitHub
commit 7ff52294a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -144,52 +144,63 @@ extension VideosAPI {
}
func extractChapters(from description: String) -> [Chapter] {
guard let chaptersRegularExpression = try? NSRegularExpression(
pattern: "(?<start>(?:[0-9]+:){1,}(?:[0-9]+))(?:\\s)+(?:- ?)?(?<title>.*)",
options: .caseInsensitive
) else { return [] }
/*
The following chapter patterns are covered:
let chapterLines = chaptersRegularExpression.matches(
in: description,
range: NSRange(description.startIndex..., in: description)
)
start - end - title / start - end: Title / start - end title
start - title / start: title / start title / [start] - title / [start]: title / [start] title
index. title - start / index. title start
title: (start)
return chapterLines.compactMap { line in
let titleRange = line.range(withName: "title")
let startRange = line.range(withName: "start")
The order is important!
*/
let patterns = [
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?(?:\\s*-\\s*)?(?<end>(?:[0-9]+:){1,2}[0-9]+)?(?:\\s*-\\s*|\\s*[:]\\s*)?(?<title>.*)(?=\\n|$)",
"(?<=\\n|^)\\s*(?:►\\s*)?\\[?(?<start>(?:[0-9]+:){1,2}[0-9]+)\\]?\\s*[-:]?\\s*(?<title>.+)(?=\\n|$)",
"(?<=\\n|^)(?<index>[0-9]+\\.\\s)(?<title>.+?)(?:\\s*-\\s*)?(?<start>(?:[0-9]+:){1,2}[0-9]+)(?=\\n|$)",
"(?<=\\n|^)(?<title>.+?):\\s*\\((?<start>(?:[0-9]+:){1,2}[0-9]+)\\)(?=\\n|$)"
]
guard let titleSubstringRange = Range(titleRange, in: description),
let startSubstringRange = Range(startRange, in: description) else { return nil }
for pattern in patterns {
guard let chaptersRegularExpression = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else { continue }
let chapterLines = chaptersRegularExpression.matches(in: description, range: NSRange(description.startIndex..., in: description))
let titleCapture = String(description[titleSubstringRange])
let startCapture = String(description[startSubstringRange])
let startComponents = startCapture.components(separatedBy: ":")
guard startComponents.count <= 3 else { return nil }
if !chapterLines.isEmpty {
return chapterLines.compactMap { line in
let titleRange = line.range(withName: "title")
let startRange = line.range(withName: "start")
guard let titleSubstringRange = Range(titleRange, in: description),
let startSubstringRange = Range(startRange, in: description)
else {
return nil
}
let titleCapture = String(description[titleSubstringRange]).trimmingCharacters(in: .whitespaces)
let startCapture = String(description[startSubstringRange])
let startComponents = startCapture.components(separatedBy: ":")
guard startComponents.count <= 3 else { return nil }
var hours: Double?
var minutes: Double?
var seconds: Double?
var hours: Double?
var minutes: Double?
var seconds: Double?
if startComponents.count == 3 {
hours = Double(startComponents[0])
minutes = Double(startComponents[1])
seconds = Double(startComponents[2])
} else if startComponents.count == 2 {
minutes = Double(startComponents[0])
seconds = Double(startComponents[1])
if startComponents.count == 3 {
hours = Double(startComponents[0])
minutes = Double(startComponents[1])
seconds = Double(startComponents[2])
} else if startComponents.count == 2 {
minutes = Double(startComponents[0])
seconds = Double(startComponents[1])
}
guard var startSeconds = seconds else { return nil }
startSeconds += (minutes ?? 0) * 60
startSeconds += (hours ?? 0) * 60 * 60
return .init(title: titleCapture, start: startSeconds)
}
}
guard var startSeconds = seconds else { return nil }
if let minutes {
startSeconds += 60 * minutes
}
if let hours {
startSeconds += 60 * 60 * hours
}
return .init(title: titleCapture, start: startSeconds)
}
return []
}
}