From 70d39c549b952f9bb1d614bf30f29d5bb7d713d4 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:00:52 +0530 Subject: [PATCH 01/13] create lib_ccxr and add bits module --- .gitignore | 3 +- src/rust/Cargo.lock | 41 +++++--- src/rust/Cargo.toml | 1 + src/rust/lib_ccxr/Cargo.lock | 7 ++ src/rust/lib_ccxr/Cargo.toml | 17 ++++ src/rust/lib_ccxr/src/lib.rs | 1 + src/rust/lib_ccxr/src/util/bits.rs | 153 +++++++++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 5 + 8 files changed, 216 insertions(+), 12 deletions(-) create mode 100644 src/rust/lib_ccxr/Cargo.lock create mode 100644 src/rust/lib_ccxr/Cargo.toml create mode 100644 src/rust/lib_ccxr/src/lib.rs create mode 100644 src/rust/lib_ccxr/src/util/bits.rs create mode 100644 src/rust/lib_ccxr/src/util/mod.rs diff --git a/.gitignore b/.gitignore index a1ef235b5..3f8a2ed82 100644 --- a/.gitignore +++ b/.gitignore @@ -149,7 +149,8 @@ src/rust/CMakeCache.txt src/rust/Makefile src/rust/cmake_install.cmake src/rust/target/ +src/rust/lib_ccxr/target/ windows/ccx_rust.lib windows/*/debug/* windows/*/CACHEDIR.TAG -windows/.rustc_info.json \ No newline at end of file +windows/.rustc_info.json diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 89013e51f..131f9590d 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -111,6 +111,7 @@ dependencies = [ "env_logger", "iconv", "leptonica-sys", + "lib_ccxr", "log", "palette", "rsmpeg", @@ -255,11 +256,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "lib_ccxr" +version = "0.1.0" + [[package]] name = "libc" -version = "0.2.140" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libloading" @@ -413,18 +418,18 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "proc-macro2" 
-version = "1.0.52" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -495,9 +500,23 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.158" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] [[package]] name = "shlex" @@ -530,9 +549,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.4" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c622ae390c9302e214c31013517c2061ecb2699935882c60a9b37f82f8625ae" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ "proc-macro2", "quote", @@ -586,7 +605,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.4", + "syn 2.0.29", ] [[package]] diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index e8edc2096..f736deb3a 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -18,6 +18,7 @@ 
palette = "0.6.0" rsmpeg = { version = "0.14.1", optional = true, features = ["link_system_ffmpeg"] } tesseract-sys = { version = "0.5.14", optional = true, default-features = false} leptonica-sys = { version = "0.4.3", optional = true, default-features = false} +lib_ccxr = { path = "lib_ccxr" } [build-dependencies] bindgen = "0.58.1" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock new file mode 100644 index 000000000..7532d4515 --- /dev/null +++ b/src/rust/lib_ccxr/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "lib_ccxr" +version = "0.1.0" diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml new file mode 100644 index 000000000..ca3612505 --- /dev/null +++ b/src/rust/lib_ccxr/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "lib_ccxr" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[features] +default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] +enable_sharing = [] +wtv_debug = [] +enable_ffmpeg = [] +debug_out = [] +debug = [] +with_libcurl = [] diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs new file mode 100644 index 000000000..812d1edf2 --- /dev/null +++ b/src/rust/lib_ccxr/src/lib.rs @@ -0,0 +1 @@ +pub mod util; diff --git a/src/rust/lib_ccxr/src/util/bits.rs b/src/rust/lib_ccxr/src/util/bits.rs new file mode 100644 index 000000000..9fbe7c7c1 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/bits.rs @@ -0,0 +1,153 @@ +#[rustfmt::skip] +const PARITY_TABLE: [bool; 256] = [ + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, 
true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, +]; + +const BIT_REVERSE_TABLE: [u8; 256] = [ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 
0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +]; + +const HAMMING_8_4_DECODER_TABLE: [u8; 256] = [ + 0x01, 0xff, 0x01, 0x01, 0xff, 0x00, 0x01, 0xff, 0xff, 0x02, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x07, + 0xff, 0x00, 0x01, 0xff, 0x00, 0x00, 0xff, 0x00, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x03, 0xff, + 0xff, 0x0c, 0x01, 0xff, 0x04, 0xff, 0xff, 0x07, 0x06, 0xff, 0xff, 0x07, 0xff, 0x07, 0x07, 0x07, + 0x06, 0xff, 0xff, 0x05, 0xff, 0x00, 0x0d, 0xff, 0x06, 0x06, 0x06, 0xff, 0x06, 0xff, 0xff, 0x07, + 0xff, 0x02, 0x01, 0xff, 0x04, 0xff, 0xff, 0x09, 0x02, 0x02, 0xff, 0x02, 0xff, 0x02, 0x03, 0xff, + 0x08, 0xff, 0xff, 0x05, 0xff, 0x00, 0x03, 0xff, 0xff, 0x02, 0x03, 0xff, 0x03, 0xff, 0x03, 0x03, + 0x04, 0xff, 0xff, 0x05, 0x04, 0x04, 0x04, 0xff, 0xff, 0x02, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x07, + 0xff, 0x05, 0x05, 0x05, 0x04, 0xff, 0xff, 0x05, 0x06, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x03, 0xff, + 0xff, 0x0c, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x09, 0x0a, 0xff, 0xff, 0x0b, 0x0a, 0x0a, 0x0a, 0xff, 
+ 0x08, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x0d, 0xff, 0xff, 0x0b, 0x0b, 0x0b, 0x0a, 0xff, 0xff, 0x0b, + 0x0c, 0x0c, 0xff, 0x0c, 0xff, 0x0c, 0x0d, 0xff, 0xff, 0x0c, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x07, + 0xff, 0x0c, 0x0d, 0xff, 0x0d, 0xff, 0x0d, 0x0d, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x0d, 0xff, + 0x08, 0xff, 0xff, 0x09, 0xff, 0x09, 0x09, 0x09, 0xff, 0x02, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x09, + 0x08, 0x08, 0x08, 0xff, 0x08, 0xff, 0xff, 0x09, 0x08, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x03, 0xff, + 0xff, 0x0c, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x09, 0x0f, 0xff, 0x0f, 0x0f, 0xff, 0x0e, 0x0f, 0xff, + 0x08, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x0d, 0xff, 0xff, 0x0e, 0x0f, 0xff, 0x0e, 0x0e, 0xff, 0x0e, +]; + +/// Returns the parity of the given byte. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(parity(0x00), false); +/// assert_eq!(parity(0x01), true); +/// ``` +pub fn parity(value: u8) -> bool { + PARITY_TABLE[value as usize] +} + +/// Returns a byte with its bits reversed from input. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(reverse(0x00), 0x00); +/// assert_eq!(reverse(0x01), 0x80); +/// ``` +pub fn reverse(value: u8) -> u8 { + BIT_REVERSE_TABLE[value as usize] +} + +/// Returns the decoded byte given a \[8,4\] hamming code byte if no error is present. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(decode_hamming_8_4(0x00), Some(0x01)); +/// assert_eq!(decode_hamming_8_4(0x01), None); +/// ``` +pub fn decode_hamming_8_4(value: u8) -> Option<u8> { + // ETS 300 706, chapter 8.2 + let decoded = HAMMING_8_4_DECODER_TABLE[value as usize]; + if decoded == 0xff { + None + } else { + Some(decoded) + } +} + +/// Returns the decoded byte given a \[24,18\] hamming code byte if no error is present. 
+/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(decode_hamming_24_18(0x00000000), Some(0x00000000)); +/// assert_eq!(decode_hamming_24_18(0x00000001), None); +/// ``` +pub fn decode_hamming_24_18(mut value: u32) -> Option<u32> { + // ETS 300 706, chapter 8.3 + let mut test: u8 = 0; + + // Tests A-F correspond to bits 0-5 respectively in 'test'. + for i in 0..23 { + test ^= (((value >> i) & 0x01) as u8) * (i + 33); + } + + // Only parity bit is tested for bit 24 + test ^= (((value >> 23) & 0x01) as u8) * 32u8; + + if (test & 0x1f) != 0x1f { + // Not all tests A-E correct + if (test & 0x20) == 0x20 { + // F correct: Double error + return None; + } + // Test F incorrect: Single error + value ^= 1 << (30 - test); + } + + Some( + (value & 0x000004) >> 2 + | (value & 0x000070) >> 3 + | (value & 0x007f00) >> 4 + | (value & 0x7f0000) >> 5, + ) +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs new file mode 100644 index 000000000..30e215483 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -0,0 +1,5 @@ +//! Provides basic utilities used throughout the program. 
+ +mod bits; + +pub use bits::*; From 03b82924eb483dbc52a836b075fcd0a68a1fdc0c Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:16:39 +0530 Subject: [PATCH 02/13] add log module --- src/rust/Cargo.lock | 118 ++++--- src/rust/lib_ccxr/Cargo.lock | 9 + src/rust/lib_ccxr/Cargo.toml | 1 + src/rust/lib_ccxr/src/util/log.rs | 530 ++++++++++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 1 + 5 files changed, 609 insertions(+), 50 deletions(-) create mode 100644 src/rust/lib_ccxr/src/util/log.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 131f9590d..9d76606fb 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a" dependencies = [ "memchr", ] @@ -52,7 +52,7 @@ version = "0.58.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8523b410d7187a43085e7e064416ea32ded16bd0a4e6fc025e21616d01258f" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr 0.4.0", "clang-sys", "clap", @@ -75,7 +75,7 @@ version = "0.64.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr 0.6.0", "clang-sys", "lazy_static", @@ -97,11 +97,17 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "camino" -version = "1.1.4" +version = 
"1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c530edf18f37068ac2d977409ed5cd50d53d73bc653c7647b48eb78976ac9ae2" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" [[package]] name = "ccx_rust" @@ -124,7 +130,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" dependencies = [ - "nom 5.1.2", + "nom 5.1.3", ] [[package]] @@ -144,9 +150,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clang-sys" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" dependencies = [ "glob", "libc", @@ -161,7 +167,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim", "textwrap", "unicode-width", @@ -176,9 +182,9 @@ checksum = "74c57ab96715773d9cb9789b38eb7cbf04b3c6f5624a9d98f51761603376767c" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "env_logger" @@ -247,9 +253,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "leptonica-sys" -version = "0.4.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "811a92997ff15e0d7323c1e8fa7190331dd02ea50d9d7cfaa4fdc2b21a613a2e" +checksum = "eff3f1dc2f0112411228f8db99ca8a6a1157537a7887b28b1c91fdc4051fb326" dependencies = [ "bindgen 0.64.0", "pkg-config", @@ 
-259,6 +265,9 @@ dependencies = [ [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "bitflags 2.4.0", +] [[package]] name = "libc" @@ -278,12 +287,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" @@ -299,9 +305,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "nom" -version = "5.1.2" +version = "5.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" dependencies = [ "memchr", "version_check", @@ -319,18 +325,18 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "palette" @@ -358,9 +364,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = 
"peeking_take_while" @@ -370,9 +376,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "phf" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ "phf_macros", "phf_shared", @@ -380,9 +386,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", "rand", @@ -390,31 +396,31 @@ dependencies = [ [[package]] name = "phf_macros" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" dependencies = [ "phf_generator", "phf_shared", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.29", ] [[package]] name = "phf_shared" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "proc-macro2" @@ -451,9 +457,21 @@ checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" [[package]] name = "regex" -version = "1.7.1" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" dependencies = [ "aho-corasick", "memchr", @@ -462,9 +480,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rsmpeg" @@ -526,9 +544,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "siphasher" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "strsim" @@ -590,18 +608,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" dependencies = [ "proc-macro2", "quote", @@ -619,9 +637,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 7532d4515..d7ac327fc 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "bitflags", +] diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index ca3612505..fb032a7ce 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bitflags = "2.3.1" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/log.rs b/src/rust/lib_ccxr/src/util/log.rs new file mode 100644 index 000000000..92913aed1 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/log.rs @@ -0,0 +1,530 @@ +//! Provides primitives for logging functionality +//! +//! The interface of this module is highly inspired by the famous log crate of rust. +//! +//! The first step before using any of the logging functionality is to setup a logger. This can be +//! 
+//! done by creating a [`CCExtractorLogger`] and calling [`set_logger`] with it. To gain access to +//! the instance of [`CCExtractorLogger`], [`logger`] or [`logger_mut`] can be used. +//! +//! There are 4 types of logging messages based on their importance and severity denoted by their +//! respective macros. +//! - [`fatal!`] +//! - [`error!`] +//! - [`info!`] +//! - [`debug!`] +//! +//! Hex dumps can be logged for debugging by [`hex_dump`] and [`hex_dump_with_start_idx`]. Communication +//! with the GUI is possible through [`send_gui`]. + +use bitflags::bitflags; +use std::fmt::Arguments; +use std::sync::{OnceLock, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +static LOGGER: OnceLock<RwLock<CCExtractorLogger>> = OnceLock::new(); + +/// The possible targets for logging messages. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OutputTarget { + Stdout, + Stderr, + Quiet, +} + +bitflags! { + /// A bitflag for the types of a Debug Message. + /// + /// Each debug message can belong to one or more of these types. The + /// constants of this struct can be used as bitflags for one message to + /// belong to more than one type. 
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct DebugMessageFlag: u16 { + /// Show information related to parsing the container + const PARSE = 0b0000000000000001; + /// Show video stream related information + const VIDEO_STREAM = 0b0000000000000010; + /// Show GOP and PTS timing information + const TIME = 0b0000000000000100; + /// Show lots of debugging output + const VERBOSE = 0b0000000000001000; + /// Show CC-608 decoder debug + const DECODER_608 = 0b0000000000010000; + /// Show CC-708 decoder debug + const DECODER_708 = 0b0000000000100000; + /// Show XDS decoder debug + const DECODER_XDS = 0b0000000001000000; + /// Show Caption blocks with FTS timing + const CB_RAW = 0b0000000010000000; + /// Generic, always displayed even if no debug is selected + const GENERIC_NOTICE = 0b0000000100000000; + /// Show teletext debug + const TELETEXT = 0b0000001000000000; + /// Show Program Allocation Table dump + const PAT = 0b0000010000000000; + /// Show Program Map Table dump + const PMT = 0b0000100000000000; + /// Show Levenshtein distance calculations + const LEVENSHTEIN = 0b0001000000000000; + /// Show DVB debug + const DVB = 0b0010000000000000; + /// Dump defective TS packets + const DUMP_DEF = 0b0100000000000000; + /// Extracted captions sharing service + #[cfg(feature = "enable_sharing")] + const SHARE = 0b1000000000000000; + } +} + +/// All possible causes for crashing the program instantly. Used in `cause` key of [`fatal!`] macro. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ExitCause { + Ok, + Failure, + NoInputFiles, + TooManyInputFiles, + IncompatibleParameters, + UnableToDetermineFileSize, + MalformedParameter, + ReadError, + NoCaptions, + WithHelp, + NotClassified, + ErrorInCapitalizationFile, + BufferFull, + MissingAsfHeader, + MissingRcwtHeader, + + FileCreationFailed, + Unsupported, + NotEnoughMemory, + Bug, +} + +/// A message to be sent to GUI for XDS. Used in [`send_gui`]. 
+pub enum GuiXdsMessage<'a> { + ProgramName(&'a str), + ProgramIdNr { + minute: u8, + hour: u8, + date: u8, + month: u8, + }, + ProgramDescription { + line_num: i32, + desc: &'a str, + }, + CallLetters(&'a str), +} + +/// A mask to filter the debug messages based on their type specified by [`DebugMessageFlag`]. +/// +/// This operates in one of two modes: Normal Mode and Debug Mode. The mask used when in Debug Mode is a superset +/// of the mask used when in Normal Mode. One can switch between the two modes by [`DebugMessageMask::set_debug_mode`]. +#[derive(Debug)] +pub struct DebugMessageMask { + debug_mode: bool, + mask_on_normal: DebugMessageFlag, + mask_on_debug: DebugMessageFlag, +} + +/// A global logger that is used throughout CCExtractor and stores the settings related to logging. +/// +/// A global logger can be set up initially using [`set_logger`]. Use the following convenience +/// macros for logging: [`fatal!`], [`error!`], [`info!`] and [`debug!`]. +#[derive(Debug)] +pub struct CCExtractorLogger { + target: OutputTarget, + debug_mask: DebugMessageMask, + gui_mode: bool, +} + +impl DebugMessageMask { + /// Creates a new [`DebugMessageMask`] given a mask to be used for Normal Mode and an additional mask to be + /// used in Debug Mode. + /// + /// Note that while in Debug Mode, the mask for Normal Mode will still be valid. + /// `extra_mask_on_debug` only specifies additional flags to be set on Debug Mode. + pub const fn new( + mask_on_normal: DebugMessageFlag, + extra_mask_on_debug: DebugMessageFlag, + ) -> DebugMessageMask { + DebugMessageMask { + debug_mode: false, + mask_on_normal, + mask_on_debug: extra_mask_on_debug.union(mask_on_normal), + } + } + + /// Set the mode to Normal or Debug Mode based on `false` or `true` respectively. + pub fn set_debug_mode(&mut self, mode: bool) { + self.debug_mode = mode; + } + + /// Check if the current mode is set to Debug Mode. 
+ pub fn debug_mode(&self) -> bool { + self.debug_mode + } + + /// Return the mask according to its mode. + pub fn mask(&self) -> DebugMessageFlag { + if self.debug_mode { + self.mask_on_debug + } else { + self.mask_on_normal + } + } +} + +impl ExitCause { + /// Returns the exit code associated with the cause of the error. + /// + /// The GUI depends on these exit codes. + /// Exit code of 0 means OK as usual. + /// Exit code below 100 means display whatever was output to stderr as a warning. + /// Exit code above or equal to 100 means display whatever was output to stdout as an error. + pub fn exit_code(&self) -> i32 { + match self { + ExitCause::Ok => 0, + ExitCause::Failure => 1, + ExitCause::NoInputFiles => 2, + ExitCause::TooManyInputFiles => 3, + ExitCause::IncompatibleParameters => 4, + ExitCause::UnableToDetermineFileSize => 6, + ExitCause::MalformedParameter => 7, + ExitCause::ReadError => 8, + ExitCause::NoCaptions => 10, + ExitCause::WithHelp => 11, + ExitCause::NotClassified => 300, + ExitCause::ErrorInCapitalizationFile => 501, + ExitCause::BufferFull => 502, + ExitCause::MissingAsfHeader => 1001, + ExitCause::MissingRcwtHeader => 1002, + + ExitCause::FileCreationFailed => 5, + ExitCause::Unsupported => 9, + ExitCause::NotEnoughMemory => 500, + ExitCause::Bug => 1000, + } + } +} + +impl<'a> CCExtractorLogger { + /// Returns a new instance of CCExtractorLogger with the provided settings. + /// + /// `gui_mode` is used to determine if the log messages are intercepted by a GUI. + /// `target` specifies the location for printing the log messages. + /// `debug_mask` is used to filter debug messages based on their type. + pub const fn new( + target: OutputTarget, + debug_mask: DebugMessageMask, + gui_mode: bool, + ) -> CCExtractorLogger { + CCExtractorLogger { + target, + debug_mask, + gui_mode, + } + } + + /// Set the mode to Normal or Debug Mode based on `false` or `true` respectively for the + /// underlying [`DebugMessageMask`]. 
+ /// + /// This method switches the mask used for filtering debug messages. + /// Similar to [`DebugMessageMask::set_debug_mode`]. + pub fn set_debug_mode(&mut self, mode: bool) { + self.debug_mask.set_debug_mode(mode) + } + + /// Check if the current mode is set to Debug Mode. + /// + /// Similar to [`DebugMessageMask::debug_mode`]. + pub fn debug_mode(&self) -> bool { + self.debug_mask.debug_mode() + } + + /// Returns the currently set target for logging messages. + pub fn target(&self) -> OutputTarget { + self.target + } + + /// Check if the messages are intercepted by GUI. + pub fn is_gui_mode(&self) -> bool { + self.gui_mode + } + + fn print(&self, args: &Arguments<'a>) { + match &self.target { + OutputTarget::Stdout => print!("{}", args), + OutputTarget::Stderr => eprint!("{}", args), + OutputTarget::Quiet => {} + } + } + + /// Log a fatal error message. Use [`fatal!`] instead. + /// + /// Used for logging errors dangerous enough to crash the program instantly. + pub fn log_fatal(&self, exit_cause: ExitCause, args: &Arguments<'a>) -> ! { + self.log_error(args); + println!(); // TODO: print end message + std::process::exit(exit_cause.exit_code()) + } + + /// Log an error message. Use [`error!`] instead. + /// + /// Used for logging general errors occurring in the program. + pub fn log_error(&self, args: &Arguments<'a>) { + if self.gui_mode { + eprint!("###MESSAGE#") + } else { + eprint!("\rError: ") + } + + eprintln!("{}", args); + } + + /// Log an informational message. Use [`info!`] instead. + /// + /// Used for logging extra information about the execution of the program. + pub fn log_info(&self, args: &Arguments<'a>) { + // TODO: call activity_header + self.print(&format_args!("{}", args)); + } + + /// Log a debug message. Use [`debug!`] instead. + /// + /// Used for logging debug messages throughout the program. 
+ pub fn log_debug(&self, message_type: DebugMessageFlag, args: &Arguments<'a>) { + if self.debug_mask.mask().intersects(message_type) { + self.print(&format_args!("{}", args)); + } + } + + /// Send a message to GUI. Use [`send_gui`] instead. + /// + /// Used for sending information related to XDS to the GUI. + pub fn send_gui(&self, _message_type: GuiXdsMessage) { + todo!() + } + + /// Log a hex dump which is helpful for debugging purposes. + /// Use [`hex_dump`] or [`hex_dump_with_start_idx`] instead. + /// + /// Setting `clear_high_bit` to true will ignore the highest bit whien displaying the + /// characters. This makes visual CC inspection easier since the highest bit is usually used + /// as a parity bit. + /// + /// The output will contain byte numbers which can be made to start from any number using + /// `start_idx`. This is usually `0`. + pub fn log_hex_dump( + &self, + message_type: DebugMessageFlag, + data: &[u8], + clear_high_bit: bool, + start_idx: usize, + ) { + if self.debug_mask.mask().intersects(message_type) { + let chunked_data = data.chunks(16); + + for (id, chunk) in chunked_data.enumerate() { + self.print(&format_args!("{:05} | ", id * 16 + start_idx)); + for x in chunk { + self.print(&format_args!("{:02X} ", x)); + } + + for _ in 0..(16 - chunk.len()) { + self.print(&format_args!(" ")); + } + + self.print(&format_args!(" | ")); + + for x in chunk { + let c = if x >= &b' ' { + // 0x7F < remove high bit, convenient for visual CC inspection + x & if clear_high_bit { 0x7F } else { 0xFF } + } else { + b' ' + }; + + self.print(&format_args!("{}", c as char)); + } + + self.print(&format_args!("\n")); + } + } + } +} + +/// Setup the global logger. +/// +/// This function can only be called once throught the execution of program. The logger can then be +/// accessed by [`logger`] and [`logger_mut`]. 
+pub fn set_logger(logger: CCExtractorLogger) -> Result<(), CCExtractorLogger> { + LOGGER + .set(logger.into()) + .map_err(|x| x.into_inner().unwrap()) +} + +/// Get an immutable instance of the global logger. +/// +/// This function will return [`None`] if the logger is not setup initially by [`set_logger`] or if +/// the underlying RwLock fails to generate a read lock. +/// +/// Use [`logger_mut`] to get a mutable instance. +pub fn logger() -> Option> { + LOGGER.get()?.read().ok() +} + +/// Get a mutable instance of the global logger. +/// +/// This function will return [`None`] if the logger is not setup initially by [`set_logger`] or if +/// the underlying RwLock fails to generate a write lock. +/// +/// Use [`logger`] to get an immutable instance. +pub fn logger_mut() -> Option> { + LOGGER.get()?.write().ok() +} + +/// Log a fatal error message. +/// +/// Used for logging errors dangerous enough to crash the program instantly. This macro does not +/// return (i.e. it returns `!`). A logger needs to be setup initially by [`set_logger`]. +/// +/// # Usage +/// This macro requires an [`ExitCause`] which provides the appropriate exit codes for shutting +/// down program. This is provided using a key called `cause` which comes before the `;`. After +/// `;`, the arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let actual = 2; +/// # let required = 1; +/// fatal!( +/// cause = ExitCause::TooManyInputFiles; +/// "{} input files were provided but only {} were needed", actual, required +/// ); +/// ``` +#[macro_export] +macro_rules! fatal { + (cause = $exit_cause:expr; $($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_fatal($exit_cause, &format_args!($($args),*)) + }; +} + +/// Log an error message. +/// +/// Used for logging general errors occuring in the program. A logger needs to be setup +/// initially by [`set_logger`]. 
+/// +/// # Usage +/// The arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let missing_blocks = 2; +/// error!("missing {} additional blocks", missing_blocks); +/// ``` +#[macro_export] +macro_rules! error { + ($($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_error(&format_args!($($args),*)) + } +} + +/// Log an informational message. +/// +/// Used for logging extra information about the execution of the program. A logger needs to be +/// setup initially by [`set_logger`]. +/// +/// # Usage +/// The arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// info!("Processing the header section"); +/// ``` +#[macro_export] +macro_rules! info { + ($($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_info(&format_args!($($args),*)) + }; +} + +/// Log a debug message. +/// +/// Used for logging debug messages throughout the program. A logger needs to be setup initially +/// by [`set_logger`]. +/// +/// # Usage +/// This macro requires an [`DebugMessageFlag`] which represents the type of debug message. It is +/// used for filtering the messages. This is provided using a key called `msg_type` which comes +/// before the `;`. After `;`, the arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let byte1 = 23u8; +/// # let byte2 = 45u8; +/// debug!( +/// msg_type = DebugMessageFlag::DECODER_708; +/// "Packet Start with contents {} {}", byte1, byte2 +/// ); +/// ``` +#[macro_export] +macro_rules! 
debug { + (msg_type = $msg_flag:expr; $($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_debug($msg_flag, &format_args!($($args),*)) + }; +} + +pub use debug; +pub use error; +pub use fatal; +pub use info; + +/// Log a hex dump which is helpful for debugging purposes. +/// +/// Setting `clear_high_bit` to true will ignore the highest bit whien displaying the +/// characters. This makes visual CC inspection easier since the highest bit is usually used +/// as a parity bit. +/// +/// The byte numbers start from `0` by default. Use [`hex_dump_with_start_idx`] if a +/// different starting index is required. +pub fn hex_dump(message_type: DebugMessageFlag, data: &[u8], clear_high_bit: bool) { + logger() + .expect("Logger is not yet initialized") + .log_hex_dump(message_type, data, clear_high_bit, 0) +} + +/// Log a hex dump which is helpful for debugging purposes. +/// +/// Setting `clear_high_bit` to true will ignore the highest bit whien displaying the +/// characters. This makes visual CC inspection easier since the highest bit is usually used +/// as a parity bit. +/// +/// The output will contain byte numbers which can be made to start from any number using +/// `start_idx`. This is usually `0`. +pub fn hex_dump_with_start_idx( + message_type: DebugMessageFlag, + data: &[u8], + clear_high_bit: bool, + start_idx: usize, +) { + logger() + .expect("Logger is not yet initialized") + .log_hex_dump(message_type, data, clear_high_bit, start_idx) +} + +/// Send a message to GUI. +/// +/// Used for sending information related to XDS to the GUI. +pub fn send_gui(message: GuiXdsMessage) { + logger() + .expect("Logger is not yet initialized") + .send_gui(message) +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index 30e215483..c39f5e7a3 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -1,5 +1,6 @@ //! 
Provides basic utilities used throughout the program. mod bits; +pub mod log; pub use bits::*; From 69907f8ec75e7506900f09ea9c8f072ad3f94657 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:25:00 +0530 Subject: [PATCH 03/13] add encoding module --- src/rust/lib_ccxr/src/util/encoding.rs | 745 +++++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 1 + 2 files changed, 746 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/encoding.rs diff --git a/src/rust/lib_ccxr/src/util/encoding.rs b/src/rust/lib_ccxr/src/util/encoding.rs new file mode 100644 index 000000000..508f039c4 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/encoding.rs @@ -0,0 +1,745 @@ +//! A module for working with different kinds of text encoding formats. +//! +//! Any Text within the entire application can be in one of the following 4 formats which is +//! represented by [`Encoding`]. +//! - [`Line 21`](Encoding::Line21) - Used in 608 captions. +//! - [`Latin-1`](Encoding::Latin1) - ISO/IEC 8859-1. +//! - [`Ucs2`](Encoding::Ucs2) - UCS-2 code points. +//! - [`UTF-8`](Encoding::Utf8) +//! +//! To represent a string in any one of the above encoding, use the following respectively. +//! - [`Line21String`] +//! - [`Latin1String`] +//! - [`Ucs2String`] +//! - [`String`] (same as from rust std) +//! +//! Each of these 4 types can be converted to any other type using [`From::from`] and [`Into::into`]. +//! +//! The above types can be used when the encoding is known at compile-time. If the exact encoding +//! is only known at runtime then [`EncodedString`] can be used. Each of the above 4 types can be +//! converted to [`EncodedString`] using [`From::from`] and [`Into::into`]. An [`EncodedString`] can +//! be converted to any of the 4 types by `to_*` methods. Conversions where the target encoding is +//! only known at runtime can be done using [`EncodedString::encode_to`]. + +/// Represents the different kinds of encoding that [`EncodedString`] can take. 
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Encoding {
    Line21,
    Latin1,
    Ucs2,
    Utf8,
}

/// Represents a character in Line 21 encoding.
pub type Line21Char = u8;

/// Represents a character in Latin-1 encoding.
pub type Latin1Char = u8;

/// Represents a character in UCS-2 encoding.
pub type Ucs2Char = u16;

/// A String-like type containing a sequence of Line 21 encoded characters.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Line21String(Vec<Line21Char>);

/// A String-like type containing a sequence of Latin-1 encoded characters.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Latin1String(Vec<Latin1Char>);

/// A String-like type containing a sequence of UCS-2 code points.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Ucs2String(Vec<Ucs2Char>);

/// A String-like type that stores its characters in one of the [`Encoding`] formats.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EncodedString {
    Line21(Line21String),
    Latin1(Latin1String),
    Ucs2(Ucs2String),
    Utf8(String),
}

/// A placeholder for missing characters.
///
/// It is used for interconverting between [`Encoding`] formats if the target
/// format does not support a character in the source format.
pub const UNAVAILABLE_CHAR: u8 = b'?';

impl Line21String {
    /// Creates a new empty [`Line21String`].
    pub fn new() -> Line21String {
        Line21String(Vec::new())
    }

    /// Creates a new [`Line21String`] from the contents of given [`Vec`].
    pub fn from_vec(v: Vec<Line21Char>) -> Line21String {
        Line21String(v)
    }

    /// Returns a reference to the internal [`Vec`].
    pub fn as_vec(&self) -> &Vec<Line21Char> {
        &self.0
    }

    /// Returns a mutable reference to the internal [`Vec`].
    pub fn as_mut_vec(&mut self) -> &mut Vec<Line21Char> {
        &mut self.0
    }

    /// Returns the internal [`Vec`], consuming this [`Line21String`].
    pub fn into_vec(self) -> Vec<Line21Char> {
        self.0
    }

    /// Converts this [`Line21String`] to a format provided by `encoding`, returning a new [`EncodedString`].
    pub fn encode_to(&self, encoding: Encoding) -> EncodedString {
        match encoding {
            Encoding::Line21 => self.clone().into(),
            Encoding::Latin1 => EncodedString::Latin1(self.into()),
            Encoding::Ucs2 => EncodedString::Ucs2(self.into()),
            Encoding::Utf8 => EncodedString::Utf8(self.into()),
        }
    }

    /// Converts the [`Line21String`] to lowercase, returning a new [`Line21String`].
    pub fn to_lowercase(&self) -> Line21String {
        Line21String::from_vec(
            self.as_vec()
                .iter()
                .map(|&c| line21_to_lowercase(c))
                .collect(),
        )
    }

    /// Converts the [`Line21String`] to uppercase, returning a new [`Line21String`].
    pub fn to_uppercase(&self) -> Line21String {
        Line21String::from_vec(
            self.as_vec()
                .iter()
                .map(|&c| line21_to_uppercase(c))
                .collect(),
        )
    }
}

impl Latin1String {
    /// Creates a new empty [`Latin1String`].
    pub fn new() -> Latin1String {
        Latin1String(Vec::new())
    }

    /// Creates a new [`Latin1String`] from the contents of given [`Vec`].
    pub fn from_vec(v: Vec<Latin1Char>) -> Latin1String {
        Latin1String(v)
    }

    /// Returns a reference to the internal [`Vec`].
    pub fn as_vec(&self) -> &Vec<Latin1Char> {
        &self.0
    }

    /// Returns a mutable reference to the internal [`Vec`].
    pub fn as_mut_vec(&mut self) -> &mut Vec<Latin1Char> {
        &mut self.0
    }

    /// Returns the internal [`Vec`], consuming this [`Latin1String`].
    pub fn into_vec(self) -> Vec<Latin1Char> {
        self.0
    }

    /// Converts this [`Latin1String`] to a format provided by `encoding`, returning a new [`EncodedString`].
    pub fn encode_to(&self, encoding: Encoding) -> EncodedString {
        match encoding {
            Encoding::Line21 => EncodedString::Line21(self.into()),
            Encoding::Latin1 => self.clone().into(),
            Encoding::Ucs2 => EncodedString::Ucs2(self.into()),
            Encoding::Utf8 => EncodedString::Utf8(self.into()),
        }
    }
}

impl Ucs2String {
    /// Creates a new empty [`Ucs2String`].
    pub fn new() -> Ucs2String {
        Ucs2String(Vec::new())
    }

    /// Creates a new [`Ucs2String`] from the contents of given [`Vec`].
    pub fn from_vec(v: Vec<Ucs2Char>) -> Ucs2String {
        Ucs2String(v)
    }

    /// Returns a reference to the internal [`Vec`].
    pub fn as_vec(&self) -> &Vec<Ucs2Char> {
        &self.0
    }

    /// Returns a mutable reference to the internal [`Vec`].
    pub fn as_mut_vec(&mut self) -> &mut Vec<Ucs2Char> {
        &mut self.0
    }

    /// Returns the internal [`Vec`], consuming this [`Ucs2String`].
    pub fn into_vec(self) -> Vec<Ucs2Char> {
        self.0
    }

    /// Converts this [`Ucs2String`] to a format provided by `encoding`, returning a new [`EncodedString`].
    pub fn encode_to(&self, encoding: Encoding) -> EncodedString {
        match encoding {
            Encoding::Line21 => EncodedString::Line21(self.into()),
            Encoding::Latin1 => EncodedString::Latin1(self.into()),
            Encoding::Ucs2 => self.clone().into(),
            Encoding::Utf8 => EncodedString::Utf8(self.into()),
        }
    }
}

impl From<&Latin1String> for Line21String {
    /// Latin-1 code points equal the first 256 UCS-2 code points, so this is the existing
    /// Latin-1 -> UCS-2 -> Line 21 path composed into a single conversion.
    fn from(value: &Latin1String) -> Line21String {
        Line21String::from_vec(
            value
                .as_vec()
                .iter()
                .map(|&c| ucs2_to_line21(Ucs2Char::from(c)))
                .collect(),
        )
    }
}

impl From<&Ucs2String> for Line21String {
    fn from(value: &Ucs2String) -> Line21String {
        Line21String::from_vec(value.as_vec().iter().map(|&c| ucs2_to_line21(c)).collect())
    }
}

impl From<&str> for Line21String {
    fn from(value: &str) -> Line21String {
        Line21String::from_vec(
            value
                .chars()
                .map(char_to_ucs2)
                .map(ucs2_to_line21)
                .collect(),
        )
    }
}

impl From<&Line21String> for Latin1String {
    fn from(value: &Line21String) -> Latin1String {
        Latin1String::from_vec(
            value
                .as_vec()
                .iter()
                .map(|&x| line21_to_latin1(x))
                .collect(),
        )
    }
}

impl From<&Ucs2String> for Latin1String {
    fn from(value: &Ucs2String) -> Latin1String {
        Latin1String::from_vec(value.as_vec().iter().map(|&c| ucs2_to_latin1(c)).collect())
    }
}

impl From<&str> for Latin1String {
    fn from(value: &str) -> Latin1String {
        Latin1String::from_vec(
            value
                .chars()
                .map(char_to_ucs2)
                .map(ucs2_to_latin1)
                .collect(),
        )
    }
}

impl From<&Line21String> for Ucs2String {
    fn from(value: &Line21String) -> Ucs2String {
        Ucs2String::from_vec(value.as_vec().iter().map(|&x| line21_to_ucs2(x)).collect())
    }
}

impl From<&Latin1String> for Ucs2String {
    fn from(value: &Latin1String) -> Ucs2String {
        Ucs2String::from_vec(value.as_vec().iter().map(|&x| Ucs2Char::from(x)).collect())
    }
}

impl From<&str> for Ucs2String {
    fn from(value: &str) -> Ucs2String {
        Ucs2String::from_vec(value.chars().map(char_to_ucs2).collect())
    }
}

impl From<&Line21String> for String {
    fn from(value: &Line21String) -> String {
        value
            .as_vec()
            .iter()
            .map(|&x| line21_to_ucs2(x))
            .map(ucs2_to_char)
            .collect()
    }
}

impl From<&Latin1String> for String {
    fn from(value: &Latin1String) -> String {
        value.as_vec().iter().map(|&x| char::from(x)).collect()
    }
}

impl From<&Ucs2String> for String {
    fn from(value: &Ucs2String) -> String {
        value.as_vec().iter().map(|&x| ucs2_to_char(x)).collect()
    }
}

impl Default for Line21String {
    fn default() -> Self {
        Self::new()
    }
}

impl Default for Latin1String {
    fn default() -> Self {
        Self::new()
    }
}

impl Default for Ucs2String {
    fn default() -> Self {
        Self::new()
    }
}

impl EncodedString {
    /// Creates an [`EncodedString`] with the given `encoding` from string slice.
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let s = EncodedString::from_str("è", Encoding::Latin1);
    /// assert_eq!(s, Latin1String::from_vec(vec![0xe8]).into())
    /// ```
    pub fn from_str(string: &str, encoding: Encoding) -> EncodedString {
        match encoding {
            Encoding::Line21 => EncodedString::Line21(string.into()),
            Encoding::Latin1 => EncodedString::Latin1(string.into()),
            Encoding::Ucs2 => EncodedString::Ucs2(string.into()),
            Encoding::Utf8 => EncodedString::Utf8(string.to_string()),
        }
    }

    /// Returns the [`Encoding`] format of this [`EncodedString`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let s: EncodedString = Line21String::from_vec(vec![b'a', b'b']).into();
    /// assert_eq!(s.encoding(), Encoding::Line21);
    /// ```
    pub fn encoding(&self) -> Encoding {
        match self {
            EncodedString::Line21(_) => Encoding::Line21,
            EncodedString::Latin1(_) => Encoding::Latin1,
            EncodedString::Ucs2(_) => Encoding::Ucs2,
            EncodedString::Utf8(_) => Encoding::Utf8,
        }
    }

    /// Converts the [`EncodedString`] to Line 21 format, returning a new [`Line21String`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let s = EncodedString::from_str("Hi 😀", Encoding::Utf8);
    /// assert_eq!(
    ///     s.to_line21(),
    ///     Line21String::from_vec(
    ///         vec![0x48, 0x69, 0x20, 0x3f] // "Hi ?"
    ///     )
    /// )
    /// ```
    pub fn to_line21(&self) -> Line21String {
        match self {
            EncodedString::Line21(l) => l.clone(),
            EncodedString::Latin1(l) => l.into(),
            EncodedString::Ucs2(u) => u.into(),
            EncodedString::Utf8(s) => s.as_str().into(),
        }
    }

    /// Converts the [`EncodedString`] to Latin-1 format, returning a new [`Latin1String`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let s = EncodedString::from_str("résumé", Encoding::Utf8);
    /// assert_eq!(
    ///     s.to_latin1(),
    ///     Latin1String::from_vec(
    ///         vec![0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9]
    ///     )
    /// )
    /// ```
    pub fn to_latin1(&self) -> Latin1String {
        match self {
            EncodedString::Line21(l) => l.into(),
            EncodedString::Latin1(l) => l.clone(),
            EncodedString::Ucs2(u) => u.into(),
            EncodedString::Utf8(s) => s.as_str().into(),
        }
    }

    /// Converts the [`EncodedString`] to UCS-2 format, returning a new [`Ucs2String`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding
    /// let s: EncodedString = Line21String::from_vec(v).into();
    /// assert_eq!(
    ///     s.to_ucs2(),
    ///     Ucs2String::from_vec(
    ///         vec![0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9]
    ///     )
    /// )
    /// ```
    pub fn to_ucs2(&self) -> Ucs2String {
        match self {
            EncodedString::Line21(l) => l.into(),
            EncodedString::Latin1(l) => l.into(),
            EncodedString::Ucs2(u) => u.clone(),
            EncodedString::Utf8(s) => s.as_str().into(),
        }
    }

    /// Converts the [`EncodedString`] to UTF-8 format, returning a new [`String`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding
    /// let s: EncodedString = Line21String::from_vec(v).into();
    /// assert_eq!(s.to_utf8(), "résumé".to_string())
    /// ```
    pub fn to_utf8(&self) -> String {
        match self {
            EncodedString::Line21(l) => l.into(),
            EncodedString::Latin1(l) => l.into(),
            EncodedString::Ucs2(u) => u.into(),
            EncodedString::Utf8(s) => s.clone(),
        }
    }

    /// Converts this [`EncodedString`] to a format provided by `encoding`, returning a new [`EncodedString`].
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding
    /// let s: EncodedString = Line21String::from_vec(v).into();
    /// assert_eq!(s.encode_to(Encoding::Utf8), "résumé".to_string().into())
    /// ```
    pub fn encode_to(&self, encoding: Encoding) -> EncodedString {
        match encoding {
            Encoding::Line21 => EncodedString::Line21(self.to_line21()),
            Encoding::Latin1 => EncodedString::Latin1(self.to_latin1()),
            Encoding::Ucs2 => EncodedString::Ucs2(self.to_ucs2()),
            Encoding::Utf8 => EncodedString::Utf8(self.to_utf8()),
        }
    }

    /// Converts the [`EncodedString`] to lowercase, returning a new [`EncodedString`].
    ///
    /// # Panics
    /// Not yet implemented for the Latin-1 and UCS-2 variants.
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let a = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding
    /// let b = vec![0x52, 0x91, 0x53, 0x55, 0x4d, 0x91]; // RÉSUMÉ in Line 21 encoding
    /// let sa: EncodedString = Line21String::from_vec(a).into();
    /// let sb: EncodedString = Line21String::from_vec(b).into();
    /// assert_eq!(sb.to_lowercase(), sa)
    /// ```
    pub fn to_lowercase(&self) -> EncodedString {
        match self {
            EncodedString::Line21(l) => l.to_lowercase().into(),
            EncodedString::Latin1(_) => todo!(),
            EncodedString::Ucs2(_) => todo!(),
            EncodedString::Utf8(s) => s.to_lowercase().into(),
        }
    }

    /// Converts the [`EncodedString`] to uppercase, returning a new [`EncodedString`].
    ///
    /// # Panics
    /// Not yet implemented for the Latin-1 and UCS-2 variants.
    ///
    /// # Examples
    /// ```rust
    /// # use lib_ccxr::util::encoding::*;
    /// let a = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding
    /// let b = vec![0x52, 0x91, 0x53, 0x55, 0x4d, 0x91]; // RÉSUMÉ in Line 21 encoding
    /// let sa: EncodedString = Line21String::from_vec(a).into();
    /// let sb: EncodedString = Line21String::from_vec(b).into();
    /// assert_eq!(sa.to_uppercase(), sb)
    /// ```
    pub fn to_uppercase(&self) -> EncodedString {
        match self {
            EncodedString::Line21(l) => l.to_uppercase().into(),
            EncodedString::Latin1(_) => todo!(),
            EncodedString::Ucs2(_) => todo!(),
            EncodedString::Utf8(s) => s.to_uppercase().into(),
        }
    }
}

impl From<Line21String> for EncodedString {
    fn from(value: Line21String) -> Self {
        EncodedString::Line21(value)
    }
}

impl From<Latin1String> for EncodedString {
    fn from(value: Latin1String) -> Self {
        EncodedString::Latin1(value)
    }
}

impl From<Ucs2String> for EncodedString {
    fn from(value: Ucs2String) -> Self {
        EncodedString::Ucs2(value)
    }
}

impl From<String> for EncodedString {
    fn from(value: String) -> Self {
        EncodedString::Utf8(value)
    }
}

/// Maps a Line 21 character to Latin-1, using [`UNAVAILABLE_CHAR`] where Latin-1 has no
/// equivalent.
fn line21_to_latin1(c: Line21Char) -> Latin1Char {
    if c < 0x80 {
        // Regular line-21 character set, mostly ASCII except these exceptions
        match c {
            0x2a => 0xe1,             // lowercase a, acute accent
            0x5c => 0xe9,             // lowercase e, acute accent
            0x5e => 0xed,             // lowercase i, acute accent
            0x5f => 0xf3,             // lowercase o, acute accent
            0x60 => 0xfa,             // lowercase u, acute accent
            0x7b => 0xe7,             // lowercase c with cedilla
            0x7c => 0xf7,             // division symbol
            0x7d => 0xd1,             // uppercase N tilde
            0x7e => 0xf1,             // lowercase n tilde
            0x7f => UNAVAILABLE_CHAR, // Solid block - Does not exist in Latin 1
            _ => c,
        }
    } else {
        match c {
            // THIS BLOCK INCLUDES THE 16 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS
            // THAT COME FROM HI BYTE=0x11 AND LOW BETWEEN 0x30 AND 0x3F
            0x80 => 0xae,             // Registered symbol (R)
            0x81 => 0xb0,             // degree sign
            0x82 => 0xbd,             // 1/2 symbol
            0x83 => 0xbf,             // Inverted (open) question mark
            0x84 => UNAVAILABLE_CHAR, // Trademark symbol (TM) - Does not exist in Latin 1
            0x85 => 0xa2,             // Cents symbol
            0x86 => 0xa3,             // Pounds sterling
            0x87 => 0xb6,             // Music note - Not in latin 1, so we use 'pilcrow'
            0x88 => 0xe0,             // lowercase a, grave accent
            0x89 => 0x20,             // transparent space, we make it regular
            0x8a => 0xe8,             // lowercase e, grave accent
            0x8b => 0xe2,             // lowercase a, circumflex accent
            0x8c => 0xea,             // lowercase e, circumflex accent
            0x8d => 0xee,             // lowercase i, circumflex accent
            0x8e => 0xf4,             // lowercase o, circumflex accent
            0x8f => 0xfb,             // lowercase u, circumflex accent
            // THIS BLOCK INCLUDES THE 32 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS
            // THAT COME FROM HI BYTE=0x12 AND LOW BETWEEN 0x20 AND 0x3F
            0x90 => 0xc1,             // capital letter A with acute
            0x91 => 0xc9,             // capital letter E with acute
            0x92 => 0xd3,             // capital letter O with acute
            0x93 => 0xda,             // capital letter U with acute
            0x94 => 0xdc,             // capital letter U with diaeresis
            0x95 => 0xfc,             // lowercase letter U with diaeresis
            0x96 => 0x27,             // apostrophe
            0x97 => 0xa1,             // inverted exclamation mark
            0x98 => 0x2a,             // asterisk
            0x99 => 0x27,             // apostrophe (yes, duped). See CCADI source code.
            0x9a => 0x2d,             // em dash
            0x9b => 0xa9,             // copyright sign
            0x9c => UNAVAILABLE_CHAR, // Service Mark - not available in latin 1
            0x9d => 0x2e,             // Full stop (.)
            0x9e => 0x22,             // Quotation mark
            0x9f => 0x22,             // Quotation mark
            0xa0 => 0xc0,             // uppercase A, grave accent
            0xa1 => 0xc2,             // uppercase A, circumflex
            0xa2 => 0xc7,             // uppercase C with cedilla
            0xa3 => 0xc8,             // uppercase E, grave accent
            0xa4 => 0xca,             // uppercase E, circumflex
            0xa5 => 0xcb,             // capital letter E with diaeresis
            0xa6 => 0xeb,             // lowercase letter e with diaeresis
            0xa7 => 0xce,             // uppercase I, circumflex
            0xa8 => 0xcf,             // uppercase I, with diaeresis
            0xa9 => 0xef,             // lowercase i, with diaeresis
            0xaa => 0xd4,             // uppercase O, circumflex
            0xab => 0xd9,             // uppercase U, grave accent
            0xac => 0xf9,             // lowercase u, grave accent
            0xad => 0xdb,             // uppercase U, circumflex
            0xae => 0xab,             // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
            0xaf => 0xbb,             // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
            // THIS BLOCK INCLUDES THE 32 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS
            // THAT COME FROM HI BYTE=0x13 AND LOW BETWEEN 0x20 AND 0x3F
            0xb0 => 0xc3,             // Uppercase A, tilde
            0xb1 => 0xe3,             // Lowercase a, tilde
            0xb2 => 0xcd,             // Uppercase I, acute accent
            0xb3 => 0xcc,             // Uppercase I, grave accent
            0xb4 => 0xec,             // Lowercase i, grave accent
            0xb5 => 0xd2,             // Uppercase O, grave accent
            0xb6 => 0xf2,             // Lowercase o, grave accent
            0xb7 => 0xd5,             // Uppercase O, tilde
            0xb8 => 0xf5,             // Lowercase o, tilde
            0xb9 => 0x7b,             // Open curly brace
            0xba => 0x7d,             // Closing curly brace
            0xbb => 0x5c,             // Backslash
            0xbc => 0x5e,             // Caret
            0xbd => 0x5f,             // Underscore
            0xbe => 0xa6,             // Pipe (broken bar)
            0xbf => 0x7e,             // Tilde
            0xc0 => 0xc4,             // Uppercase A, umlaut
            0xc1 => 0xe4,             // Lowercase a, umlaut (0xe3 would be a-tilde, see 0xb1)
            0xc2 => 0xd6,             // Uppercase O, umlaut
            0xc3 => 0xf6,             // Lowercase o, umlaut
            0xc4 => 0xdf,             // Eszett (sharp S)
            0xc5 => 0xa5,             // Yen symbol
            0xc6 => 0xa4,             // Currency symbol
            0xc7 => 0x7c,             // Vertical bar
            0xc8 => 0xc5,             // Uppercase A, ring
            0xc9 => 0xe5,             // Lowercase A, ring
            0xca => 0xd8,             // Uppercase O, slash
            0xcb => 0xf8,             // Lowercase o, slash
            0xcc => UNAVAILABLE_CHAR, // Upper left corner
            0xcd => UNAVAILABLE_CHAR, // Upper right corner
            0xce => UNAVAILABLE_CHAR, // Lower left corner
            0xcf => UNAVAILABLE_CHAR, // Lower right corner
            // For those that don't have representation
            // I'll do it eventually, I promise
            // These are weird chars anyway
            _ => UNAVAILABLE_CHAR,
        }
    }
}

/// Maps a Line 21 character to UCS-2; only the characters missing from Latin-1 need their own
/// entries here.
fn line21_to_ucs2(c: Line21Char) -> Ucs2Char {
    match c {
        0x7f => 0x25A0,                  // Solid block
        0x84 => 0x2122,                  // Trademark symbol (TM)
        0x87 => 0x266a,                  // Music note
        0x9c => 0x2120,                  // Service Mark
        0xcc => 0x231c,                  // Upper left corner
        0xcd => 0x231d,                  // Upper right corner
        0xce => 0x231e,                  // Lower left corner
        0xcf => 0x231f,                  // Lower right corner
        _ => line21_to_latin1(c).into(), // Everything else, same as latin-1 followed by 00
    }
}

/// Maps a UCS-2 code point to Line 21: code points below 0x80 are passed through unchanged,
/// everything else becomes [`UNAVAILABLE_CHAR`].
fn ucs2_to_line21(c: Ucs2Char) -> Line21Char {
    if c < 0x80 {
        c as u8
    } else {
        UNAVAILABLE_CHAR
    }
}

/// Maps a UCS-2 code point to a Latin-1 byte, using [`UNAVAILABLE_CHAR`] for code points
/// without a mapping.
fn ucs2_to_latin1(c: Ucs2Char) -> Latin1Char {
    // Code points 0 to U+00FF (inclusive) are the same in both.
    if c <= 0xff {
        c as u8
    } else {
        // NOTE(review): the byte values below match the ISO 8859-15 positions of these
        // characters, not Latin-1 ones; kept as-is, confirm that this is intended.
        match c {
            0x0152 => 188, // U+0152 = 0xBC: OE ligature
            0x0153 => 189, // U+0153 = 0xBD: oe ligature
            0x0160 => 166, // U+0160 = 0xA6: S with caron
            0x0161 => 168, // U+0161 = 0xA8: s with caron
            0x0178 => 190, // U+0178 = 0xBE: Y with diaeresis
            0x017D => 180, // U+017D = 0xB4: Z with caron
            0x017E => 184, // U+017E = 0xB8: z with caron
            0x20AC => 164, // U+20AC = 0xA4: Euro
            _ => UNAVAILABLE_CHAR,
        }
    }
}

/// Returns the lowercase counterpart of a Line 21 character, or the character itself if it
/// has none.
fn line21_to_lowercase(c: Line21Char) -> Line21Char {
    if c.is_ascii_uppercase() {
        c.to_ascii_lowercase()
    } else {
        match c {
            0x7d => 0x7e, // uppercase N tilde
            0x90 => 0x2a, // capital letter A with acute
            0x91 => 0x5c, // capital letter E with acute
            0x92 => 0x5f, // capital letter O with acute
            0x93 => 0x60, // capital letter U with acute
            0xa2 => 0x7b, // uppercase C with cedilla
            0xa0 => 0x88, // uppercase A, grave accent
            0xa3 => 0x8a, // uppercase E, grave accent
            0xa1 => 0x8b, // uppercase A, circumflex
            0xa4 => 0x8c, // uppercase E, circumflex
            0xa7 => 0x8d, // uppercase I, circumflex
            0xaa => 0x8e, // uppercase O, circumflex
            0xad => 0x8f, // uppercase U, circumflex
            0x94 => 0x95, // capital letter U with diaeresis
            0xa5 => 0xa6, // capital letter E with diaeresis
            0xa8 => 0xa9, // uppercase I, with diaeresis
            0xab => 0xac, // uppercase U, grave accent
            0xb0 => 0xb1, // Uppercase A, tilde
            0xb2 => 0x5e, // Uppercase I, acute accent
            0xb3 => 0xb4, // Uppercase I, grave accent
            0xb5 => 0xb6, // Uppercase O, grave accent
            0xb7 => 0xb8, // Uppercase O, tilde
            0xc0 => 0xc1, // Uppercase A, umlaut
            0xc2 => 0xc3, // Uppercase O, umlaut
            0xc8 => 0xc9, // Uppercase A, ring
            0xca => 0xcb, // Uppercase O, slash
            x => x,
        }
    }
}

/// Returns the uppercase counterpart of a Line 21 character, or the character itself if it
/// has none.
fn line21_to_uppercase(c: Line21Char) -> Line21Char {
    if c.is_ascii_lowercase() {
        c.to_ascii_uppercase()
    } else {
        match c {
            0x7e => 0x7d, // lowercase n tilde
            0x2a => 0x90, // lowercase a, acute accent
            0x5c => 0x91, // lowercase e, acute accent
            0x5e => 0xb2, // lowercase i, acute accent
            0x5f => 0x92, // lowercase o, acute accent
            0x60 => 0x93, // lowercase u, acute accent
            0x7b => 0xa2, // lowercase c with cedilla
            0x88 => 0xa0, // lowercase a, grave accent
            0x8a => 0xa3, // lowercase e, grave accent
            0x8b => 0xa1, // lowercase a, circumflex accent
            0x8c => 0xa4, // lowercase e, circumflex accent
            0x8d => 0xa7, // lowercase i, circumflex accent
            0x8e => 0xaa, // lowercase o, circumflex accent
            0x8f => 0xad, // lowercase u, circumflex accent
            0x95 => 0x94, // lowercase letter U with diaeresis
            0xa6 => 0xa5, // lowercase letter e with diaeresis
            0xa9 => 0xa8, // lowercase i, with diaeresis
            0xac => 0xab, // lowercase u, grave accent
            0xb1 => 0xb0, // Lowercase a, tilde
            0xb4 => 0xb3, // Lowercase i, grave accent
            0xb6 => 0xb5, // Lowercase o, grave accent
            0xb8 => 0xb7, // Lowercase o, tilde
            0xc1 => 0xc0, // Lowercase A, umlaut
            0xc3 => 0xc2, // Lowercase o, umlaut
            0xc9 => 0xc8, // Lowercase A, ring
            0xcb => 0xca, // Lowercase o, slash
            x => x,
        }
    }
}

/// Converts a UCS-2 code point to a `char`; values that are not valid scalar values (the
/// surrogate range) become [`UNAVAILABLE_CHAR`].
fn ucs2_to_char(c: Ucs2Char) -> char {
    char::from_u32(u32::from(c)).unwrap_or(char::from(UNAVAILABLE_CHAR))
}

/// Converts a `char` to UCS-2; characters above U+FFFF do not fit and become
/// [`UNAVAILABLE_CHAR`].
fn char_to_ucs2(c: char) -> Ucs2Char {
    match u32::from(c) {
        // The pattern guarantees the value fits into 16 bits, so the cast is lossless.
        code_point @ 0..=0xFFFF => code_point as Ucs2Char,
        _ => Ucs2Char::from(UNAVAILABLE_CHAR),
    }
}

// ---- patch metadata that followed this file in the series, kept verbatim ----
// diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs
// index c39f5e7a3..40e798c4f 100644
// --- a/src/rust/lib_ccxr/src/util/mod.rs
// +++ b/src/rust/lib_ccxr/src/util/mod.rs
// @@ -1,6 +1,7 @@ //! Provides basic utilities used throughout the program.
mod bits; +pub mod encoding; pub mod log; pub use bits::*; From ae7f21434c8edde907cf12b551cebf27158e1be0 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:32:03 +0530 Subject: [PATCH 04/13] add levenshtein module --- src/rust/lib_ccxr/src/util/levenshtein.rs | 79 +++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 2 + 2 files changed, 81 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/levenshtein.rs diff --git a/src/rust/lib_ccxr/src/util/levenshtein.rs b/src/rust/lib_ccxr/src/util/levenshtein.rs new file mode 100644 index 000000000..6e7ec7a65 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/levenshtein.rs @@ -0,0 +1,79 @@ +use crate::util::encoding::Ucs2Char; +use crate::util::log::{debug, DebugMessageFlag}; +use std::cmp::min; + +/// Calculates the levenshtein distance between two slices. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::levenshtein; +/// assert_eq!(levenshtein(&[1,2,3,4,5], &[1,3,2,4,5,6]), 3); +/// ``` +pub fn levenshtein(a: &[T], b: &[T]) -> usize { + let mut column: Vec = (0..).take(a.len() + 1).collect(); + + for x in 1..=b.len() { + column[0] = x; + let mut lastdiag = x - 1; + for y in 1..=a.len() { + let olddiag = column[y]; + column[y] = min( + min(column[y] + 1, column[y - 1] + 1), + lastdiag + (if a[y - 1] == b[x - 1] { 0 } else { 1 }), + ); + lastdiag = olddiag; + } + } + + column[a.len()] +} + +/// Check the given two lines can be considered similar using levenshtein +/// distance. +/// +/// If the levenshtein distance between `ucs2_buf1` and `ucs2_buf2` is less than either +/// `levdistmincnt` or `levdistmaxpct`% of the length of the shorter line, then the lines are +/// considered to be similar. `c1` and `c2` are used for displaying a debug message only. 
+/// +/// # Examples +/// ``` +/// # use lib_ccxr::util::fuzzy_cmp; +/// # use lib_ccxr::util::log::*; +/// # let mask = DebugMessageMask::new(DebugMessageFlag::LEVENSHTEIN, DebugMessageFlag::LEVENSHTEIN); +/// # set_logger(CCExtractorLogger::new(OutputTarget::Quiet, mask, false)); +/// let hello_world = [72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]; +/// let hello_Aorld = [72, 101, 108, 108, 111, 32, 65, 111, 114, 108, 100]; +/// let helld_Aorld = [72, 101, 108, 108, 100, 32, 65, 111, 114, 108, 100]; +/// +/// // Returns true if both lines are same +/// assert!(fuzzy_cmp("", "", &hello_world, &hello_world, 10, 2)); +/// +/// // Returns true since the distance is 1 which is less than 2. +/// assert!(fuzzy_cmp("", "", &hello_world, &hello_Aorld, 10, 2)); +/// +/// // Returns false since the distance is 2 which is not less than both 2 and 10% of length. +/// assert!(!fuzzy_cmp("", "", &hello_world, &helld_Aorld, 10, 2)); +/// +/// // Returns true since the distance is 1 which is less than 20% of length. +/// assert!(fuzzy_cmp("", "", &hello_world, &hello_Aorld, 20, 2)); +/// ``` +pub fn fuzzy_cmp( + c1: &str, + c2: &str, + ucs2_buf1: &[Ucs2Char], + ucs2_buf2: &[Ucs2Char], + levdistmaxpct: u8, + levdistmincnt: u8, +) -> bool { + let short_len = std::cmp::min(ucs2_buf1.len(), ucs2_buf2.len()); + let max = std::cmp::max( + (short_len * levdistmaxpct as usize) / 100, + levdistmincnt.into(), + ); + + // For the second string, only take the first chars (up to the first string length, that's short_len). 
+ let l = levenshtein(ucs2_buf1, &ucs2_buf2[..short_len]); + let is_same = l < max; + debug!(msg_type = DebugMessageFlag::LEVENSHTEIN; "\rLEV | {} | {} | Max: {} | Calc: {} | Match: {}\n", c1, c2, max, l, is_same); + is_same +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index 40e798c4f..dd77fd113 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -2,6 +2,8 @@ mod bits; pub mod encoding; +mod levenshtein; pub mod log; pub use bits::*; +pub use levenshtein::*; From 2c102b65e2ec8ac620103b1300f47380394bb02d Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:58:49 +0530 Subject: [PATCH 05/13] add common constants module --- src/rust/lib_ccxr/src/common/constants.rs | 415 ++++++++++++++++++++++ src/rust/lib_ccxr/src/common/mod.rs | 3 + src/rust/lib_ccxr/src/lib.rs | 1 + 3 files changed, 419 insertions(+) create mode 100644 src/rust/lib_ccxr/src/common/constants.rs create mode 100644 src/rust/lib_ccxr/src/common/mod.rs diff --git a/src/rust/lib_ccxr/src/common/constants.rs b/src/rust/lib_ccxr/src/common/constants.rs new file mode 100644 index 000000000..b310c95b9 --- /dev/null +++ b/src/rust/lib_ccxr/src/common/constants.rs @@ -0,0 +1,415 @@ +use std::ffi::OsStr; + +pub const DTVCC_MAX_SERVICES: usize = 63; + +/// An enum of all the available formats for the subtitle output. 
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    Raw,
    Srt,
    Sami,
    Transcript,
    Rcwt,
    Null,
    SmpteTt,
    SpuPng,
    DvdRaw, // See -d at http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_TOOLS.HTML#CCExtract
    WebVtt,
    SimpleXml,
    G608,
    Curl,
    Ssa,
    Mcc,
    Scc,
    Ccd,
}

/// AVC NAL types
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum AvcNalType {
    Unspecified0 = 0,
    CodedSliceNonIdrPicture1 = 1,
    CodedSlicePartitionA = 2,
    CodedSlicePartitionB = 3,
    CodedSlicePartitionC = 4,
    CodedSliceIdrPicture = 5,
    Sei = 6,
    SequenceParameterSet7 = 7,
    PictureParameterSet = 8,
    AccessUnitDelimiter9 = 9,
    EndOfSequence = 10,
    EndOfStream = 11,
    FillerData = 12,
    SequenceParameterSetExtension = 13,
    PrefixNalUnit = 14,
    SubsetSequenceParameterSet = 15,
    Reserved16 = 16,
    Reserved17 = 17,
    Reserved18 = 18,
    CodedSliceAuxiliaryPicture = 19,
    CodedSliceExtension = 20,
    Reserved21 = 21,
    Reserved22 = 22,
    Reserved23 = 23,
    Unspecified24 = 24,
    Unspecified25 = 25,
    Unspecified26 = 26,
    Unspecified27 = 27,
    Unspecified28 = 28,
    Unspecified29 = 29,
    Unspecified30 = 30,
    Unspecified31 = 31,
}

/// MPEG-2 TS stream types
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum StreamType {
    Unknownstream = 0,
    /*
    The later constants are defined by MPEG-TS standard
    Explore at: https://exiftool.org/TagNames/M2TS.html
    */
    VideoMpeg1 = 0x01,
    VideoMpeg2 = 0x02,
    AudioMpeg1 = 0x03,
    AudioMpeg2 = 0x04,
    PrivateTableMpeg2 = 0x05,
    PrivateMpeg2 = 0x06,
    MhegPackets = 0x07,
    Mpeg2AnnexADsmCc = 0x08,
    ItuTH222_1 = 0x09,
    IsoIec13818_6TypeA = 0x0a,
    IsoIec13818_6TypeB = 0x0b,
    IsoIec13818_6TypeC = 0x0c,
    IsoIec13818_6TypeD = 0x0d,
    AudioAac = 0x0f,
    VideoMpeg4 = 0x10,
    VideoH264 = 0x1b,
    PrivateUserMpeg2 = 0x80,
    AudioAc3 = 0x81,
    AudioHdmvDts = 0x82,
    AudioDts = 0x8a,
}

/// Descriptor tags; the discriminant of each variant is the tag value.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum MpegDescriptor {
    /*
    The later constants are defined by ETSI EN 300 468 standard
    Explore at: https://www.etsi.org/deliver/etsi_en/300400_300499/300468/01.11.01_60/en_300468v011101p.pdf
    */
    Registration = 0x05,
    DataStreamAlignment = 0x06,
    Iso639Language = 0x0a,
    VbiDataDescriptor = 0x45,
    VbiTeletextDescriptor = 0x46,
    TeletextDescriptor = 0x56,
    DvbSubtitle = 0x59,
    /* User defined */
    CaptionService = 0x86,
    DataComp = 0xfd,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum DataSource {
    File,
    Stdin,
    Network,
    Tcp,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum StreamMode {
    ElementaryOrNotFound = 0,
    Transport = 1,
    Program = 2,
    Asf = 3,
    McpoodlesRaw = 4,
    Rcwt = 5, // raw captions with time, not used yet.
    Myth = 6, // use the myth loop
    Mp4 = 7,  // mp4, iso-
    #[cfg(feature = "wtv_debug")]
    HexDump = 8, // hexadecimal dump generated by wtvccdump
    Wtv = 9,
    #[cfg(feature = "enable_ffmpeg")]
    Ffmpeg = 10,
    Gxf = 11,
    Mkv = 12,
    Mxf = 13,
    Autodetect = 16,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum BufferdataType {
    Unknown,
    Pes,
    Raw,
    H264,
    Hauppage,
    Teletext,
    PrivateMpeg2Cc,
    DvbSubtitle,
    IsdbSubtitle,
    /* Buffer where cc data contain 3 byte cc_valid ccdata 1 ccdata 2 */
    RawType,
    DvdSubtitle,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FrameType {
    ResetOrUnknown,
    IFrame,
    PFrame,
    BFrame,
    DFrame,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Codec {
    Teletext,
    Dvb,
    IsdbCc,
    AtscCc,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum SelectCodec {
    All,
    Some(Codec),
    None,
}

/// Caption Distribution Packet
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum CdpSectionType {
    /*
    The later constants are defined by SMPTE ST 334
    Purchase for 80$ at: https://ieeexplore.ieee.org/document/8255806
    */
    Data = 0x72,
    SvcInfo = 0x73,
    Footer = 0x74,
}

/// Languages, identified by the three-letter codes returned from [`Language::to_str`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Language {
    Und, // Undefined
    Eng,
    Afr,
    Amh,
    Ara,
    Asm,
    Aze,
    Bel,
    Ben,
    Bod,
    Bos,
    Bul,
    Cat,
    Ceb,
    Ces,
    Chs,
    Chi,
    Chr,
    Cym,
    Dan,
    Deu,
    Dzo,
    Ell,
    Enm,
    Epo,
    Equ,
    Est,
    Eus,
    Fas,
    Fin,
    Fra,
    Frk,
    Frm,
    Gle,
    Glg,
    Grc,
    Guj,
    Hat,
    Heb,
    Hin,
    Hrv,
    Hun,
    Iku,
    Ind,
    Isl,
    Ita,
    Jav,
    Jpn,
    Kan,
    Kat,
    Kaz,
    Khm,
    Kir,
    Kor,
    Kur,
    Lao,
    Lat,
    Lav,
    Lit,
    Mal,
    Mar,
    Mkd,
    Mlt,
    Msa,
    Mya,
    Nep,
    Nld,
    Nor,
    Ori,
    Osd,
    Pan,
    Pol,
    Por,
    Pus,
    Ron,
    Rus,
    San,
    Sin,
    Slk,
    Slv,
    Spa,
    Sqi,
    Srp,
    Swa,
    Swe,
    Syr,
    Tam,
    Tel,
    Tgk,
    Tgl,
    Tha,
    Tir,
    Tur,
    Uig,
    Ukr,
    Urd,
    Uzb,
    Vie,
    Yid,
}

impl OutputFormat {
    /// Returns the file extension for the output format if it is a file based format.
    ///
    /// The returned extension includes the leading dot (e.g. `".srt"`). `None` is returned
    /// for [`OutputFormat::Null`] and [`OutputFormat::Curl`].
    pub fn file_extension(&self) -> Option<&OsStr> {
        match self {
            OutputFormat::Raw => Some(OsStr::new(".raw")),
            OutputFormat::Srt => Some(OsStr::new(".srt")),
            OutputFormat::Sami => Some(OsStr::new(".smi")),
            OutputFormat::Transcript => Some(OsStr::new(".txt")),
            OutputFormat::Rcwt => Some(OsStr::new(".bin")),
            OutputFormat::Null => None,
            OutputFormat::SmpteTt => Some(OsStr::new(".ttml")),
            OutputFormat::SpuPng => Some(OsStr::new(".xml")),
            OutputFormat::DvdRaw => Some(OsStr::new(".dvdraw")),
            OutputFormat::WebVtt => Some(OsStr::new(".vtt")),
            OutputFormat::SimpleXml => Some(OsStr::new(".xml")),
            OutputFormat::G608 => Some(OsStr::new(".g608")),
            OutputFormat::Curl => None,
            OutputFormat::Ssa => Some(OsStr::new(".ass")),
            OutputFormat::Mcc => Some(OsStr::new(".mcc")),
            OutputFormat::Scc => Some(OsStr::new(".scc")),
            OutputFormat::Ccd => Some(OsStr::new(".ccd")),
        }
    }
}

impl Language {
    /// Returns the lowercase three-letter code of the language (e.g. `"eng"`).
    pub fn to_str(&self) -> &'static str {
        match self {
            Language::Und => "und", // Undefined
            Language::Eng => "eng",
            Language::Afr => "afr",
            Language::Amh => "amh",
            Language::Ara => "ara",
            Language::Asm => "asm",
            Language::Aze => "aze",
            Language::Bel => "bel",
            Language::Ben => "ben",
            Language::Bod => "bod",
            Language::Bos => "bos",
            Language::Bul => "bul",
            Language::Cat => "cat",
            Language::Ceb => "ceb",
            Language::Ces => "ces",
            Language::Chs => "chs",
            Language::Chi => "chi",
            Language::Chr => "chr",
            Language::Cym => "cym",
            Language::Dan => "dan",
            Language::Deu => "deu",
            Language::Dzo => "dzo",
            Language::Ell => "ell",
            Language::Enm => "enm",
            Language::Epo => "epo",
            Language::Equ => "equ",
            Language::Est => "est",
            Language::Eus => "eus",
            Language::Fas => "fas",
            Language::Fin => "fin",
            Language::Fra => "fra",
            Language::Frk => "frk",
            Language::Frm => "frm",
            Language::Gle => "gle",
            Language::Glg => "glg",
            Language::Grc => "grc",
            Language::Guj => "guj",
            Language::Hat => "hat",
            Language::Heb => "heb",
            Language::Hin => "hin",
            Language::Hrv => "hrv",
            Language::Hun => "hun",
            Language::Iku => "iku",
            Language::Ind => "ind",
            Language::Isl => "isl",
            Language::Ita => "ita",
            Language::Jav => "jav",
            Language::Jpn => "jpn",
            Language::Kan => "kan",
            Language::Kat => "kat",
            Language::Kaz => "kaz",
            Language::Khm => "khm",
            Language::Kir => "kir",
            Language::Kor => "kor",
            Language::Kur => "kur",
            Language::Lao => "lao",
            Language::Lat => "lat",
            Language::Lav => "lav",
            Language::Lit => "lit",
            Language::Mal => "mal",
            Language::Mar => "mar",
            Language::Mkd => "mkd",
            Language::Mlt => "mlt",
            Language::Msa => "msa",
            Language::Mya => "mya",
            Language::Nep => "nep",
            Language::Nld => "nld",
            Language::Nor => "nor",
            Language::Ori => "ori",
            Language::Osd => "osd",
            Language::Pan => "pan",
            Language::Pol => "pol",
            Language::Por => "por",
            Language::Pus => "pus",
            Language::Ron => "ron",
            Language::Rus => "rus",
            Language::San => "san",
            Language::Sin => "sin",
            Language::Slk => "slk",
            Language::Slv => "slv",
            Language::Spa => "spa",
            Language::Sqi => "sqi",
            Language::Srp => "srp",
            Language::Swa => "swa",
            Language::Swe => "swe",
            Language::Syr => "syr",
            Language::Tam => "tam",
            Language::Tel => "tel",
            Language::Tgk => "tgk",
            Language::Tgl => "tgl",
            Language::Tha => "tha",
            Language::Tir => "tir",
            Language::Tur => "tur",
            Language::Uig => "uig",
            Language::Ukr => "ukr",
            Language::Urd => "urd",
            Language::Uzb => "uzb",
            Language::Vie => "vie",
            Language::Yid => "yid",
        }
    }
}

// Patch residue that shares this collapsed source line (kept verbatim; it continues on the
// next source line):
// diff --git a/src/rust/lib_ccxr/src/common/mod.rs
b/src/rust/lib_ccxr/src/common/mod.rs new file mode 100644 index 000000000..27aa6cccc --- /dev/null +++ b/src/rust/lib_ccxr/src/common/mod.rs @@ -0,0 +1,3 @@ +mod constants; + +pub use constants::*; diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 812d1edf2..45ee8e79c 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1 +1,2 @@ +pub mod common; pub mod util; From e84df54d6d9f56f3adc273956ddde0f0f483ead6 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 13:18:50 +0530 Subject: [PATCH 06/13] add time module --- src/rust/Cargo.lock | 77 +++ src/rust/lib_ccxr/Cargo.lock | 163 ++++++ src/rust/lib_ccxr/Cargo.toml | 3 + src/rust/lib_ccxr/src/util/mod.rs | 1 + src/rust/lib_ccxr/src/util/time/mod.rs | 17 + src/rust/lib_ccxr/src/util/time/timing.rs | 563 ++++++++++++++++++++ src/rust/lib_ccxr/src/util/time/units.rs | 604 ++++++++++++++++++++++ 7 files changed, 1428 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/time/mod.rs create mode 100644 src/rust/lib_ccxr/src/util/time/timing.rs create mode 100644 src/rust/lib_ccxr/src/util/time/units.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 9d76606fb..13f32c7db 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -174,6 +174,31 @@ dependencies = [ "vec_map", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + 
"rustc_version", + "syn 1.0.109", +] + [[package]] name = "dyn_buf" version = "0.1.0" @@ -239,6 +264,12 @@ dependencies = [ "libc", ] +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "lazy_static" version = "1.4.0" @@ -267,6 +298,9 @@ name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags 2.4.0", + "derive_more", + "thiserror", + "time", ] [[package]] @@ -502,6 +536,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rusty_ffmpeg" version = "0.13.1+ffmpeg.6.0" @@ -516,6 +559,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" + [[package]] name = "serde" version = "1.0.188" @@ -626,6 +675,34 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "time" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb39ee79a6d8de55f48f2293a830e040392f1c5f16e336bdd1788cd0aadce07" +dependencies = [ + "deranged", + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"733d258752e9303d392b94b75230d07b0b9c489350c69b851fc6c065fde3e8f9" +dependencies = [ + "time-core", +] + [[package]] name = "toml" version = "0.5.11" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index d7ac327fc..6a653589c 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -8,9 +8,172 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags", + "derive_more", + "thiserror", + "time", +] + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + 
"proc-macro2", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +dependencies = [ + "thiserror-impl", ] + +[[package]] +name = "thiserror-impl" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] + 
+[[package]] +name = "time" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb39ee79a6d8de55f48f2293a830e040392f1c5f16e336bdd1788cd0aadce07" +dependencies = [ + "deranged", + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733d258752e9303d392b94b75230d07b0b9c489350c69b851fc6c065fde3e8f9" +dependencies = [ + "time-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index fb032a7ce..8fbfc6a64 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,7 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +thiserror = "1.0.39" +time = { version = "0.3.27", features = ["macros", "formatting"] } bitflags = "2.3.1" +derive_more = "0.99.17" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index dd77fd113..56ae8e111 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -4,6 +4,7 @@ mod bits; pub mod encoding; mod levenshtein; pub mod log; +pub mod time; pub use bits::*; pub use levenshtein::*; diff --git a/src/rust/lib_ccxr/src/util/time/mod.rs b/src/rust/lib_ccxr/src/util/time/mod.rs new file mode 100644 index 000000000..ce090781b --- /dev/null +++ 
b/src/rust/lib_ccxr/src/util/time/mod.rs @@ -0,0 +1,17 @@ +//! Provide types for storing time in different formats and manage timing information while +//! decoding. +//! +//! Time can be represented in one of following formats: +//! - [`Timestamp`] as number of milliseconds +//! - [`MpegClockTick`] as number of clock ticks (as defined in the MPEG standard) +//! - [`FrameCount`] as number of frames +//! - [`GopTimeCode`] as a GOP time code (as defined in the MPEG standard) +//! +//! [`GLOBAL_TIMING_INFO`] and [`TimingContext`] are used for managing time-related information +//! during the deocoding process. + +mod timing; +mod units; + +pub use timing::*; +pub use units::*; diff --git a/src/rust/lib_ccxr/src/util/time/timing.rs b/src/rust/lib_ccxr/src/util/time/timing.rs new file mode 100644 index 000000000..4b7399c76 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/timing.rs @@ -0,0 +1,563 @@ +use crate::common::FrameType; +use crate::util::log::{debug, info, DebugMessageFlag}; +use crate::util::time::{FrameCount, GopTimeCode, MpegClockTick, Timestamp}; +use std::sync::RwLock; + +/// The maximum allowed difference between [`TimingContext::current_pts`] and [`TimingContext::sync_pts`] in seconds. +/// +/// If the difference crosses this value, a PTS jump has occured and is handled accordingly. +const MAX_DIF: i64 = 5; + +/// A unique global instance of [`GlobalTimingInfo`] to be used throughout the program. +pub static GLOBAL_TIMING_INFO: RwLock = RwLock::new(GlobalTimingInfo::new()); + +const DEFAULT_FRAME_RATE: f64 = 30000.0 / 1001.0; + +/// Represents the status of [`TimingContext::current_pts`] and [`TimingContext::min_pts`] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum PtsSet { + No, + Received, + MinPtsSet, +} + +/// Represent the field of 608 or 708 caption. 
+#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum CaptionField { + Field1, + Field2, + Cea708, +} + +/// A collective struct for storing timing-related information when decoding a file. +/// +/// [`GlobalTimingInfo`] serves a similar purpose. The only difference is that its lifetime is +/// global. +#[derive(Debug)] +pub struct TimingContext { + pub pts_set: PtsSet, + /// if true then don't adjust again. + min_pts_adjusted: bool, + pub current_pts: MpegClockTick, + pub current_picture_coding_type: FrameType, + /// Store temporal reference of current frame. + pub current_tref: FrameCount, + pub min_pts: MpegClockTick, + pub sync_pts: MpegClockTick, + /// No screen should start before this FTS + pub minimum_fts: Timestamp, + /// Time stamp of current file (w/ fts_offset, w/o fts_global). + pub fts_now: Timestamp, + /// Time before first sync_pts. + pub fts_offset: Timestamp, + /// Time before first GOP. + pub fts_fc_offset: Timestamp, + /// Remember the maximum fts that we saw in current file. + pub fts_max: Timestamp, + /// Duration of previous files (-ve mode). + pub fts_global: Timestamp, + pub sync_pts2fts_set: bool, + pub sync_pts2fts_fts: Timestamp, + pub sync_pts2fts_pts: MpegClockTick, + /// PTS resets when current_pts is lower than prev. + pts_reset: bool, +} + +/// Settings for overall timing functionality in [`TimingContext`]. +#[derive(Debug)] +pub struct TimingSettings { + /// If true, timeline jumps will be ignored. This is important in several input formats that are assumed to have correct timing, no matter what. + pub disable_sync_check: bool, + + /// If true, there will be no sync at all. Mostly useful for debugging. + pub no_sync: bool, + + // Needs to be set, as it's used in set_fts. + pub is_elementary_stream: bool, +} + +/// A collective struct to store global timing-related information while decoding a file. +/// +/// [`TimingContext`] serves a similar purpose. 
The only difference is that its lifetime is not +/// global and its information could be reset while execution of program. +#[derive(Debug)] +pub struct GlobalTimingInfo { + // Count 608 (per field) and 708 blocks since last set_fts() call + pub cb_field1: u64, + pub cb_field2: u64, + pub cb_708: u64, + pub pts_big_change: bool, + pub current_fps: f64, + pub frames_since_ref_time: FrameCount, + pub total_frames_count: FrameCount, + pub gop_time: Option, + pub first_gop_time: Option, + pub fts_at_gop_start: Timestamp, + pub gop_rollover: bool, + pub timing_settings: TimingSettings, +} + +impl TimingContext { + /// Create a new [`TimingContext`]. + pub fn new() -> TimingContext { + TimingContext { + pts_set: PtsSet::No, + min_pts_adjusted: false, + current_pts: MpegClockTick::new(0), + current_picture_coding_type: FrameType::ResetOrUnknown, + current_tref: FrameCount::new(0), + min_pts: MpegClockTick::new(0x01FFFFFFFF), + sync_pts: MpegClockTick::new(0), + minimum_fts: Timestamp::from_millis(0), + fts_now: Timestamp::from_millis(0), + fts_offset: Timestamp::from_millis(0), + fts_fc_offset: Timestamp::from_millis(0), + fts_max: Timestamp::from_millis(0), + fts_global: Timestamp::from_millis(0), + sync_pts2fts_set: false, + sync_pts2fts_fts: Timestamp::from_millis(0), + sync_pts2fts_pts: MpegClockTick::new(0), + pts_reset: false, + } + } + + /// Add `pts` to `TimingContext::current_pts`. + /// + /// It also checks for PTS resets. + pub fn add_current_pts(&mut self, pts: MpegClockTick) { + self.set_current_pts(self.current_pts + pts) + } + + /// Set `TimingContext::current_pts` to `pts`. + /// + /// It also checks for PTS resets. 
    pub fn set_current_pts(&mut self, pts: MpegClockTick) {
        // Remember the previous value so a backwards step can be detected below.
        let prev_pts = self.current_pts;
        self.current_pts = pts;
        if self.pts_set == PtsSet::No {
            self.pts_set = PtsSet::Received
        }
        debug!(msg_type = DebugMessageFlag::VIDEO_STREAM; "PTS: {} ({:8})", self.current_pts.as_timestamp().to_hms_millis_time(':').unwrap(), self.current_pts.as_i64());
        debug!(msg_type = DebugMessageFlag::VIDEO_STREAM; " FTS: {} \n", self.fts_now.to_hms_millis_time(':').unwrap());

        // Check if PTS reset
        if self.current_pts < prev_pts {
            self.pts_reset = true;
        }
    }

    /// Recomputes `fts_now` (and, when needed, `min_pts`, `sync_pts`, `fts_offset` and
    /// `fts_max`) from `current_pts`, and resets the caption block counters in
    /// [`GLOBAL_TIMING_INFO`].
    ///
    /// Returns `false` only when no PTS has been received yet and the stream is not marked as
    /// an elementary stream; returns `true` in every other case, including the early exits.
    ///
    /// The write lock on [`GLOBAL_TIMING_INFO`] is held for the whole call.
    pub fn set_fts(&mut self) -> bool {
        let mut timing_info = GLOBAL_TIMING_INFO.write().unwrap();

        let mut pts_jump = false;

        // ES don't have PTS unless GOP timing is used
        if self.pts_set == PtsSet::No && timing_info.timing_settings.is_elementary_stream {
            return true;
        }

        // First check for timeline jump (only when min_pts was set (implies sync_pts)).
        if self.pts_set == PtsSet::MinPtsSet {
            let dif = if timing_info.timing_settings.disable_sync_check {
                // Disables sync check. Used for several input formats.
                0
            } else {
                (self.current_pts - self.sync_pts).as_timestamp().seconds()
            };

            // This was -0.2 before. TODO: find out why, since dif is integer?
            if !(0..MAX_DIF).contains(&dif) {
                // ATSC specs: More than 3501 ms means missing component
                info!("\nWarning: Reference clock has changed abruptly ({} seconds filepos={}), attempting to synchronize\n", dif, "Unable to get file position"); // TODO: get the file position somehow
                info!("Last sync PTS value: {}\n", self.sync_pts.as_i64());
                info!("Current PTS value: {}\n", self.current_pts.as_i64());
                info!("Note: You can disable this behavior by adding -ignoreptsjumps to the parameters.\n");

                pts_jump = true;
                timing_info.pts_big_change = true;

                // Discard the gap if it is not on an I-frame or temporal reference zero.
                if self.current_tref.as_u64() != 0
                    && self.current_picture_coding_type != FrameType::IFrame
                {
                    self.fts_now = self.fts_max;
                    info!("Change did not occur on first frame - probably a broken GOP\n");
                    return true;
                }
            }
        }

        // If min_pts was set just before a rollover we compensate by rolling it over too
        if self.pts_set == PtsSet::MinPtsSet && !self.min_pts_adjusted {
            // min_pts set
            // We want to be aware of the upcoming rollover, not after it happened, so we don't take
            // the 3 most significant bits but the 3 next ones
            let min_pts_big_bits = (self.min_pts.as_i64() >> 30) & 7;
            let cur_pts_big_bits = (self.current_pts.as_i64() >> 30) & 7;
            if cur_pts_big_bits == 7 && min_pts_big_bits == 0 {
                // Huge difference possibly means the first min_pts was actually just over the boundary
                // Take the current_pts (smaller, accounting for the rollover) instead
                self.min_pts = self.current_pts;
                self.min_pts_adjusted = true;
            } else if (1..=6).contains(&cur_pts_big_bits) {
                // Far enough from the boundary
                // Prevent the eventual difference with min_pts to make a bad adjustment
                self.min_pts_adjusted = true;
            }
        }

        // Set min_pts, fts_offset
        if self.pts_set != PtsSet::No {
            self.pts_set = PtsSet::MinPtsSet;

            // Use this part only the first time min_pts is set. Later treat
            // it as a reference clock change
            if self.current_pts < self.min_pts && !pts_jump {
                // If this is the first GOP, and seq 0 was not encountered yet
                // we might reset min_pts/fts_offset again

                self.min_pts = self.current_pts;

                // Avoid next async test
                self.sync_pts = self.current_pts
                    - self
                        .current_tref
                        .as_mpeg_clock_tick(timing_info.current_fps);

                if self.current_tref.as_u64() == 0
                    || (timing_info.total_frames_count - timing_info.frames_since_ref_time).as_u64()
                        == 0
                {
                    // Earliest time in GOP.
                    // OR
                    // If this is the first frame (PES) there cannot be an offset.
                    // This part is also reached for dvr-ms/NTSC (RAW) as
                    // total_frames_count = frames_since_ref_time = 0 when
                    // this is called for the first time.
                    self.fts_offset = Timestamp::from_millis(0);
                } else {
                    // It needs to be "+1" because the current frame is
                    // not yet counted.
                    let one_frame = FrameCount::new(1);
                    self.fts_offset = (timing_info.total_frames_count
                        - timing_info.frames_since_ref_time
                        + one_frame)
                        .as_timestamp(timing_info.current_fps);
                }
                debug!(
                    msg_type = DebugMessageFlag::TIME;
                    "\nFirst sync time PTS: {} {:+}ms (time before this PTS)\n",
                    self.min_pts.as_timestamp().to_hms_millis_time(':').unwrap(),
                    self.fts_offset.millis()
                );
                debug!(
                    msg_type = DebugMessageFlag::TIME;
                    "Total_frames_count {} frames_since_ref_time {}\n",
                    timing_info.total_frames_count.as_u64(),
                    timing_info.frames_since_ref_time.as_u64()
                );
            }

            // -nosync disables syncing
            if pts_jump && !timing_info.timing_settings.no_sync {
                // The current time in the old time base is calculated using
                // sync_pts (set at the beginning of the last GOP) plus the
                // time of the frames since then.
                self.fts_offset = self.fts_offset
                    + (self.sync_pts - self.min_pts).as_timestamp()
                    + timing_info
                        .frames_since_ref_time
                        .as_timestamp(timing_info.current_fps);
                self.fts_max = self.fts_offset;

                // Start counting again from here
                self.pts_set = PtsSet::Received; // Force min to be set again
                self.sync_pts2fts_set = false; // Make note of the new conversion values

                // Avoid next async test - the gap might have occurred on
                // current_tref != 0.
                self.sync_pts = self.current_pts
                    - self
                        .current_tref
                        .as_mpeg_clock_tick(timing_info.current_fps);
                // Set min_pts = sync_pts as this is used for fts_now
                self.min_pts = self.sync_pts;

                debug!(
                    msg_type = DebugMessageFlag::TIME;
                    "\nNew min PTS time: {} {:+}ms (time before this PTS)\n",
                    self.min_pts.as_timestamp().to_hms_millis_time(':').unwrap(),
                    self.fts_offset.millis()
                );
            }
        }

        // Set sync_pts, fts_offset
        if self.current_tref.as_u64() == 0 {
            self.sync_pts = self.current_pts;
        }

        // Reset counters
        timing_info.cb_field1 = 0;
        timing_info.cb_field2 = 0;
        timing_info.cb_708 = 0;

        // Avoid wrong "Calc. difference" and "Asynchronous by" numbers
        // for uninitialized min_pts
        if true {
            // CFS: Remove or think decent condition
            if self.pts_set != PtsSet::No {
                // If pts_set is not `PtsSet::No` we have min_pts
                self.fts_now = (self.current_pts - self.min_pts).as_timestamp() + self.fts_offset;
                // Record the first PTS/FTS pair seen since the flag was last cleared.
                if !self.sync_pts2fts_set {
                    self.sync_pts2fts_pts = self.current_pts;
                    self.sync_pts2fts_fts = self.fts_now;
                    self.sync_pts2fts_set = true;
                }
            } else {
                // No PTS info at all!!
                info!("Set PTS called without any global timestamp set\n");
                return false;
            }
        }
        if self.fts_now > self.fts_max {
            self.fts_max = self.fts_now;
        }

        // If PTS resets, then fix minimum_fts and fts_max
        if self.pts_reset {
            self.minimum_fts = Timestamp::from_millis(0);
            self.fts_max = self.fts_now;
            self.pts_reset = false;
        }

        true
    }

    /// Returns the total FTS.
    ///
    /// Caption block counters are included.
+ pub fn get_fts(&self, current_field: CaptionField) -> Timestamp { + let timing_info = GLOBAL_TIMING_INFO.read().unwrap(); + let count = match current_field { + CaptionField::Field1 => timing_info.cb_field1, + CaptionField::Field2 => timing_info.cb_field2, + CaptionField::Cea708 => timing_info.cb_708, + }; + self.fts_now + self.fts_global + Timestamp::from_millis((count as i64) * 1001 / 30) + } + + /// This returns the maximum FTS that belonged to a frame. + /// + /// Caption block counters are not applicable. + pub fn get_fts_max(&self) -> Timestamp { + self.fts_max + self.fts_global + } + + /// Log FTS and PTS information for debugging purpose. + pub fn print_debug_timing(&self) { + let zero = Timestamp::from_millis(0); + let timing_info = GLOBAL_TIMING_INFO.read().unwrap(); + + let gop_time = timing_info.gop_time.map(|x| x.timestamp()).unwrap_or(zero); + let first_gop_time = timing_info + .first_gop_time + .map(|x| x.timestamp()) + .unwrap_or(zero); + + // Avoid wrong "Calc. difference" and "Asynchronous by" numbers + // for uninitialized min_pts + let tempmin_pts = if self.min_pts.as_i64() == 0x01FFFFFFFF { + self.sync_pts + } else { + self.min_pts + }; + + info!( + "Sync time stamps: PTS: {} ", + self.sync_pts + .as_timestamp() + .to_hms_millis_time(':') + .unwrap() + ); + info!( + " GOP start FTS: {}\n", + gop_time.to_hms_millis_time(':').unwrap() + ); + + // Length first GOP to last GOP + let goplenms = gop_time - first_gop_time; + // Length at last sync point + let ptslenms = (self.sync_pts - tempmin_pts).as_timestamp() + self.fts_offset; + + info!( + "Last FTS: {}", + self.get_fts_max().to_hms_millis_time(':').unwrap() + ); + info!( + " GOP start FTS: {}\n", + timing_info + .fts_at_gop_start + .to_hms_millis_time(':') + .unwrap() + ); + + let one_frame = FrameCount::new(1).as_timestamp(timing_info.current_fps); + + // Times are based on last GOP and/or sync time + info!( + "Max FTS diff. 
to PTS: {:6}ms GOP: {:6}ms\n\n", + (self.get_fts_max() + one_frame - ptslenms) + .to_hms_millis_time(':') + .unwrap(), + (self.get_fts_max() + one_frame - goplenms) + .to_hms_millis_time(':') + .unwrap() + ); + } + + /// Constructs a [`TimingContext`] from its individual fields. + /// + /// # Safety + /// + /// Make sure that [`TimingContext`] is in a valid state. + #[allow(clippy::too_many_arguments)] + pub unsafe fn from_raw_parts( + pts_set: PtsSet, + min_pts_adjusted: bool, + current_pts: MpegClockTick, + current_picture_coding_type: FrameType, + current_tref: FrameCount, + min_pts: MpegClockTick, + sync_pts: MpegClockTick, + minimum_fts: Timestamp, + fts_now: Timestamp, + fts_offset: Timestamp, + fts_fc_offset: Timestamp, + fts_max: Timestamp, + fts_global: Timestamp, + sync_pts2fts_set: bool, + sync_pts2fts_fts: Timestamp, + sync_pts2fts_pts: MpegClockTick, + pts_reset: bool, + ) -> TimingContext { + TimingContext { + pts_set, + min_pts_adjusted, + current_pts, + current_picture_coding_type, + current_tref, + min_pts, + sync_pts, + minimum_fts, + fts_now, + fts_offset, + fts_fc_offset, + fts_max, + fts_global, + sync_pts2fts_set, + sync_pts2fts_fts, + sync_pts2fts_pts, + pts_reset, + } + } + + /// Returns the individual fields of a [`TimingContext`]. + /// + /// # Safety + /// + /// Certain fields are supposed to be private. 
+ #[allow(clippy::type_complexity)] + pub unsafe fn as_raw_parts( + &self, + ) -> ( + PtsSet, + bool, + MpegClockTick, + FrameType, + FrameCount, + MpegClockTick, + MpegClockTick, + Timestamp, + Timestamp, + Timestamp, + Timestamp, + Timestamp, + Timestamp, + bool, + Timestamp, + MpegClockTick, + bool, + ) { + let TimingContext { + pts_set, + min_pts_adjusted, + current_pts, + current_picture_coding_type, + current_tref, + min_pts, + sync_pts, + minimum_fts, + fts_now, + fts_offset, + fts_fc_offset, + fts_max, + fts_global, + sync_pts2fts_set, + sync_pts2fts_fts, + sync_pts2fts_pts, + pts_reset, + } = *self; + + ( + pts_set, + min_pts_adjusted, + current_pts, + current_picture_coding_type, + current_tref, + min_pts, + sync_pts, + minimum_fts, + fts_now, + fts_offset, + fts_fc_offset, + fts_max, + fts_global, + sync_pts2fts_set, + sync_pts2fts_fts, + sync_pts2fts_pts, + pts_reset, + ) + } +} + +impl GlobalTimingInfo { + /// Create a new instance of [`GlobalTimingInfo`]. + const fn new() -> GlobalTimingInfo { + GlobalTimingInfo { + cb_field1: 0, + cb_field2: 0, + cb_708: 0, + pts_big_change: false, + current_fps: DEFAULT_FRAME_RATE, // 29.97 + // TODO: Get from framerates_values[] instead + frames_since_ref_time: FrameCount::new(0), + total_frames_count: FrameCount::new(0), + gop_time: None, + first_gop_time: None, + fts_at_gop_start: Timestamp::from_millis(0), + gop_rollover: false, + timing_settings: TimingSettings { + disable_sync_check: false, + no_sync: false, + is_elementary_stream: false, + }, + } + } +} + +impl Default for TimingContext { + fn default() -> Self { + Self::new() + } +} diff --git a/src/rust/lib_ccxr/src/util/time/units.rs b/src/rust/lib_ccxr/src/util/time/units.rs new file mode 100644 index 000000000..f90a71787 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/units.rs @@ -0,0 +1,604 @@ +use derive_more::{Add, Neg, Sub}; +use std::convert::TryInto; +use std::fmt::Write; +use std::num::TryFromIntError; +use std::time::{SystemTime, UNIX_EPOCH}; 
+use thiserror::Error; +use time::macros::{datetime, format_description}; +use time::{error::Format, Duration}; + +/// Represents a timestamp in milliseconds. +/// +/// The number can be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub, Neg)] +pub struct Timestamp { + millis: i64, +} + +/// Represents an error during operations on [`Timestamp`]. +#[derive(Error, Debug)] +pub enum TimestampError { + #[error("input parameter given is out of range")] + InputOutOfRangeError, + #[error("timestamp is out of range")] + OutOfRangeError(#[from] TryFromIntError), + #[error("error ocurred during formatting")] + FormattingError(#[from] std::fmt::Error), + #[error("error ocurred during formatting a date")] + DateFormattingError(#[from] Format), + #[error("error ocurred during parsing")] + ParsingError, +} + +/// Represents the different string formats for [`Timestamp`]. +pub enum TimestampFormat { + /// Format: blank string. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_formatted_time(&mut output, TimestampFormat::None); + /// assert_eq!(output, ""); + /// ``` + None, + + /// Format: `{hour:02}:{minute:02}:{second:02}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_formatted_time(&mut output, TimestampFormat::HHMMSS); + /// assert_eq!(output, "01:48:44"); + /// ``` + HHMMSS, + + /// Format: `{second:02}{millis_separator}{millis:03}`. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_formatted_time( + /// &mut output, + /// TimestampFormat::Seconds { + /// millis_separator: ',', + /// }, + /// ); + /// assert_eq!(output, "6524,365"); + /// ``` + Seconds { millis_separator: char }, + + /// Format: + /// `{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}{millis_separator}{millis:03}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// // 11 March 2023 14:53:36.749 in UNIX timestamp. + /// let timestamp = Timestamp::from_millis(1678546416749); + /// let mut output = String::new(); + /// timestamp.write_formatted_time( + /// &mut output, + /// TimestampFormat::Date { + /// millis_separator: ',', + /// }, + /// ); + /// assert_eq!(output, "20230311145336,749"); + /// ``` + Date { millis_separator: char }, + + /// Format: `{hour:02}:{minute:02}:{second:02},{millis:03}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_formatted_time(&mut output, TimestampFormat::HHMMSSFFF); + /// assert_eq!(output, "01:48:44,365"); + /// ``` + HHMMSSFFF, +} + +impl Timestamp { + /// Create a new [`Timestamp`] based on the number of milliseconds since the Unix Epoch. + pub fn now() -> Timestamp { + let duration = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System Time cannot be behind the Unix Epoch"); + + Timestamp { + millis: duration.as_millis() as i64, + } + } + + /// Create a new [`Timestamp`] from number of milliseconds. + pub const fn from_millis(millis: i64) -> Timestamp { + Timestamp { millis } + } + + /// Create a new [`Timestamp`] from hours, minutes, seconds and milliseconds. 
+ /// + /// It will fail if any parameter doesn't follow their respective ranges: + /// + /// | Parameter | Range | + /// |-----------|---------| + /// | minutes | 0 - 59 | + /// | seconds | 0 - 59 | + /// | millis | 0 - 999 | + pub fn from_hms_millis( + hours: u8, + minutes: u8, + seconds: u8, + millis: u16, + ) -> Result { + if minutes < 60 && seconds < 60 && millis < 1000 { + Ok(Timestamp::from_millis( + (hours as i64) * 3_600_000 + + (minutes as i64) * 60_000 + + (seconds as i64) * 1000 + + millis as i64, + )) + } else { + Err(TimestampError::InputOutOfRangeError) + } + } + + /// Returns the number of milliseconds. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.millis(), 6524365); + /// ``` + pub fn millis(&self) -> i64 { + self.millis + } + + /// Returns the number of whole seconds. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.seconds(), 6524); + /// ``` + pub fn seconds(&self) -> i64 { + self.millis / 1000 + } + + /// Returns the number of whole seconds and leftover milliseconds as unsigned integers. + /// + /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.as_sec_millis().unwrap(), (6524, 365)); + /// ``` + pub fn as_sec_millis(&self) -> Result<(u64, u16), TimestampError> { + let millis: u64 = self.millis.try_into()?; + let s = millis / 1000; + let u = millis % 1000; + Ok((s, u as u16)) + } + + /// Returns the time in the form of hours, minutes, seconds and milliseconds as unsigned + /// integers. + /// + /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.as_hms_millis().unwrap(), (1, 48, 44, 365)); + /// ``` + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampError}; + /// let timestamp = Timestamp::from_millis(1678546416749); + /// assert!(matches!( + /// timestamp.as_hms_millis().unwrap_err(), + /// TimestampError::OutOfRangeError(_) + /// )); + /// ``` + pub fn as_hms_millis(&self) -> Result<(u8, u8, u8, u16), TimestampError> { + let millis: u64 = self.millis.try_into()?; + let h = millis / 3600000; + let m = millis / 60000 - 60 * h; + let s = millis / 1000 - 3600 * h - 60 * m; + let u = millis - 3600000 * h - 60000 * m - 1000 * s; + if h > 24 { + println!("{}", h) + } + Ok((h.try_into()?, m as u8, s as u8, u as u16)) + } + + /// Fills `output` with the [`Timestamp`] using SRT's timestamp format. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_srt_time(&mut output); + /// assert_eq!(output, "01:48:44,365"); + /// ``` + pub fn write_srt_time(&self, output: &mut String) -> Result<(), TimestampError> { + let (h, m, s, u) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02},{:03}", h, m, s, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using VTT's timestamp format. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_vtt_time(&mut output); + /// assert_eq!(output, "01:48:44.365"); + /// ``` + pub fn write_vtt_time(&self, output: &mut String) -> Result<(), TimestampError> { + let (h, m, s, u) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02}.{:03}", h, m, s, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using + /// "{sign}{hour:02}:{minute:02}:{second:02}{sep}{millis:03}" format, where `sign` can be `-` + /// if time is negetive or blank if it is positive. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_hms_millis_time(&mut output, ':'); + /// assert_eq!(output, "01:48:44:365"); + /// ``` + pub fn write_hms_millis_time( + &self, + output: &mut String, + sep: char, + ) -> Result<(), TimestampError> { + let sign = if self.millis < 0 { "-" } else { "" }; + let timestamp = if self.millis < 0 { -*self } else { *self }; + let (h, m, s, u) = timestamp.as_hms_millis()?; + write!(output, "{}{:02}:{:02}:{:02}{}{:03}", sign, h, m, s, sep, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using ctime's format. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_ctime(&mut output); + /// assert_eq!(output, "Thu Jan 01 01:48:44 1970"); + /// ``` + pub fn write_ctime(&self, output: &mut String) -> Result<(), TimestampError> { + let (sec, millis) = self.as_sec_millis()?; + let d = datetime!(1970-01-01 0:00) + + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); + let format = format_description!( + "[weekday repr:short] [month repr:short] [day] [hour]:[minute]:[second] [year]" + ); + write!(output, "{}", d.format(&format)?)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using format specified by [`TimestampFormat`]. + /// + /// See [`TimestampFormat`] for examples. + pub fn write_formatted_time( + &self, + output: &mut String, + format: TimestampFormat, + ) -> Result<(), TimestampError> { + match format { + TimestampFormat::None => Ok(()), + TimestampFormat::HHMMSS => { + let (h, m, s, _) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02}", h, m, s)?; + Ok(()) + } + TimestampFormat::Seconds { millis_separator } => { + let (sec, millis) = self.as_sec_millis()?; + write!(output, "{}{}{:03}", sec, millis_separator, millis)?; + Ok(()) + } + TimestampFormat::Date { millis_separator } => { + let (sec, millis) = self.as_sec_millis()?; + let d = datetime!(1970-01-01 0:00) + + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); + let format1 = format_description!("[year][month][day][hour][minute][second]"); + let format2 = format_description!("[subsecond digits:3]"); + + write!( + output, + "{}{}{}", + d.format(&format1)?, + millis_separator, + d.format(&format2)? 
+ )?; + Ok(()) + } + TimestampFormat::HHMMSSFFF => self.write_srt_time(output), + } + } + + pub fn to_srt_time(&self) -> Result { + let mut s = String::new(); + self.write_srt_time(&mut s)?; + Ok(s) + } + + pub fn to_vtt_time(&self) -> Result { + let mut s = String::new(); + self.write_vtt_time(&mut s)?; + Ok(s) + } + + pub fn to_hms_millis_time(&self, sep: char) -> Result { + let mut s = String::new(); + self.write_hms_millis_time(&mut s, sep)?; + Ok(s) + } + + pub fn to_ctime(&self) -> Result { + let mut s = String::new(); + self.write_ctime(&mut s)?; + Ok(s) + } + + pub fn to_formatted_time(&self, format: TimestampFormat) -> Result { + let mut s = String::new(); + self.write_formatted_time(&mut s, format)?; + Ok(s) + } + + /// Creates a [`Timestamp`] by parsing `input` using format `SS` or `MM:SS` or `HH:MM:SS`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::parse_optional_hhmmss_from_str("01:12:45").unwrap(); + /// assert_eq!(timestamp, Timestamp::from_millis(4_365_000)); + /// ``` + pub fn parse_optional_hhmmss_from_str(input: &str) -> Result { + let mut numbers = input + .split(':') + .map(|x| x.parse::().map_err(|_| TimestampError::ParsingError)) + .rev(); + + let mut millis: u64 = 0; + + let seconds: u64 = numbers.next().ok_or(TimestampError::ParsingError)??.into(); + if seconds > 59 { + return Err(TimestampError::InputOutOfRangeError); + } + millis += seconds * 1000; + + if let Some(x) = numbers.next() { + let minutes: u64 = x?.into(); + if minutes > 59 { + return Err(TimestampError::InputOutOfRangeError); + } + millis += 60_000 * minutes; + } + + if let Some(x) = numbers.next() { + let hours: u64 = x?.into(); + millis += 3_600_000 * hours; + } + + if numbers.next().is_some() { + return Err(TimestampError::ParsingError); + } + + Ok(Timestamp::from_millis(millis.try_into()?)) + } +} + +/// Represent the number of clock ticks as defined in Mpeg standard. 
+/// +/// This number can never be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] +pub struct MpegClockTick(i64); + +impl MpegClockTick { + /// The ratio to convert a clock tick to time duration. + pub const MPEG_CLOCK_FREQ: i64 = 90000; + + /// Create a value representing `ticks` clock ticks. + pub fn new(ticks: i64) -> MpegClockTick { + MpegClockTick(ticks) + } + + /// Returns the number of clock ticks. + pub fn as_i64(&self) -> i64 { + self.0 + } + + /// Converts the clock ticks to its equivalent time duration. + /// + /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`]. + pub fn as_timestamp(&self) -> Timestamp { + Timestamp::from_millis(self.0 / (MpegClockTick::MPEG_CLOCK_FREQ / 1000)) + } +} + +/// Represents the number of frames. +/// +/// This number can never be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] +pub struct FrameCount(u64); + +impl FrameCount { + /// Create a value representing `frames` number of frames. + pub const fn new(frames: u64) -> FrameCount { + FrameCount(frames) + } + + /// Returns the number of frames. + pub fn as_u64(&self) -> u64 { + self.0 + } + + /// Converts the frames to its equivalent time duration. + /// + /// The conversion ratio used is `fps`. + pub fn as_timestamp(&self, fps: f64) -> Timestamp { + Timestamp::from_millis((self.0 as f64 * 1000.0 / fps) as i64) + } + + /// Converts the frames to its equivalent number of clock ticks. + /// + /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`] and `fps`. + pub fn as_mpeg_clock_tick(&self, fps: f64) -> MpegClockTick { + MpegClockTick::new(((self.0 * MpegClockTick::MPEG_CLOCK_FREQ as u64) as f64 / fps) as i64) + } +} + +/// Represents a GOP Time code as defined in the Mpeg standard. +/// +/// This structure stores its time in the form of hours, minutes, seconds and pictures. This +/// structure also stores its time in the form of a [`Timestamp`] when it is created. 
This +/// [`Timestamp`] can be modified by [`timestamp_mut`](GopTimeCode::timestamp_mut) and an +/// additional 24 hours may be added on rollover, so it is not necessary that the above two +/// formats refer to the same time. Therefore it is recommended to only rely on the +/// [`Timestamp`] instead of the other format. +#[derive(Copy, Clone, Debug)] +pub struct GopTimeCode { + drop_frame: bool, + time_code_hours: u8, + time_code_minutes: u8, + time_code_seconds: u8, + time_code_pictures: u8, + timestamp: Timestamp, +} + +impl GopTimeCode { + /// Create a new [`GopTimeCode`] from the specified parameters. + /// + /// The number of frames or pictures is converted to time duration using `fps`. + /// + /// If `rollover` is true, then an extra of 24 hours will added. + /// + /// It will return [`None`] if any parameter doesn't follow their respective ranges: + /// + /// | Parameter | Range | + /// |-----------|--------| + /// | hours | 0 - 23 | + /// | minutes | 0 - 59 | + /// | seconds | 0 - 59 | + /// | pictures | 0 - 59 | + pub fn new( + drop_frame: bool, + hours: u8, + minutes: u8, + seconds: u8, + pictures: u8, + fps: f64, + rollover: bool, + ) -> Option { + if hours < 24 && minutes < 60 && seconds < 60 && pictures < 60 { + let millis = (1000.0 * (pictures as f64) / fps) as u16; + let extra_hours = if rollover { 24 } else { 0 }; + let timestamp = + Timestamp::from_hms_millis(hours + extra_hours, minutes, seconds, millis) + .expect("The fps given is probably too low"); + + Some(GopTimeCode { + drop_frame, + time_code_hours: hours, + time_code_minutes: minutes, + time_code_seconds: seconds, + time_code_pictures: pictures, + timestamp, + }) + } else { + None + } + } + + /// Returns the GOP time code in its equivalent time duration. + pub fn timestamp(&self) -> Timestamp { + self.timestamp + } + + /// Returns a mutable reference to internal [`Timestamp`]. 
+ pub fn timestamp_mut(&mut self) -> &mut Timestamp { + &mut self.timestamp + } + + /// Check if a rollover has ocurred by comparing the previous [`GopTimeCode`] that is `prev` + /// with the current [`GopTimeCode`]. + pub fn did_rollover(&self, prev: &GopTimeCode) -> bool { + prev.time_code_hours == 23 + && prev.time_code_minutes == 59 + && self.time_code_hours == 0 + && self.time_code_minutes == 0 + } + + /// Constructs a [`GopTimeCode`] from its individual fields. + /// + /// # Safety + /// + /// The fields other than [`Timestamp`] may not be accurate if it is changed using + /// [`timestamp_mut`](GopTimeCode::timestamp_mut). + pub unsafe fn from_raw_parts( + drop_frame: bool, + hours: u8, + minutes: u8, + seconds: u8, + pictures: u8, + timestamp: Timestamp, + ) -> GopTimeCode { + GopTimeCode { + drop_frame, + time_code_hours: hours, + time_code_minutes: minutes, + time_code_seconds: seconds, + time_code_pictures: pictures, + timestamp, + } + } + + /// Returns the individuals field of a [`GopTimeCode`]. + /// + /// # Safety + /// + /// The fields other than [`Timestamp`] may not be accurate if it is changed using + /// [`timestamp_mut`](GopTimeCode::timestamp_mut). 
+ pub unsafe fn as_raw_parts(&self) -> (bool, u8, u8, u8, u8, Timestamp) { + let GopTimeCode { + drop_frame, + time_code_hours, + time_code_minutes, + time_code_seconds, + time_code_pictures, + timestamp, + } = *self; + + ( + drop_frame, + time_code_hours, + time_code_minutes, + time_code_seconds, + time_code_pictures, + timestamp, + ) + } +} From 50bfa6fdb29604eb3bfd95a4826d96abc256d7ff Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 13:32:00 +0530 Subject: [PATCH 07/13] add net module --- src/rust/Cargo.lock | 157 ++++++++++ src/rust/lib_ccxr/Cargo.lock | 175 +++++++++++ src/rust/lib_ccxr/Cargo.toml | 2 + src/rust/lib_ccxr/src/util/mod.rs | 1 + src/rust/lib_ccxr/src/util/net/common.rs | 353 +++++++++++++++++++++++ src/rust/lib_ccxr/src/util/net/mod.rs | 20 ++ src/rust/lib_ccxr/src/util/net/source.rs | 309 ++++++++++++++++++++ src/rust/lib_ccxr/src/util/net/target.rs | 276 ++++++++++++++++++ 8 files changed, 1293 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/net/common.rs create mode 100644 src/rust/lib_ccxr/src/util/net/mod.rs create mode 100644 src/rust/lib_ccxr/src/util/net/source.rs create mode 100644 src/rust/lib_ccxr/src/util/net/target.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 13f32c7db..6a5e69b6f 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -224,6 +224,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "find-crate" version = "0.6.3" @@ -239,6 +245,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -264,6 +276,16 @@ dependencies = [ "libc", ] +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.9" @@ -299,6 +321,8 @@ version = "0.1.0" dependencies = [ "bitflags 2.4.0", "derive_more", + "num_enum", + "socket2", "thiserror", "time", ] @@ -366,6 +390,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.29", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -456,6 +501,16 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.66" @@ -597,6 +652,16 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "socket2" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "strsim" version = "0.8.0" @@ -712,6 +777,23 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-ident" version = "1.0.11" @@ -792,3 +874,78 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winnow" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +dependencies = [ + "memchr", +] diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 6a653589c..f8442eb8a 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -33,6 +33,28 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.9" @@ -45,10 +67,61 @@ version = "0.1.0" dependencies = [ "bitflags", "derive_more", + "num_enum", + "socket2", "thiserror", "time", ] +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.66" @@ -102,6 +175,16 @@ dependencies = [ "syn 2.0.29", ] +[[package]] 
+name = "socket2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "syn" version = "1.0.109" @@ -172,8 +255,100 @@ dependencies = [ "time-core", ] +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winnow" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +dependencies = [ + "memchr", +] diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index 8fbfc6a64..1599b43be 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,10 +6,12 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +socket2 = "0.5.3" thiserror = "1.0.39" time = { version = "0.3.27", features = ["macros", "formatting"] } bitflags = "2.3.1" derive_more = "0.99.17" +num_enum = "0.6.1" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/mod.rs 
b/src/rust/lib_ccxr/src/util/mod.rs index 56ae8e111..7b2316da2 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -4,6 +4,7 @@ mod bits; pub mod encoding; mod levenshtein; pub mod log; +pub mod net; pub mod time; pub use bits::*; diff --git a/src/rust/lib_ccxr/src/util/net/common.rs b/src/rust/lib_ccxr/src/util/net/common.rs new file mode 100644 index 000000000..77bad794b --- /dev/null +++ b/src/rust/lib_ccxr/src/util/net/common.rs @@ -0,0 +1,353 @@ +use crate::util::time::Timestamp; +use num_enum::{IntoPrimitive, TryFromPrimitive}; +use std::borrow::Cow; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::io; +use std::io::Write; +use thiserror::Error; + +/// Default port to be used when port number is not specified for TCP. +pub const DEFAULT_TCP_PORT: u16 = 2048; + +/// The amount of time to wait for a response before resetting the connection. +pub const NO_RESPONSE_INTERVAL: Timestamp = Timestamp::from_millis(20_000); + +/// The time interval between sending ping messages. +pub const PING_INTERVAL: Timestamp = Timestamp::from_millis(3_000); + +/// The size of the `length` section of the [`Block`]'s byte format. +/// +/// See [`BlockStream`] for more information. +pub const LEN_SIZE: usize = 10; + +/// The sequence of bytes used to denote the end of a [`Block`] in its byte format. +/// +/// See [`BlockStream`] for more information. +pub const END_MARKER: &str = "\r\n"; + +/// Represents the different kinds of commands that could be sent or received in a block. +#[derive(Copy, Clone, Debug, Eq, PartialEq, IntoPrimitive, TryFromPrimitive)] +#[repr(u8)] +pub enum Command { + Ok = 1, + + /// Used to send password just after making a TCP connection. + Password = 2, + BinMode = 3, + + /// Used to send description just after making a TCP connection. + CcDesc = 4, + BinHeader = 5, + BinData = 6, + + /// Used to send EPG metadata across network.
+ EpgData = 7, + Error = 51, + UnknownCommand = 52, + WrongPassword = 53, + ConnLimit = 54, + + /// Used to send ping messages to check network connectivity. + Ping = 55, +} + +/// Represents the smallest unit of data that could be sent or received from network. +/// +/// A [`Block`] consists of a [`Command`] and some binary data associated with it. The kind of +/// block is denoted by its [`Command`]. The binary data has different formats or information based +/// on the kind of [`Command`]. +/// +/// Any subtitle data, metadata, ping, etc, that needs to be sent by network goes through in the +/// form of a [`Block`]. See [`BlockStream`] for more information on how a [`Block`] is sent or +/// received. +pub struct Block<'a> { + command: Command, + data: Cow<'a, [u8]>, +} + +impl<'a> Block<'a> { + fn new(command: Command, data: &'a [u8]) -> Block<'a> { + Block { + command, + data: Cow::from(data), + } + } + + fn new_owned(command: Command, data: Vec) -> Block<'a> { + Block { + command, + data: Cow::from(data), + } + } + + /// Create a new [`Ping`](Command::Ping) Block. + pub fn ping() -> Block<'a> { + Block::new_owned(Command::Ping, vec![]) + } + + /// Create a new [`BinHeader`](Command::BinHeader) Block along with `header` data. + pub fn bin_header(header: &'a [u8]) -> Block<'a> { + Block::new(Command::BinHeader, header) + } + + /// Create a new [`BinData`](Command::BinData) Block along with `data`. + pub fn bin_data(data: &'a [u8]) -> Block<'a> { + Block::new(Command::BinData, data) + } + + /// Create a new [`Password`](Command::Password) Block along with `password` data. + /// + /// The data of the returned [`Block`] will consist of `password` encoded as UTF-8 bytes which + /// is not nul-terminated. 
+ /// + /// # Examples + /// ``` + /// # use lib_ccxr::util::net::Block; + /// let b = Block::password("A"); + /// assert_eq!(b.data(), &[b'A']); + /// ``` + pub fn password(password: &'a str) -> Block<'a> { + Block::new(Command::Password, password.as_bytes()) + } + + /// Create a new [`CcDesc`](Command::CcDesc) Block along with `desc` data. + /// + /// The data of the returned [`Block`] will consist of `desc` encoded as UTF-8 bytes which is + /// not nul-terminated. + /// + /// # Examples + /// ``` + /// # use lib_ccxr::util::net::Block; + /// let b = Block::cc_desc("Teletext"); + /// assert_eq!(b.data(), &[b'T', b'e', b'l', b'e', b't', b'e', b'x', b't']); + /// ``` + pub fn cc_desc(desc: &'a str) -> Block<'a> { + Block::new(Command::CcDesc, desc.as_bytes()) + } + + /// Create a new [`EpgData`](Command::EpgData) Block along with the related metadata used in + /// EPG. + /// + /// All the parameters are encoded as UTF-8 bytes which are nul-terminated. If a parameter is + /// [`None`], then it is considered to be equivalent to an empty String. All these + /// nul-terminated UTF-8 bytes are placed one after the other in the order of `start`, `stop`, + /// `title`, `desc`, `lang`, `category` with nul character acting as the separator between + /// these sections.
+ /// + /// # Examples + /// ``` + /// # use lib_ccxr::util::net::Block; + /// let b = Block::epg_data("A", "B", Some("C"), None, Some("D"), None); + /// assert_eq!(b.data(), &[b'A', b'\0', b'B', b'\0', b'C', b'\0', b'\0', b'D', b'\0', b'\0']); + /// ``` + pub fn epg_data( + start: &str, + stop: &str, + title: Option<&str>, + desc: Option<&str>, + lang: Option<&str>, + category: Option<&str>, + ) -> Block<'a> { + let title = title.unwrap_or(""); + let desc = desc.unwrap_or(""); + let lang = lang.unwrap_or(""); + let category = category.unwrap_or(""); + + // Plus 1 to accommodate space for the nul character + let start_len = start.len() + 1; + let stop_len = stop.len() + 1; + let title_len = title.len() + 1; + let desc_len = desc.len() + 1; + let lang_len = lang.len() + 1; + let category_len = category.len() + 1; + + let total_len = start_len + stop_len + title_len + desc_len + lang_len + category_len; + let mut data = Vec::with_capacity(total_len); + + data.extend_from_slice(start.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + data.extend_from_slice(stop.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + data.extend_from_slice(title.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + data.extend_from_slice(desc.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + data.extend_from_slice(lang.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + data.extend_from_slice(category.as_bytes()); + data.extend_from_slice("\0".as_bytes()); + + Block::new_owned(Command::EpgData, data) + } + + /// Returns the kind of [`Block`] denoted by its [`Command`]. + pub fn command(&self) -> Command { + self.command + } + + /// Returns the associated data of [`Block`].
+ pub fn data(&self) -> &[u8] { + &self.data + } +} + +impl Display for Block<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "[C] {:?} {} ", self.command, self.data.len())?; + + if self.command != Command::BinHeader && self.command != Command::BinData { + write!(f, "{} ", &*String::from_utf8_lossy(&self.data))?; + } + + write!(f, "\\r\\n")?; + + Ok(()) + } +} + +/// The [`BlockStream`] trait allows for sending and receiving [`Block`]s across the network. +/// +/// The only two implementers of [`BlockStream`] are [`SendTarget`] and [`RecvSource`] which are +/// used for sending and receiving blocks respectively. +/// +/// This trait provides an abstraction over the different interfaces of [`TcpStream`] and +/// [`UdpSocket`]. The implementers only need to implement the functionality to send and receive +/// bytes through network by [`BlockStream::send`] and [`BlockStream::recv`]. The functionality to +/// send and receive [`Block`] will be automatically made available by [`BlockStream::send_block`] +/// and [`BlockStream::recv_block`]. +/// +/// A [`Block`] is sent or received across the network using a byte format. Since a [`Block`] +/// consists of `command` and variable sized `data`, it is encoded in the following way. +/// +/// | Section | Length | Description | +/// |------------|--------------|---------------------------------------------------------------------------| +/// | command | 1 | The `command` encoded as its corresponding byte value. | +/// | length | [`LEN_SIZE`] | The length of `data` encoded as nul-terminated string form of the number. | +/// | data | length | The associated `data` bytes whose meaning is dependent on `command`. | +/// | end_marker | 2 | This value is always [`END_MARKER`], signifying end of current block. | +/// +/// The only exception to the above format is a [`Ping`] [`Block`] which is encoded only with its 1-byte +/// command section.
It does not have length, data or end_marker sections. +/// +/// [`SendTarget`]: super::target::SendTarget +/// [`RecvSource`]: super::source::RecvSource +/// [`TcpStream`]: std::net::TcpStream +/// [`UdpSocket`]: std::net::UdpSocket +/// [`Ping`]: Command::Ping +pub trait BlockStream { + /// Send the bytes in `buf` across the network. + fn send(&mut self, buf: &[u8]) -> io::Result; + + /// Receive the bytes from network and place them in `buf`. + fn recv(&mut self, buf: &mut [u8]) -> io::Result; + + /// Send a [`Block`] across the network. + /// + /// It returns a [`NetError`] if some transmission failure occurred, or else it will return a bool + /// that indicates the status of this connection. It will be `false` if the connection shut down + /// correctly. + fn send_block(&mut self, block: &Block<'_>) -> Result { + let Block { command, data } = block; + + if self.send(&[(*command).into()])? != 1 { + return Ok(false); + } + + if *command == Command::Ping { + return Ok(true); + } + + let mut length_part = [b'\0'; LEN_SIZE]; + let _ = write!(length_part.as_mut_slice(), "{}", data.len()); + if self.send(&length_part)? != LEN_SIZE { + return Ok(false); + } + + if self.send(data)? != data.len() { + return Ok(false); + } + + if self.send(END_MARKER.as_bytes())? != END_MARKER.len() { + return Ok(false); + } + + #[cfg(feature = "debug_out")] + { + eprintln!("{}", block); + } + + Ok(true) + } + + /// Receive a [`Block`] from the network. + /// + /// It returns a [`NetError`] if some transmission failure occurred or byte format is violated. + /// It will return a [`None`] if the connection has shut down correctly. + fn recv_block<'a>(&mut self) -> Result>, NetError> { + let mut command_byte = [0u8; 1]; + if self.recv(&mut command_byte)?
!= 1 { + return Ok(None); + } + let command: Command = command_byte[0] + .try_into() + .map_err(|_| NetError::InvalidBytes { + location: "command", + })?; + + if command == Command::Ping { + return Ok(Some(Block::ping())); + } + + let mut length_bytes = [0u8; LEN_SIZE]; + if self.recv(&mut length_bytes)? != LEN_SIZE { + return Ok(None); + } + let end = length_bytes + .iter() + .position(|&x| x == b'\0') + .unwrap_or(LEN_SIZE); + let length: usize = String::from_utf8_lossy(&length_bytes[0..end]) + .parse() + .map_err(|_| NetError::InvalidBytes { location: "length" })?; + + let mut data = vec![0u8; length]; + if self.recv(&mut data)? != length { + return Ok(None); + } + + let mut end_marker = [0u8; END_MARKER.len()]; + if self.recv(&mut end_marker)? != END_MARKER.len() { + return Ok(None); + } + if end_marker != END_MARKER.as_bytes() { + return Err(NetError::InvalidBytes { + location: "end_marker", + }); + } + + let block = Block::new_owned(command, data); + + #[cfg(feature = "debug_out")] + { + eprintln!("{}", block); + } + + Ok(Some(block)) + } +} + +/// A collective [`Error`](std::error::Error) type that encompasses all the possible error cases +/// when sending, receiving or parsing data during networking operations. +#[derive(Error, Debug)] +pub enum NetError { + /// An Error occurred within std giving a [`io::Error`] + #[error(transparent)] + IoError(#[from] io::Error), + + /// The received bytes do not follow a [`Block`]'s byte format. + /// + /// See [`BlockStream`] for more information. + #[error("invalid bytes while parsing {location}")] + InvalidBytes { location: &'static str }, +} diff --git a/src/rust/lib_ccxr/src/util/net/mod.rs b/src/rust/lib_ccxr/src/util/net/mod.rs new file mode 100644 index 000000000..9d0c817d5 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/net/mod.rs @@ -0,0 +1,20 @@ +//! A module for sending and receiving subtitle data across the network. +//! +//! The [`SendTarget`] struct provides methods to send data to the network.
It can be constructed +//! from [`SendTargetConfig`]. +//! +//! The [`RecvSource`] struct provides methods to receive data from the network. It can be +//! constructed from [`RecvSourceConfig`]. +//! +//! Any data to be sent across the network is stored in the form of a [`Block`]. The +//! [`BlockStream`] can encode a [`Block`] as a byte sequence using a custom byte format which can +//! then be sent or received using standard networking primitives. See [`BlockStream`] to know more +//! about the custom byte format. + +mod common; +mod source; +mod target; + +pub use common::*; +pub use source::*; +pub use target::*; diff --git a/src/rust/lib_ccxr/src/util/net/source.rs b/src/rust/lib_ccxr/src/util/net/source.rs new file mode 100644 index 000000000..a268a3053 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/net/source.rs @@ -0,0 +1,309 @@ +use std::io; +use std::io::{Read, Write}; +use std::net::{ + IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, TcpListener, TcpStream, ToSocketAddrs, + UdpSocket, +}; + +use socket2::{Domain, Socket, Type}; + +use crate::util::log::{fatal, info, ExitCause}; +use crate::util::net::{Block, BlockStream, Command, NetError, DEFAULT_TCP_PORT, PING_INTERVAL}; +use crate::util::time::Timestamp; + +/// An enum of configuration parameters to construct [`RecvSource`]. +#[derive(Copy, Clone, Debug)] +pub enum RecvSourceConfig<'a> { + Tcp { + /// The port number where TCP socket will be bound. + /// + /// If no port number is provided then [`DEFAULT_TCP_PORT`] will be used instead. + port: Option, + + /// The password of receiving server. + /// + /// The password sent from client will be compared to this and only allow further + /// communication if the passwords match. + password: Option<&'a str>, + }, + Udp { + /// Source's IP address or hostname if trying to open a multicast SSM channel. + source: Option<&'a str>, + + /// The IP address or hostname where UDP socket will be bound.
+ address: Option<&'a str>, + + /// The port number where UDP socket will be bound. + port: u16, + }, +} + +enum SourceSocket { + Tcp(TcpStream), + Udp { + socket: UdpSocket, + source: Option, + address: Ipv4Addr, + }, +} + +/// A struct used for receiving subtitle data from the network. +/// +/// Even though it exposes methods from [`BlockStream`], it is recommended to not use them. +/// Instead use the methods provided directly by [`RecvSource`] like +/// [`RecvSource::recv_header_or_cc`]. +/// +/// To create a [`RecvSource`], one must first construct a [`RecvSourceConfig`]. +/// +/// ```no_run +/// # use lib_ccxr::util::net::{RecvSource, RecvSourceConfig}; +/// let config = RecvSourceConfig::Tcp { +/// port: None, +/// password: Some("12345678"), +/// }; +/// let mut recv_source = RecvSource::new(config); +/// +/// // Once recv_source is constructed, we can use it to receive data. +/// let block = recv_source.recv_header_or_cc().unwrap(); +/// ``` +pub struct RecvSource { + socket: SourceSocket, + last_ping: Timestamp, +} + +impl BlockStream for RecvSource { + fn send(&mut self, buf: &[u8]) -> io::Result { + match &mut self.socket { + SourceSocket::Tcp(stream) => stream.write(buf), + SourceSocket::Udp { socket, .. } => socket.send(buf), + } + } + + fn recv(&mut self, buf: &mut [u8]) -> io::Result { + match &mut self.socket { + SourceSocket::Tcp(stream) => stream.read(buf), + SourceSocket::Udp { + socket, + source, + address, + } => { + if cfg!(target_os = "windows") { + socket.recv(buf) + } else { + let should_check_source = address.is_multicast() && source.is_some(); + if should_check_source { + loop { + if let Ok((size, src_addr)) = socket.recv_from(buf) { + if src_addr.ip() == source.unwrap() { + return Ok(size); + } + } + } + } else { + socket.recv(buf) + } + } + } + } + } +} + +impl RecvSource { + /// Create a new [`RecvSource`] from the configuration parameters of [`RecvSourceConfig`]. + /// + /// Note that this method attempts to create a server.
It does not return a [`Result`]. When + /// it is unable to start a server, it crashes instantly by calling [`fatal!`]. + /// + /// This method will continuously block until a connection with a client is made. + pub fn new(config: RecvSourceConfig) -> RecvSource { + match config { + RecvSourceConfig::Tcp { port, password } => { + let port = port.unwrap_or(DEFAULT_TCP_PORT); + + info!( + "\n\r----------------------------------------------------------------------\n" + ); + info!("Binding to {}\n", port); + + let addresses = [ + SocketAddr::new(IpAddr::V6(Ipv6Addr::UNSPECIFIED), port), + SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port), + ]; + + let listener = TcpListener::bind(addresses.as_slice()).unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Unable to start server\n"), + ); + + if let Some(pwd) = &password { + info!("Password: {}\n", pwd); + } + + info!("Waiting for connections\n"); + + loop { + if let Ok((socket, _)) = listener.accept() { + let mut source = RecvSource { + socket: SourceSocket::Tcp(socket), + last_ping: Timestamp::from_millis(0), + }; + if check_password(&mut source, password) { + return source; + } + } + } + } + RecvSourceConfig::Udp { + source, + address, + port, + } => { + let address = address + .map(|x| { + for s in (x, 0).to_socket_addrs().unwrap() { + if let SocketAddr::V4(sv4) = s { + return *sv4.ip(); + } + } + fatal!(cause = ExitCause::Failure; "Could not resolve udp address") + }) + .unwrap_or(Ipv4Addr::UNSPECIFIED); + + let source = source.map(|x| { + for s in (x, 0).to_socket_addrs().unwrap() { + if let SocketAddr::V4(sv4) = s { + return *sv4.ip(); + } + } + fatal!(cause = ExitCause::Failure; "Could not resolve udp source") + }); + + let socket = Socket::new(Domain::IPV4, Type::DGRAM, None).unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Socket creation error"), + ); + + if address.is_multicast() { + socket.set_reuse_address(true).unwrap_or_else(|_| { + info!("Cannot not set reuse address\n"); + }); + } + +
let binding_address = if cfg!(target_os = "windows") && address.is_multicast() { + Ipv4Addr::UNSPECIFIED + } else { + address + }; + + socket + .bind(&SocketAddrV4::new(binding_address, port).into()) + .unwrap_or_else(|_| fatal!(cause = ExitCause::Bug; "Socket bind error")); + + if address.is_multicast() { + if let Some(src) = source { + socket.join_ssm_v4(&src, &address, &Ipv4Addr::UNSPECIFIED) + } else { + socket.join_multicast_v4(&address, &Ipv4Addr::UNSPECIFIED) + } + .unwrap_or_else( + |_| fatal!(cause = ExitCause::Bug; "Cannot join multicast group"), + ); + } + + info!( + "\n\r----------------------------------------------------------------------\n" + ); + if address == Ipv4Addr::UNSPECIFIED { + info!("\rReading from UDP socket {}\n", port); + } else if let Some(src) = source { + info!("\rReading from UDP socket {}@{}:{}\n", src, address, port); + } else { + info!("\rReading from UDP socket {}:{}\n", address, port); + } + + RecvSource { + socket: SourceSocket::Udp { + socket: socket.into(), + address, + source, + }, + last_ping: Timestamp::from_millis(0), + } + } + } + } + + /// Receive a [`BinHeader`] or [`BinData`] [`Block`]. + /// + /// Note that this method will continuously block until it receives a + /// [`BinHeader`] or [`BinData`] [`Block`]. + /// + /// It returns a [`NetError`] if some transmission failure occurred or byte format is violated. + /// It will return a [`None`] if the connection has shut down correctly. + /// + /// [`BinHeader`]: Command::BinHeader + /// [`BinData`]: Command::BinData + pub fn recv_header_or_cc<'a>(&mut self) -> Result>, NetError> { + let now = Timestamp::now(); + if self.last_ping.millis() == 0 { + self.last_ping = now; + } + + if now - self.last_ping > PING_INTERVAL { + self.last_ping = now; + if self.send_ping().is_err() { + fatal!(cause = ExitCause::Failure; "Unable to send keep-alive packet to client\n"); + } + } + + loop { + if let Some(block) = self.recv_block()?
{ + if block.command() == Command::BinHeader || block.command() == Command::BinData { + return Ok(Some(block)); + } + } else { + return Ok(None); + } + } + } + + /// Send a [`Ping`](Command::Ping) [`Block`]. + /// + /// It returns a [`NetError`] if some transmission failure occurred, or else it will return a bool + /// that indicates the status of this connection. It will be `false` if the connection shut down + /// correctly. + fn send_ping(&mut self) -> Result { + self.send_block(&Block::ping()) + } +} + +/// Check if the received password matches with the current password. +/// +/// This method attempts to read a [`Password`](Command::Password) [`Block`] from `socket`. Any +/// form of error in this operation results in `false`. +/// +/// If `password` is [`None`], then no checking is done and results in `true`. +fn check_password(socket: &mut RecvSource, password: Option<&str>) -> bool { + let block = match socket.recv_block() { + Ok(Some(b)) => b, + _ => return false, + }; + + let pwd = match password { + Some(p) => p, + None => return true, + }; + + if block.command() == Command::Password && String::from_utf8_lossy(block.data()) == *pwd { + true + } else { + #[cfg(feature = "debug_out")] + { + eprintln!("[C] Wrong password"); + eprintln!("[S] PASSWORD"); + } + // TODO: Check if the below portion is really required, since the receiver is not even + // listening for a Password block + let _ = socket.send_block(&Block::password("")); + + false + } +} diff --git a/src/rust/lib_ccxr/src/util/net/target.rs b/src/rust/lib_ccxr/src/util/net/target.rs new file mode 100644 index 000000000..0181aff0c --- /dev/null +++ b/src/rust/lib_ccxr/src/util/net/target.rs @@ -0,0 +1,276 @@ +use crate::util::log::{fatal, info, ExitCause}; +use crate::util::net::{ + Block, BlockStream, Command, NetError, DEFAULT_TCP_PORT, NO_RESPONSE_INTERVAL, PING_INTERVAL, +}; +use crate::util::time::Timestamp; +use std::io; +use std::io::{Read, Write}; +use std::net::TcpStream; + +/// A struct of
configuration parameters to construct [`SendTarget`]. +#[derive(Copy, Clone, Debug)] +pub struct SendTargetConfig<'a> { + /// Target's IP address or hostname. + pub target_addr: &'a str, + + /// Target's port number. + /// + /// If no port number is provided then [`DEFAULT_TCP_PORT`] will be used instead. + pub port: Option, + + /// Password to be sent after establishing connection. + pub password: Option<&'a str>, + + /// Description to be sent after establishing connection. + pub description: Option<&'a str>, +} + +/// A struct used for sending subtitle data across the network. +/// +/// Even though it exposes methods from [`BlockStream`], it is recommended to not use them. +/// Instead use the methods provided directly by [`SendTarget`] like [`SendTarget::send_header`], +/// [`SendTarget::send_cc`], etc. +/// +/// To create a [`SendTarget`], one must first construct a [`SendTargetConfig`]. +/// +/// ```no_run +/// # use lib_ccxr::util::net::{SendTarget, SendTargetConfig}; +/// let config = SendTargetConfig { +/// target_addr: "192.168.60.133", +/// port: None, +/// password: Some("12345678"), +/// description: None, +/// }; +/// let mut send_target = SendTarget::new(config); +/// +/// // Once send_target is constructed, we can use it to send different kinds of data. +/// # let header = &[0u8; 1]; +/// send_target.send_header(header); +/// # let cc = &[0u8; 1]; +/// send_target.send_cc(cc); +/// ``` +pub struct SendTarget<'a> { + stream: Option, + config: SendTargetConfig<'a>, + header_data: Option>, + last_ping: Timestamp, + last_send_ping: Timestamp, +} + +impl BlockStream for SendTarget<'_> { + fn send(&mut self, buf: &[u8]) -> io::Result { + self.stream.as_mut().unwrap().write(buf) + } + + fn recv(&mut self, buf: &mut [u8]) -> io::Result { + self.stream.as_mut().unwrap().read(buf) + } +} + +impl<'a> SendTarget<'a> { + /// Create a new [`SendTarget`] from the configuration parameters of [`SendTargetConfig`].
+ /// + /// Note that this method attempts to connect to a server. It does not return a [`Result`]. When + /// it is unable to connect to a server, it crashes instantly by calling [`fatal!`]. + pub fn new(config: SendTargetConfig<'a>) -> SendTarget<'a> { + let tcp_stream = TcpStream::connect(( + config.target_addr, + config.port.unwrap_or(DEFAULT_TCP_PORT), + )) + .unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Unable to connect (tcp connection error)."), + ); + + tcp_stream.set_nonblocking(true).unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Unable to connect (set nonblocking).\n"), + ); + + let mut send_target = SendTarget { + stream: Some(tcp_stream), + config, + header_data: None, + last_ping: Timestamp::from_millis(0), + last_send_ping: Timestamp::from_millis(0), + }; + + send_target.send_password().unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Unable to connect (sending password).\n"), + ); + + send_target.send_description().unwrap_or_else( + |_| fatal!(cause = ExitCause::Failure; "Unable to connect (sending cc_desc).\n"), + ); + + info!( + "Connected to {}\n", + send_target.stream.as_ref().unwrap().peer_addr().unwrap() + ); + + send_target + } + + /// Consumes the [`SendTarget`] only returning its internal stream. + fn into_stream(self) -> TcpStream { + self.stream.unwrap() + } + + /// Send a [`BinHeader`](Command::BinHeader) [`Block`] returning if the operation was successful. 
+ pub fn send_header(&mut self, data: &[u8]) -> bool { + #[cfg(feature = "debug_out")] + { + eprintln!("Sending header (len = {}): ", data.len()); + eprintln!( + "File created by {:02X} version {:02X}{:02X}", + data[3], data[4], data[5] + ); + eprintln!("File format revision: {:02X}{:02X}", data[6], data[7]); + } + + if let Ok(true) = self.send_block(&Block::bin_header(data)) { + } else { + println!("Can't send BIN header"); + return false; + } + + if self.header_data.is_none() { + self.header_data = Some(data.into()) + } + + true + } + + /// Send a [`BinData`](Command::BinData) [`Block`] returning if the operation was successful. + pub fn send_cc(&mut self, data: &[u8]) -> bool { + #[cfg(feature = "debug_out")] + { + eprintln!("[C] Sending {} bytes", data.len()); + } + + if let Ok(true) = self.send_block(&Block::bin_data(data)) { + } else { + println!("Can't send BIN data"); + return false; + } + + true + } + + /// Send a [`EpgData`](Command::EpgData) [`Block`] returning if the operation was successful. + pub fn send_epg_data( + &mut self, + start: &str, + stop: &str, + title: Option<&str>, + desc: Option<&str>, + lang: Option<&str>, + category: Option<&str>, + ) -> bool { + let block = Block::epg_data(start, stop, title, desc, lang, category); + + #[cfg(feature = "debug_out")] + { + eprintln!("[C] Sending EPG: {} bytes", block.data().len()) + } + + if let Ok(true) = self.send_block(&block) { + } else { + eprintln!("Can't send EPG data"); + return false; + } + + true + } + + /// Send a [`Ping`](Command::Ping) [`Block`]. + /// + /// It returns a [`NetError`] if some transmission failure occurred, or else it will return a bool + /// that indicates the status of this connection. It will be `false` if the connection shut down + /// correctly. + fn send_ping(&mut self) -> Result { + self.send_block(&Block::ping()) + } + + /// Send a [`Password`](Command::Password) [`Block`].
+ /// + /// It returns a [`NetError`] if some transmission failure occurred, or else it will return a bool + /// that indicates the status of this connection. It will be `false` if the connection shut down + /// correctly. + fn send_password(&mut self) -> Result { + let password = self.config.password.unwrap_or(""); + self.send_block(&Block::password(password)) + } + + /// Send a [`CcDesc`](Command::CcDesc) [`Block`]. + /// + /// It returns a [`NetError`] if some transmission failure occurred, or else it will return a bool + /// that indicates the status of this connection. It will be `false` if the connection shut down + /// correctly. + fn send_description(&mut self) -> Result { + let description = self.config.description.unwrap_or(""); + self.send_block(&Block::cc_desc(description)) + } + + /// Check the connection health and reset connection if necessary. + /// + /// This method determines the connection health by comparing the time since last [`Ping`] + /// [`Block`] was received with [`NO_RESPONSE_INTERVAL`]. If it exceeds the + /// [`NO_RESPONSE_INTERVAL`], the connection is reset. + /// + /// This method also sends timely [`Ping`] [`Block`]s back to the server based on the + /// [`PING_INTERVAL`]. This method will crash instantly with [`fatal!`] if it is unable to send + /// data.
+ /// + /// [`Ping`]: Command::Ping + pub fn check_connection(&mut self) { + let now = Timestamp::now(); + + if self.last_ping.millis() == 0 { + self.last_ping = now; + } + + loop { + if self + .recv_block() + .ok() + .flatten() + .map(|x| x.command() == Command::Ping) + .unwrap_or(false) + { + #[cfg(feature = "debug_out")] + { + eprintln!("[S] Received PING"); + } + self.last_ping = now; + } else { + break; + } + } + + if now - self.last_ping > NO_RESPONSE_INTERVAL { + eprintln!( + "[S] No PING received from the server in {} sec, reconnecting", + NO_RESPONSE_INTERVAL.seconds() + ); + + std::mem::drop(self.stream.take().unwrap()); + + self.stream = Some(SendTarget::new(self.config).into_stream()); + + // `self.header_data` is only temporarily taken, since it will be refilled inside + // `send_header` function. + if let Some(header_data) = self.header_data.take() { + self.send_header(header_data.as_slice()); + } + + self.last_ping = now; + } + + if now - self.last_send_ping >= PING_INTERVAL { + if self.send_ping().is_err() { + fatal!(cause = ExitCause::Failure; "Unable to send data\n"); + } + + self.last_send_ping = now; + } + } +} From b1e0cbea6d38a40f31cc8e852a8efb1765c7a534 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 13:37:57 +0530 Subject: [PATCH 08/13] add subtitle module --- src/rust/lib_ccxr/src/lib.rs | 1 + src/rust/lib_ccxr/src/subtitle.rs | 97 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 src/rust/lib_ccxr/src/subtitle.rs diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 45ee8e79c..74aad8998 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1,2 +1,3 @@ pub mod common; +pub mod subtitle; pub mod util; diff --git a/src/rust/lib_ccxr/src/subtitle.rs b/src/rust/lib_ccxr/src/subtitle.rs new file mode 100644 index 000000000..4f442a87e --- /dev/null +++ b/src/rust/lib_ccxr/src/subtitle.rs @@ -0,0 +1,97 @@ +//! 
Provides types to represent different kinds of subtitle data in a unified format. +//! +//! NOTE: This module is incomplete and a lot of work is still left. + +use crate::common::Language; +use crate::util::encoding::EncodedString; +use crate::util::time::Timestamp; + +/// Represents the different formats in which subtitle data could be stored. +/// +/// NOTE: Heavy Work in Progress. +pub enum SubtitleData { + Dvb { + /* bitmap: Bitmap, */ + lang: Language, + is_eod: bool, + time_out: Timestamp, + }, + Dvd { + /* bitmap: Bitmap, */ + lang: Language, + }, + Xds(/* XdsScreen */), + Eia608(/* Eia608Screen */), + Text(EncodedString), + Raw(Vec<u8>), +} + +/// Represents a single subtitle instance on a screen with timing info. +pub struct Subtitle { + /// The subtitle data. + data: SubtitleData, + + /// The start time for this subtitle. + start_time: Timestamp, + + /// The end time of this subtitle. + end_time: Timestamp, + + /// A flag to tell that decoder has given output. + got_output: bool, + info: Option<String>, + mode: String, +} + +impl Subtitle { + /// Create a new Text Subtitle. + pub fn new_text( + string: EncodedString, + start_time: Timestamp, + end_time: Timestamp, + info: Option<String>, + mode: String, + ) -> Subtitle { + Subtitle { + data: SubtitleData::Text(string), + start_time, + end_time, + got_output: true, + info, + mode, + } + } + + /// Return a reference to the subtitle data. + pub fn data(&self) -> &SubtitleData { + &self.data + } + + /// Return the start time of this subtitle. + pub fn start_time(&self) -> Timestamp { + self.start_time + } + + /// Return the end time of this subtitle. + pub fn end_time(&self) -> Timestamp { + self.end_time + } + + /// Check if decoder has given output. + pub fn got_output(&self) -> bool { + self.got_output + } + + /// Update the state if decoder has given output.
+ pub fn set_got_output(&mut self, val: bool) { + self.got_output = val; + } + + pub fn info(&self) -> Option<&str> { + self.info.as_deref() + } + + pub fn mode(&self) -> &str { + &self.mode + } +} From 5eca25ca2d1f1e86eb29c56b55211567be8f684a Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 14:08:54 +0530 Subject: [PATCH 09/13] add teletext module --- src/rust/lib_ccxr/src/lib.rs | 1 + src/rust/lib_ccxr/src/teletext.rs | 1575 +++++++++++++++++++++++++++++ 2 files changed, 1576 insertions(+) create mode 100644 src/rust/lib_ccxr/src/teletext.rs diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 74aad8998..35d5f87a2 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1,3 +1,4 @@ pub mod common; pub mod subtitle; +pub mod teletext; pub mod util; diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs new file mode 100644 index 000000000..04d4b7096 --- /dev/null +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -0,0 +1,1575 @@ +//! Provides types to extract subtitles from Teletext streams. + +use num_enum::{IntoPrimitive, TryFromPrimitive}; +use std::cell::Cell; +use std::fmt; +use std::fmt::Write; +use std::sync::RwLock; + +use crate::common::OutputFormat; +use crate::subtitle::Subtitle; +use crate::util::encoding::{Ucs2Char, Ucs2String}; +use crate::util::log::{debug, info, logger, DebugMessageFlag}; +use crate::util::time::{Timestamp, TimestampFormat}; +use crate::util::{decode_hamming_24_18, decode_hamming_8_4, fuzzy_cmp, parity}; + +/// UTC referential value. 
+/// +/// It has different meanings based on its value: +/// - `u64::MAX` means don't use UNIX +/// - 0 means use current system time as reference +/// - +1 means use a specific reference +pub static UTC_REFVALUE: RwLock<u64> = RwLock::new(u64::MAX); + +const MAX_TLT_PAGES: usize = 1000; + +const TELETEXT_COLORS: [&str; 8] = [ + "#000000", // black + "#ff0000", // red + "#00ff00", // green + "#ffff00", // yellow + "#0000ff", // blue + "#ff00ff", // magenta + "#00ffff", // cyan + "#ffffff", // white +]; + +const LATIN_TO_RUSSIAN: [(Ucs2Char, char); 63] = [ + (65, 'А'), + (66, 'Б'), + (87, 'В'), + (71, 'Г'), + (68, 'Д'), + (69, 'Е'), + (86, 'Ж'), + (90, 'З'), + (73, 'И'), + (74, 'Й'), + (75, 'К'), + (76, 'Л'), + (77, 'М'), + (78, 'Н'), + (79, 'О'), + (80, 'П'), + (82, 'Р'), + (83, 'С'), + (84, 'Т'), + (85, 'У'), + (70, 'Ф'), + (72, 'Х'), + (67, 'Ц'), + (238, 'Ч'), + (235, 'Ш'), + (249, 'Щ'), + (35, 'Ы'), + (88, 'Ь'), + (234, 'Э'), + (224, 'Ю'), + (81, 'Я'), + (97, 'а'), + (98, 'б'), + (119, 'в'), + (103, 'г'), + (100, 'д'), + (101, 'е'), + (118, 'ж'), + (122, 'з'), + (105, 'и'), + (106, 'й'), + (107, 'к'), + (108, 'л'), + (109, 'м'), + (110, 'н'), + (111, 'о'), + (112, 'п'), + (114, 'р'), + (115, 'с'), + (116, 'т'), + (117, 'у'), + (102, 'ф'), + (104, 'х'), + (99, 'ц'), + (231, 'ч'), + (226, 'ш'), + (251, 'щ'), + (121, 'ъ'), + (38, 'ы'), + (120, 'ь'), + (244, 'э'), + (232, 'ю'), + (113, 'я'), +]; + +const ENTITIES: [(u8, &str); 3] = [(b'<', "&lt;"), (b'>', "&gt;"), (b'&', "&amp;")]; + +/// Represents a Teletext Packet. +pub struct TeletextPacketPayload { + _clock_in: u8, // clock run in + _framing_code: u8, // framing code, not needed, ETSI 300 706: const 0xe4 + address: [u8; 2], + data: [u8; 40], +} + +/// Represents the possible kinds of G0 character set.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +#[repr(u8)] +pub enum G0CharsetType { + Latin = 0, + Cyrillic1 = 1, + Cyrillic2 = 2, + Cyrillic3 = 3, + Greek = 4, + Arabic = 5, + Hebrew = 6, +} + +impl G0CharsetType { + /// Create a [`G0CharsetType`] from a Teletext triplet. + pub fn from_triplet(value: u32) -> G0CharsetType { + // ETS 300 706, Table 32 + if (value & 0x3c00) == 0x1000 { + match value & 0x0380 { + 0x0000 => G0CharsetType::Cyrillic1, + 0x0200 => G0CharsetType::Cyrillic2, + 0x0280 => G0CharsetType::Cyrillic3, + _ => G0CharsetType::Latin, + } + } else { + G0CharsetType::Latin + } + } +} + +/// Represents the bitcode representation of a [`G0LatinNationalSubset`]. +/// +/// It can be easily constructed from a [`u8`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct G0LatinNationalSubsetId(u8); + +impl From<u8> for G0LatinNationalSubsetId { + fn from(value: u8) -> G0LatinNationalSubsetId { + G0LatinNationalSubsetId(value) + } +} + +impl fmt::Display for G0LatinNationalSubsetId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "0x{:1x}.{:1x}", self.0 >> 3, self.0 & 0x07) + } +} + +/// Represents the possible kinds of National Option Subset for G0 Latin character set.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +#[repr(u8)] +pub enum G0LatinNationalSubset { + English = 0x0, + French = 0x1, + SwedishFinnishHungarian = 0x2, + CzechSlovak = 0x3, + German = 0x4, + PortugueseSpanish = 0x5, + Italian = 0x6, + Rumanian = 0x7, + Polish = 0x8, + Turkish = 0x9, + SerbianCroatianSlovenian = 0xa, + Estonian = 0xb, + LettishLithuanian = 0xc, +} + +impl fmt::Display for G0LatinNationalSubset { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + G0LatinNationalSubset::English => "English", + G0LatinNationalSubset::French => "French", + G0LatinNationalSubset::SwedishFinnishHungarian => "Swedish, Finnish, Hungarian", + G0LatinNationalSubset::CzechSlovak => "Czech, Slovak", + G0LatinNationalSubset::German => "German", + G0LatinNationalSubset::PortugueseSpanish => "Portuguese, Spanish", + G0LatinNationalSubset::Italian => "Italian", + G0LatinNationalSubset::Rumanian => "Rumanian", + G0LatinNationalSubset::Polish => "Polish", + G0LatinNationalSubset::Turkish => "Turkish", + G0LatinNationalSubset::SerbianCroatianSlovenian => "Serbian, Croatian, Slovenian", + G0LatinNationalSubset::Estonian => "Estonian", + G0LatinNationalSubset::LettishLithuanian => "Lettish, Lithuanian", + } + ) + } +} + +impl G0LatinNationalSubset { + // array positions where chars from G0_LATIN_NATIONAL_SUBSETS are injected into G0[LATIN] + const G0_LATIN_NATIONAL_SUBSETS_POSITIONS: [usize; 13] = [ + 0x03, 0x04, 0x20, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, + ]; + + // ETS 300 706, chapter 15.2, table 32: Function of Default G0 and G2 Character Set Designation + // and National Option Selection bits in packets X/28/0 Format 1, X/28/4, M/29/0 and M/29/4 + + // Latin National Option Sub-sets + const G0_LATIN_NATIONAL_SUBSETS: [[Ucs2Char; 13]; 13] = [ + // English + [ + 0x00a3, 0x0024, 0x0040, 0x00ab, 0x00bd, 0x00bb, 0x005e, 0x0023, 0x002d, 0x00bc, 0x00a6, + 0x00be, 0x00f7, + ], + // 
French + [ + 0x00e9, 0x00ef, 0x00e0, 0x00eb, 0x00ea, 0x00f9, 0x00ee, 0x0023, 0x00e8, 0x00e2, 0x00f4, + 0x00fb, 0x00e7, + ], + // Swedish, Finnish, Hungarian + [ + 0x0023, 0x00a4, 0x00c9, 0x00c4, 0x00d6, 0x00c5, 0x00dc, 0x005f, 0x00e9, 0x00e4, 0x00f6, + 0x00e5, 0x00fc, + ], + // Czech, Slovak + [ + 0x0023, 0x016f, 0x010d, 0x0165, 0x017e, 0x00fd, 0x00ed, 0x0159, 0x00e9, 0x00e1, 0x011b, + 0x00fa, 0x0161, + ], + // German + [ + 0x0023, 0x0024, 0x00a7, 0x00c4, 0x00d6, 0x00dc, 0x005e, 0x005f, 0x00b0, 0x00e4, 0x00f6, + 0x00fc, 0x00df, + ], + // Portuguese, Spanish + [ + 0x00e7, 0x0024, 0x00a1, 0x00e1, 0x00e9, 0x00ed, 0x00f3, 0x00fa, 0x00bf, 0x00fc, 0x00f1, + 0x00e8, 0x00e0, + ], + // Italian + [ + 0x00a3, 0x0024, 0x00e9, 0x00b0, 0x00e7, 0x00bb, 0x005e, 0x0023, 0x00f9, 0x00e0, 0x00f2, + 0x00e8, 0x00ec, + ], + // Rumanian + [ + 0x0023, 0x00a4, 0x0162, 0x00c2, 0x015e, 0x0102, 0x00ce, 0x0131, 0x0163, 0x00e2, 0x015f, + 0x0103, 0x00ee, + ], + // Polish + [ + 0x0023, 0x0144, 0x0105, 0x017b, 0x015a, 0x0141, 0x0107, 0x00f3, 0x0119, 0x017c, 0x015b, + 0x0142, 0x017a, + ], + // Turkish + [ + 0x0054, 0x011f, 0x0130, 0x015e, 0x00d6, 0x00c7, 0x00dc, 0x011e, 0x0131, 0x015f, 0x00f6, + 0x00e7, 0x00fc, + ], + // Serbian, Croatian, Slovenian + [ + 0x0023, 0x00cb, 0x010c, 0x0106, 0x017d, 0x0110, 0x0160, 0x00eb, 0x010d, 0x0107, 0x017e, + 0x0111, 0x0161, + ], + // Estonian + [ + 0x0023, 0x00f5, 0x0160, 0x00c4, 0x00d6, 0x017e, 0x00dc, 0x00d5, 0x0161, 0x00e4, 0x00f6, + 0x017e, 0x00fc, + ], + // Lettish, Lithuanian + [ + 0x0023, 0x0024, 0x0160, 0x0117, 0x0119, 0x017d, 0x010d, 0x016b, 0x0161, 0x0105, 0x0173, + 0x017e, 0x012f, + ], + ]; + + // References to the G0_LATIN_NATIONAL_SUBSETS array + const G0_LATIN_NATIONAL_SUBSETS_MAP: [u8; 56] = [ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x01, 0x02, 0x03, 0x04, 0xff, 0x06, + 0xff, 0x00, 0x01, 0x02, 0x09, 0x04, 0x05, 0x06, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, + 0xff, 0x07, 0xff, 0xff, 0x0b, 0x03, 0x04, 0xff, 0x0c, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x09, 0xff, 0xff, 0xff, 0xff, + ]; + + /// Create a [`G0LatinNationalSubset`] from its bitcode representation stored as a [`G0LatinNationalSubsetId`]. + pub fn from_subset_id(c: G0LatinNationalSubsetId) -> Option<G0LatinNationalSubset> { + let p = *Self::G0_LATIN_NATIONAL_SUBSETS_MAP.get(c.0 as usize)?; + if p == 0xff { + None + } else { + Some(p.try_into().ok()?) + } + } + + /// Return an Iterator containing the position of replacement and the character to replace when + /// changing the National Option Subset for G0 Latin character set. + fn replacement_pos_and_char(&self) -> impl Iterator<Item = (usize, Ucs2Char)> { + let lang_index: u8 = (*self).into(); + Self::G0_LATIN_NATIONAL_SUBSETS_POSITIONS + .into_iter() + .zip(Self::G0_LATIN_NATIONAL_SUBSETS[lang_index as usize].into_iter()) + } +} + +fn map_latin_to_russian(latin_char: Ucs2Char) -> Option<char> { + LATIN_TO_RUSSIAN + .iter() + .find(|&&(latin, _)| latin == latin_char) + .map(|&(_, russian)| russian) +} + +fn map_entities(c: Ucs2Char) -> Option<&'static str> { + let c: u8 = if c >= 0x80 { + return None; + } else { + c as u8 + }; + match ENTITIES.iter().find(|&&(symbol, _)| symbol == c) { + Some(&(_, entity)) => Some(entity), + None => None, + } +} + +/// A collective type to manage the entire G0 character set. +/// +/// This type is used to change the G0 character set and its Latin National Option Subset. This +/// type also manages the subset priority between M/29 and X/28 packets.
+pub struct G0Charset { + g0_charset: Box<[[Ucs2Char; 96]; 5]>, + charset_type: G0CharsetType, + primary_charset_current: G0LatinNationalSubsetId, + primary_charset_g0_m29: Option<G0LatinNationalSubsetId>, + primary_charset_g0_x28: Option<G0LatinNationalSubsetId>, + verbose_debug: bool, +} + +impl G0Charset { + fn new(verbose_debug: bool) -> G0Charset { + let charset = Box::new([ + [ + // Latin G0 Primary Set + 0x0020, 0x0021, 0x0022, 0x00a3, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, + 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x00ab, + 0x00bd, 0x00bb, 0x005e, 0x0023, 0x002d, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, + 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, + 0x007a, 0x00bc, 0x00a6, 0x00be, 0x00f7, 0x007f, + ], + [ + // Cyrillic G0 Primary Set - Option 1 - Serbian/Croatian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x0427, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0408, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x040c, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0412, 0x0403, 0x0409, 0x040a, 0x0417, 0x040b, + 0x0416, 0x0402, 0x0428, 0x040f, 0x0447, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0428, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x042c, 0x0440, 0x0441, 0x0442, 0x0443, 0x0432, 0x0423, 0x0429, 0x042a, + 0x0437, 0x042b, 0x0436, 0x0422, 0x0448,
0x042f, + ], + [ + // Cyrillic G0 Primary Set - Option 2 - Russian/Bulgarian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x042f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x042a, 0x0417, 0x0428, + 0x042d, 0x0429, 0x0427, 0x042b, 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x044a, + 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044b, + ], + [ + // Cyrillic G0 Primary Set - Option 3 - Ukrainian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x00ef, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x042f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x0049, 0x0417, 0x0428, + 0x042d, 0x0429, 0x0427, 0x00cf, 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x0069, + 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x00ff, + ], + [ + // Greek G0 Primary Set + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 
0x003e, 0x003f, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, + 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, + 0x03ac, 0x03ad, 0x03ae, 0x03af, 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, + 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, + 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, + 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x03cf, + ], + ]); + + G0Charset { + g0_charset: charset, + charset_type: G0CharsetType::Latin, + primary_charset_current: G0LatinNationalSubsetId(0), + primary_charset_g0_m29: None, + primary_charset_g0_x28: None, + verbose_debug, + } + } + + /// Return the equivalent UCS-2 character for the given teletext character based on the current + /// character set. + pub fn ucs2_char(&self, telx_char: u8) -> Ucs2Char { + if parity(telx_char) { + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Unrecoverable data error; PARITY({:02x})\n", telx_char); + return 0x20; + } + + let r: Ucs2Char = (telx_char & 0x7f).into(); + if r >= 0x20 { + self.g0_charset[self.charset_type as usize][r as usize - 0x20] + } else { + r + } + } + + /// Change the G0 character set. + pub fn set_charset(&mut self, charset: G0CharsetType) { + self.charset_type = charset; + } + + /// Set the G0 Latin National Option Subset for M/29 packets. + /// + /// It will change the mapping only if a Subset for X/28 is not set since X/28 has a higher + /// priority than M/29. This method will do nothing if the G0 charset is not + /// [`G0CharsetType::Latin`]. + pub fn set_g0_m29_latin_subset(&mut self, subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_m29 = Some(subset); + if self.primary_charset_g0_x28.is_none() { + self.remap_g0_charset(subset); + } + } + } + + /// Set the G0 Latin National Option Subset for X/28 packets. 
+ /// + /// This method will do nothing if the G0 charset is not [`G0CharsetType::Latin`]. + pub fn set_g0_x28_latin_subset(&mut self, subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_x28 = Some(subset); + self.remap_g0_charset(subset); + } + } + + /// Remove the G0 Latin National Option Subset for X/28 packets. + /// + /// It will change the mapping back to the one set for M/29. If the subset for M/29 is not set + /// then `extra_subset` will be used in place of it. This method will do nothing if the G0 + /// charset is not [`G0CharsetType::Latin`]. + pub fn remove_g0_x28_latin_subset(&mut self, extra_subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_x28 = None; + let subset = self.primary_charset_g0_m29.unwrap_or(extra_subset); + self.remap_g0_charset(subset); + } + } + + /// Replace the characters in `g0_charset` based on the given G0 National Option Subset in + /// `subset`. + fn remap_g0_charset(&mut self, subset: G0LatinNationalSubsetId) { + if self.primary_charset_current != subset { + if let Some(s) = G0LatinNationalSubset::from_subset_id(subset) { + for (pos, ch) in s.replacement_pos_and_char() { + self.g0_charset[0x00][pos] = ch; + } + if self.verbose_debug { + eprintln!("- Using G0 Latin National Subset ID {} ({})", subset, s); + } + self.primary_charset_current = subset; + } else { + eprintln!( + "- G0 Latin National Subset ID {} is not implemented", + subset + ); + } + } + } +} + +/// A collective type holding the G2 supplementary character set and accent tables.
+pub struct G2Charset; + +impl G2Charset { + const G2_CHARSET: [[Ucs2Char; 96]; 1] = [ + [ + // Latin G2 Supplementary Set + 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x0024, 0x00a5, 0x0023, 0x00a7, 0x00a4, 0x2018, 0x201c, + 0x00ab, 0x2190, 0x2191, 0x2192, 0x2193, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00d7, 0x00b5, + 0x00b6, 0x00b7, 0x00f7, 0x2019, 0x201d, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x0020, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0306, 0x0307, 0x0308, 0x0000, 0x030a, 0x0327, + 0x005f, 0x030b, 0x0328, 0x030c, 0x2015, 0x00b9, 0x00ae, 0x00a9, 0x2122, 0x266a, 0x20ac, + 0x2030, 0x03B1, 0x0000, 0x0000, 0x0000, 0x215b, 0x215c, 0x215d, 0x215e, 0x03a9, 0x00c6, + 0x0110, 0x00aa, 0x0126, 0x0000, 0x0132, 0x013f, 0x0141, 0x00d8, 0x0152, 0x00ba, 0x00de, + 0x0166, 0x014a, 0x0149, 0x0138, 0x00e6, 0x0111, 0x00f0, 0x0127, 0x0131, 0x0133, 0x0140, + 0x0142, 0x00f8, 0x0153, 0x00df, 0x00fe, 0x0167, 0x014b, 0x0020, + ], + // [ // Cyrillic G2 Supplementary Set + // ], + // [ // Greek G2 Supplementary Set + // ], + // [ // Arabic G2 Supplementary Set + // ] + ]; + + const G2_ACCENTS: [[Ucs2Char; 52]; 15] = [ + // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z + [ + // grave + 0x00c0, 0x0000, 0x0000, 0x0000, 0x00c8, 0x0000, 0x0000, 0x0000, 0x00cc, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x00d2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d9, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x00e0, 0x0000, 0x0000, 0x0000, 0x00e8, 0x0000, 0x0000, + 0x0000, 0x00ec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f2, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x00f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // acute + 0x00c1, 0x0000, 0x0106, 0x0000, 0x00c9, 0x0000, 0x0000, 0x0000, 0x00cd, 0x0000, 0x0000, + 0x0139, 0x0000, 0x0143, 0x00d3, 0x0000, 0x0000, 0x0154, 0x015a, 0x0000, 0x00da, 0x0000, + 0x0000, 0x0000, 0x00dd, 0x0179, 0x00e1, 0x0000, 0x0107, 0x0000, 0x00e9, 0x0000, 0x0123, + 0x0000, 0x00ed, 0x0000, 0x0000, 0x013a, 0x0000, 
0x0144, 0x00f3, 0x0000, 0x0000, 0x0155, + 0x015b, 0x0000, 0x00fa, 0x0000, 0x0000, 0x0000, 0x00fd, 0x017a, + ], + [ + // circumflex + 0x00c2, 0x0000, 0x0108, 0x0000, 0x00ca, 0x0000, 0x011c, 0x0124, 0x00ce, 0x0134, 0x0000, + 0x0000, 0x0000, 0x0000, 0x00d4, 0x0000, 0x0000, 0x0000, 0x015c, 0x0000, 0x00db, 0x0000, + 0x0174, 0x0000, 0x0176, 0x0000, 0x00e2, 0x0000, 0x0109, 0x0000, 0x00ea, 0x0000, 0x011d, + 0x0125, 0x00ee, 0x0135, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f4, 0x0000, 0x0000, 0x0000, + 0x015d, 0x0000, 0x00fb, 0x0000, 0x0175, 0x0000, 0x0177, 0x0000, + ], + [ + // tilde + 0x00c3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0128, 0x0000, 0x0000, + 0x0000, 0x0000, 0x00d1, 0x00d5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0168, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x00e3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0129, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f1, 0x00f5, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // macron + 0x0100, 0x0000, 0x0000, 0x0000, 0x0112, 0x0000, 0x0000, 0x0000, 0x012a, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x014c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016a, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0101, 0x0000, 0x0000, 0x0000, 0x0113, 0x0000, 0x0000, + 0x0000, 0x012b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x014d, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x016b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // breve + 0x0102, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011e, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016c, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0103, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x016d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // dot + 0x0000, 0x0000, 0x010a, 0x0000, 0x0116, 0x0000, 0x0120, 0x0000, 0x0130, 0x0000, 
0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x017b, 0x0000, 0x0000, 0x010b, 0x0000, 0x0117, 0x0000, 0x0121, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x017c, + ], + [ + // umlaut + 0x00c4, 0x0000, 0x0000, 0x0000, 0x00cb, 0x0000, 0x0000, 0x0000, 0x00cf, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x00d6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00dc, 0x0000, + 0x0000, 0x0000, 0x0178, 0x0000, 0x00e4, 0x0000, 0x0000, 0x0000, 0x00eb, 0x0000, 0x0000, + 0x0000, 0x00ef, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f6, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x00fc, 0x0000, 0x0000, 0x0000, 0x00ff, 0x0000, + ], + [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // ring + 0x00c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016e, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x00e5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x016f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // cedilla + 0x0000, 0x0000, 0x00c7, 0x0000, 0x0000, 0x0000, 0x0122, 0x0000, 0x0000, 0x0000, 0x0136, + 0x013b, 0x0000, 0x0145, 0x0000, 0x0000, 0x0000, 0x0156, 0x015e, 0x0162, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00e7, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0137, 
0x013c, 0x0000, 0x0146, 0x0000, 0x0000, 0x0000, 0x0157, + 0x015f, 0x0163, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // double acute + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0150, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0170, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0151, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0171, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // ogonek + 0x0104, 0x0000, 0x0000, 0x0000, 0x0118, 0x0000, 0x0000, 0x0000, 0x012e, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0172, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0105, 0x0000, 0x0000, 0x0000, 0x0119, 0x0000, 0x0000, + 0x0000, 0x012f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0173, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + [ + // caron + 0x0000, 0x0000, 0x010c, 0x010e, 0x011a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x013d, 0x0000, 0x0147, 0x0000, 0x0000, 0x0000, 0x0158, 0x0160, 0x0164, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x017d, 0x0000, 0x0000, 0x010d, 0x010f, 0x011b, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x013e, 0x0000, 0x0148, 0x0000, 0x0000, 0x0000, 0x0159, + 0x0161, 0x0165, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x017e, + ], + ]; +} + +/// Represents a Teletext Page Number in its bitcode representation. 
+/// +/// It can be easily constructed from a [`u16`]. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct TeletextPageNumber(u16); + +impl From<u16> for TeletextPageNumber { + fn from(value: u16) -> TeletextPageNumber { + TeletextPageNumber(value) + } +} + +impl fmt::Display for TeletextPageNumber { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:03x}", self.0) + } +} + +impl TeletextPageNumber { + /// Return the magazine and packet bits. + pub fn magazine(&self) -> u8 { + ((self.0 >> 8) & 0x0f) as u8 + } + + /// Return the page bits. + pub fn page(&self) -> u8 { + (self.0 & 0xff) as u8 + } + + /// Return the page number after converting the page bits in bcd format to normal integer. + pub fn bcd_page_to_u16(&self) -> u16 { + ((self.0 & 0xf00) >> 8) * 100 + ((self.0 & 0xf0) >> 4) * 10 + (self.0 & 0xf) + } +} + +/// Represents a teletext page along with timing information. +pub struct TeletextPage { + show_timestamp: Timestamp, // show at timestamp (in ms) + hide_timestamp: Timestamp, // hide at timestamp (in ms) + text: [[Ucs2Char; 40]; 25], // 25 lines x 40 cols (1 screen/page) of wide chars + g2_char_present: [[bool; 40]; 25], // false-Supplementary G2 character set NOT used at this position, true-Supplementary G2 character set used at this position + tainted: bool, // true = text variable contains any data +} + +/// Settings required to construct a [`TeletextContext`]. +#[allow(dead_code)] +pub struct TeletextConfig { + /// should telxcc logging be verbose?
+ verbose: bool, + /// teletext page containing cc we want to filter + page: Cell, + /// Page selected by user, which MIGHT be different to `page` depending on autodetection stuff + user_page: u16, + /// false = Don't attempt to correct errors + dolevdist: bool, + /// Means 2 fails or less is "the same" + levdistmincnt: u8, + /// Means 10% or less is also "the same" + levdistmaxpct: u8, + /// Segment we actually process + extraction_start: Option, + /// Segment we actually process + extraction_end: Option, + write_format: OutputFormat, + date_format: TimestampFormat, + /// Do NOT set time automatically? + noautotimeref: bool, + nofontcolor: bool, + nohtmlescape: bool, + latrusmap: bool, +} + +/// Represents the possible states that [`TeletextContext`] can be in. +struct TeletextState { + programme_info_processed: bool, + pts_initialized: bool, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum TransmissionMode { + Parallel, + Serial, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum DataUnit { + EbuTeletextNonsubtitle = 0x02, + EbuTeletextSubtitle = 0x03, + EbuTeletextInverted = 0x0c, + Vps = 0xc3, + ClosedCaptions = 0xc5, +} + +/// A type used for decoding Teletext subtitles. +#[allow(dead_code)] +pub struct TeletextContext<'a> { + config: &'a TeletextConfig, + seen_sub_page: [bool; MAX_TLT_PAGES], + global_timestamp: Timestamp, + + // Current and previous page buffers. This is the output written to file when + // the time comes. + page_buffer: TeletextPage, + page_buffer_prev: Option, + page_buffer_cur: Option, + // Current and previous page compare strings. This is plain text (no colors, + // tags, etc) in UCS2 (fixed length), so we can compare easily. + ucs2_buffer_prev: Option, + ucs2_buffer_cur: Option, + // Buffer timestamp + prev_hide_timestamp: Timestamp, + prev_show_timestamp: Timestamp, + // subtitle type pages bitmap, 2048 bits = 2048 possible pages in teletext (excl. 
subpages) + cc_map: [u8; 256], + // last timestamp computed + last_timestamp: Timestamp, + states: TeletextState, + // FYI, packet counter + tlt_packet_counter: u32, + // teletext transmission mode + transmission_mode: TransmissionMode, + // flag indicating if incoming data should be processed or ignored + receiving_data: bool, + + using_pts: Option, + delta: Timestamp, + t0: Timestamp, + + sentence_cap: bool, //Set to 1 if -sc is passed + new_sentence: bool, + + g0_charset: G0Charset, + + de_ctr: i32, // a keeps count of packets with flag subtitle ON and data packets +} + +impl<'a> TeletextContext<'a> { + /// Create a new [`TeletextContext`] from parameters in [`TeletextConfig`]. + pub fn new(config: &'a TeletextConfig) -> TeletextContext<'a> { + TeletextContext { + config, + seen_sub_page: [false; MAX_TLT_PAGES], + global_timestamp: Timestamp::from_millis(0), + page_buffer: TeletextPage { + show_timestamp: Timestamp::from_millis(0), + hide_timestamp: Timestamp::from_millis(0), + text: [[0; 40]; 25], + g2_char_present: [[false; 40]; 25], + tainted: false, + }, + page_buffer_prev: None, + page_buffer_cur: None, + ucs2_buffer_prev: None, + ucs2_buffer_cur: None, + prev_hide_timestamp: Timestamp::from_millis(0), + prev_show_timestamp: Timestamp::from_millis(0), + cc_map: [0; 256], + last_timestamp: Timestamp::from_millis(0), + states: TeletextState { + programme_info_processed: false, + pts_initialized: false, + }, + tlt_packet_counter: 0, + transmission_mode: TransmissionMode::Serial, + receiving_data: false, + using_pts: None, + delta: Timestamp::from_millis(0), + t0: Timestamp::from_millis(0), + sentence_cap: false, + new_sentence: false, + g0_charset: G0Charset::new(config.verbose), + de_ctr: 0, + } + } + + /// Fix the case for the sentences stored in `page_buffer_cur`. + /// + /// This method will convert the first character of a sentence to uppercase and the rest of the + /// characters to lowercase. 
+ fn telx_case_fix(&mut self) { + let page_buffer_cur = match self.page_buffer_cur.as_mut() { + None => return, + Some(p) => p, + }; + + let mut fixed_string = String::with_capacity(page_buffer_cur.len()); + + let mut prev_newline = false; + + fixed_string.extend(page_buffer_cur.chars().enumerate().map(|(index, c)| { + let r = match c { + ' ' | '-' => c, // case 0x89: // This is a transparent space + '.' | '?' | '!' | ':' => { + self.new_sentence = true; + c + } + _ => { + let result = if self.new_sentence && index != 0 && !prev_newline { + c.to_ascii_uppercase() + } else if !self.new_sentence && index != 0 && !prev_newline { + c.to_ascii_lowercase() + } else { + c + }; + + self.new_sentence = false; + result + } + }; + + prev_newline = c == '\n'; + r + })); + + *page_buffer_cur = fixed_string; + + todo!() // TODO: telx_correct_case(page_buffer_cur); + } + + /// Reset the page buffers and return its contents in the form of a [`Subtitle`]. + /// + /// It moves `page_buffer_cur` to `page_buffer_prev` and `ucs2_buffer_cur` to + /// `ucs2_buffer_prev`. 
+ fn telxcc_dump_prev_page(&mut self) -> Option { + let page_buffer_prev = self.page_buffer_prev.take()?; + + self.page_buffer_prev = self.page_buffer_cur.take(); + self.ucs2_buffer_prev = self.ucs2_buffer_cur.take(); + + Some(Subtitle::new_text( + page_buffer_prev.into(), + self.prev_show_timestamp, + self.prev_hide_timestamp, + Some(format!("{:03}", self.config.page.get().bcd_page_to_u16())), + "TLT".into(), + )) + } + + fn process_page(&mut self) -> Option { + let mut ans = None; + + if self + .config + .extraction_start + .map(|start| self.page_buffer.hide_timestamp < start) + .unwrap_or(false) + || self + .config + .extraction_end + .map(|end| self.page_buffer.show_timestamp > end) + .unwrap_or(false) + || self.page_buffer.hide_timestamp.millis() == 0 + { + return None; + } + + #[cfg(feature = "debug")] + { + for (index, row) in self.page_buffer.text.iter().enumerate().skip(1) { + print!("DEUBG[{:02}]: ", index); + for c in row { + print!("{:3x} ", c) + } + println!(); + } + println!(); + } + + // optimization: slicing column by column -- higher probability we could find boxed area start mark sooner + let mut page_is_empty = true; + for col in 0..40 { + for row in 1..25 { + if self.page_buffer.text[row][col] == 0x0b { + page_is_empty = false; + break; + } + } + + if !page_is_empty { + break; + } + } + + if page_is_empty { + return None; + } + + if self.page_buffer.show_timestamp > self.page_buffer.hide_timestamp { + self.page_buffer.hide_timestamp = self.page_buffer.show_timestamp; + } + + let mut line_count: u8 = 0; + let mut time_reported = false; + let timecode_show = self + .page_buffer + .show_timestamp + .to_srt_time() + .expect("could not format to SRT time"); + let timecode_hide = self + .page_buffer + .hide_timestamp + .to_srt_time() + .expect("could not format to SRT time"); + + // process data + for row in 1..25 { + let mut col_start: usize = 40; + let col_stop: usize = 40; + + let mut box_open: bool = false; + for col in 0..40 { + // replace all 
0/B and 0/A characters with 0/20, as specified in ETS 300 706: + // Unless operating in "Hold Mosaics" mode, each character space occupied by a + // spacing attribute is displayed as a SPACE + if self.page_buffer.text[row][col] == 0x0b { + // open the box + if col_start == 40 { + col_start = col; + line_count += 1; + } else { + self.page_buffer.text[row][col] = 0x20; + } + box_open = true; + } else if self.page_buffer.text[row][col] == 0xa { + // close the box + self.page_buffer.text[row][col] = 0x20; + box_open = false; + } + // characters between 0xA and 0xB shouldn't be displayed + // page->text[row][col] > 0x20 added to preserve color information + else if !box_open && col_start < 40 && self.page_buffer.text[row][col] > 0x20 { + self.page_buffer.text[row][col] = 0x20; + } + } + // line is empty + if col_start > 39 { + continue; + } + + // ETS 300 706, chapter 12.2: Alpha White ("Set-After") - Start-of-row default condition. + // used for colour changes _before_ start box mark + // white is default as stated in ETS 300 706, chapter 12.2 + // black(0), red(1), green(2), yellow(3), blue(4), magenta(5), cyan(6), white(7) + let mut foreground_color: u8 = 0x7; + let mut font_tag_opened = false; + + if line_count > 1 { + match self.config.write_format { + OutputFormat::Transcript => { + self.page_buffer_cur.get_or_insert("".into()).push(' ') + } + OutputFormat::SmpteTt => self + .page_buffer_cur + .get_or_insert("".into()) + .push_str("
"), + _ => self + .page_buffer_cur + .get_or_insert("".into()) + .push_str("\r\n"), + } + } + + if logger().expect("could not access logger").is_gui_mode() { + if !time_reported { + let timecode_show_mmss = &timecode_show[3..8]; + let timecode_hide_mmss = &timecode_hide[3..8]; + // Note, only MM:SS here as we need to save space in the preview window + eprint!( + "###TIME###{}-{}\n###SUBTITLES###", + timecode_show_mmss, timecode_hide_mmss + ); + time_reported = true; + } else { + eprint!("###SUBTITLE###"); + } + } + + for col in 0..=col_stop { + // v is just a shortcut + let mut v = self.page_buffer.text[row][col]; + + if col < col_start && v <= 0x7 { + foreground_color = v as u8; + } + + if col == col_start && (foreground_color != 0x7) && !self.config.nofontcolor { + let buffer = self.page_buffer_cur.get_or_insert("".into()); + let _ = write!( + buffer, + "", + TELETEXT_COLORS[foreground_color as usize] + ); + font_tag_opened = true; + } + + if col >= col_start { + if v <= 0x7 { + // ETS 300 706, chapter 12.2: Unless operating in "Hold Mosaics" mode, + // each character space occupied by a spacing attribute is displayed as a SPACE. 
+ if !self.config.nofontcolor { + if font_tag_opened { + self.page_buffer_cur + .get_or_insert("".into()) + .push_str(""); + font_tag_opened = false; + } + + self.page_buffer_cur.get_or_insert("".into()).push(' '); + // black is considered as white for telxcc purpose + // telxcc writes tags only when needed + if (v > 0x0) && (v < 0x7) { + let buffer = self.page_buffer_cur.get_or_insert("".into()); + let _ = write!( + buffer, + "", + TELETEXT_COLORS[v as usize] + ); + font_tag_opened = true; + } + } else { + v = 0x20; + } + } + + if v >= 0x20 { + self.ucs2_buffer_cur + .get_or_insert(Default::default()) + .as_mut_vec() + .push(v); + + if !font_tag_opened && self.config.latrusmap { + if let Some(ch) = map_latin_to_russian(v) { + v = 0; + self.page_buffer_cur.get_or_insert("".into()).push(ch); + } + } + + // translate some chars into entities, if in colour mode + if !self.config.nofontcolor && !self.config.nohtmlescape { + if let Some(s) = map_entities(v) { + v = 0; + self.page_buffer_cur.get_or_insert("".into()).push_str(s); + } + } + } + + if v >= 0x20 { + let u = char::from_u32(v as u32).unwrap(); + self.page_buffer_cur.get_or_insert("".into()).push(u); + if logger().expect("could not access logger").is_gui_mode() { + // For now we just handle the easy stuff + eprint!("{}", u); + } + } + } + } + + // no tag will left opened! 
+ if !self.config.nofontcolor && font_tag_opened { + self.page_buffer_cur + .get_or_insert("".into()) + .push_str(""); + } + + if logger().expect("could not access logger").is_gui_mode() { + eprintln!(); + } + } + + if self.sentence_cap { + self.telx_case_fix() + } + + match self.config.write_format { + OutputFormat::Transcript | OutputFormat::SmpteTt => { + let page_buffer_prev_len = + self.page_buffer_prev.as_ref().map(|s| s.len()).unwrap_or(0); + if page_buffer_prev_len == 0 { + self.prev_show_timestamp = self.page_buffer.show_timestamp; + } + + let page_buffer_prev = self.page_buffer_prev.as_deref().unwrap_or(""); + let page_buffer_cur = self.page_buffer_cur.as_deref().unwrap_or(""); + let ucs2_buffer_prev = self + .ucs2_buffer_prev + .as_ref() + .map(|x| &x.as_vec()[..]) + .unwrap_or(&[]); + let ucs2_buffer_cur = self + .ucs2_buffer_cur + .as_ref() + .map(|x| &x.as_vec()[..]) + .unwrap_or(&[]); + + if page_buffer_prev_len == 0 + || (self.config.dolevdist + && fuzzy_cmp( + page_buffer_prev, + page_buffer_cur, + ucs2_buffer_prev, + ucs2_buffer_cur, + self.config.levdistmaxpct, + self.config.levdistmincnt, + )) + { + // If empty previous buffer, we just start one with the + // current page and do nothing. Wait until we see more. + self.page_buffer_prev = self.page_buffer_cur.take(); + self.ucs2_buffer_prev = self.ucs2_buffer_cur.take(); + self.prev_hide_timestamp = self.page_buffer.hide_timestamp; + } else { + // OK, the old and new buffer don't match. So write the old + ans = self.telxcc_dump_prev_page(); + self.prev_hide_timestamp = self.page_buffer.hide_timestamp; + self.prev_show_timestamp = self.page_buffer.show_timestamp; + } + } + _ => { + ans = Some(Subtitle::new_text( + self.page_buffer_cur.take().unwrap().into(), + self.page_buffer.show_timestamp, + self.page_buffer.hide_timestamp + Timestamp::from_millis(1), + None, + "TLT".into(), + )); + } + } + + // Also update GUI... 
+ + self.page_buffer_cur = None; + ans + } + + /// Process the teletext `packet` and append the extracted subtitles in `subtitles`. + pub fn process_telx_packet( + &mut self, + data_unit: DataUnit, + packet: &TeletextPacketPayload, + timestamp: Timestamp, + subtitles: &mut Vec, + ) { + // variable names conform to ETS 300 706, chapter 7.1.2 + let address = (decode_hamming_8_4(packet.address[1]).unwrap() << 4) + | decode_hamming_8_4(packet.address[0]).unwrap(); + let mut m = address & 0x7; + if m == 0 { + m = 8; + } + let y = (address >> 3) & 0x1f; + let designation_code = if y > 25 { + decode_hamming_8_4(packet.data[0]).unwrap() + } else { + 0x00 + }; + + if y == 0 { + // CC map + let i = (decode_hamming_8_4(packet.data[1]).unwrap() << 4) + | decode_hamming_8_4(packet.data[0]).unwrap(); + let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap() & 0x08) >> 3; + self.cc_map[i as usize] |= flag_subtitle << (m - 1); + + let flag_subtitle = flag_subtitle != 0; + + if flag_subtitle && (i < 0xff) { + let mut thisp = ((m as u32) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u32) << 4) + | (decode_hamming_8_4(packet.data[0]).unwrap() as u32); + let t1 = format!("{:x}", thisp); // Example: 1928 -> 788 + thisp = t1.parse().unwrap(); + if !self.seen_sub_page[thisp as usize] { + self.seen_sub_page[thisp as usize] = true; + info!( + "\rNotice: Teletext page with possible subtitles detected: {:03}\n", + thisp + ); + } + } + if (self.config.page.get() == 0.into()) && flag_subtitle && (i < 0xff) { + self.config.page.replace( + (((m as u16) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4) + | (decode_hamming_8_4(packet.data[0]).unwrap() as u16)) + .into(), + ); + info!("- No teletext page specified, first received suitable page is {}, not guaranteed\n", self.config.page.get()); + } + + // Page number and control bits + let page_number: TeletextPageNumber = (((m as u16) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4) + | 
(decode_hamming_8_4(packet.data[0]).unwrap() as u16)) + .into(); + let charset = ((decode_hamming_8_4(packet.data[7]).unwrap() & 0x08) + | (decode_hamming_8_4(packet.data[7]).unwrap() & 0x04) + | (decode_hamming_8_4(packet.data[7]).unwrap() & 0x02)) + >> 1; + // let flag_suppress_header = decode_hamming_8_4(packet.data[6]).unwrap() & 0x01; + // let flag_inhibit_display = (decode_hamming_8_4(packet.data[6]).unwrap() & 0x08) >> 3; + + // ETS 300 706, chapter 9.3.1.3: + // When set to '1' the service is designated to be in Serial mode and the transmission of a page is terminated + // by the next page header with a different page number. + // When set to '0' the service is designated to be in Parallel mode and the transmission of a page is terminated + // by the next page header with a different page number but the same magazine number. + // The same setting shall be used for all page headers in the service. + // ETS 300 706, chapter 7.2.1: Page is terminated by and excludes the next page header packet + // having the same magazine address in parallel transmission mode, or any magazine address in serial transmission mode. 
+ self.transmission_mode = if decode_hamming_8_4(packet.data[7]).unwrap() & 0x01 == 0 { + TransmissionMode::Parallel + } else { + TransmissionMode::Serial + }; + + // FIXME: Well, this is not ETS 300 706 kosher, however we are interested in EBU_TELETEXT_SUBTITLE only + if (self.transmission_mode == TransmissionMode::Parallel) + && (data_unit != DataUnit::EbuTeletextSubtitle) + && !(self.de_ctr != 0 && flag_subtitle && self.receiving_data) + { + return; + } + + if self.receiving_data + && (((self.transmission_mode == TransmissionMode::Serial) + && (page_number.page() != self.config.page.get().page())) + || ((self.transmission_mode == TransmissionMode::Parallel) + && (page_number.page() != self.config.page.get().page()) + && (m == self.config.page.get().magazine()))) + { + self.receiving_data = false; + if !(self.de_ctr != 0 && flag_subtitle) { + return; + } + } + + // Page transmission is terminated, however now we are waiting for our new page + if page_number != self.config.page.get() + && !(self.de_ctr != 0 && flag_subtitle && self.receiving_data) + { + return; + } + + // Now we have the begining of page transmission; if there is page_buffer pending, process it + if self.page_buffer.tainted { + // Convert telx to UCS-2 before processing + for yt in 1..=23 { + for it in 0..40 { + if self.page_buffer.text[yt][it] != 0x00 + && !self.page_buffer.g2_char_present[yt][it] + { + self.page_buffer.text[yt][it] = self + .g0_charset + .ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap()); + } + } + } + // it would be nice, if subtitle hides on previous video frame, so we contract 40 ms (1 frame @25 fps) + self.page_buffer.hide_timestamp = timestamp - Timestamp::from_millis(40); + if self.page_buffer.hide_timestamp > timestamp { + self.page_buffer.hide_timestamp = Timestamp::from_millis(0); + } + if let Some(sub) = self.process_page() { + subtitles.push(sub); + } + self.de_ctr = 0; + } + + self.page_buffer.show_timestamp = timestamp; + self.page_buffer.hide_timestamp = 
Timestamp::from_millis(0); + self.page_buffer.text = [[0; 40]; 25]; + self.page_buffer.g2_char_present = [[false; 40]; 25]; + self.page_buffer.tainted = false; + self.receiving_data = false; + if self.g0_charset.charset_type == G0CharsetType::Latin { + // G0 Character National Option Sub-sets selection required only for Latin Character Sets + self.g0_charset.remove_g0_x28_latin_subset(charset.into()) + } + /* + // I know -- not needed; in subtitles we will never need disturbing teletext page status bar + // displaying tv station name, current time etc. + if (flag_suppress_header == NO) { + for (uint8_t i = 14; i < 40; i++) page_buffer.text[y][i] = telx_to_ucs2(packet->data[i]); + //page_buffer.tainted = YES; + } + */ + } else if (m == self.config.page.get().magazine()) + && (1..=23).contains(&y) + && self.receiving_data + { + // ETS 300 706, chapter 9.4.1: Packets X/26 at presentation Levels 1.5, 2.5, 3.5 are used for addressing + // a character location and overwriting the existing character defined on the Level 1 page + // ETS 300 706, annex B.2.2: Packets with Y = 26 shall be transmitted before any packets with Y = 1 to Y = 25; + // so page_buffer.text[y][i] may already contain any character received + // in frame number 26, skip original G0 character + for i in 0..40 { + if self.page_buffer.text[y as usize][i] == 0x00 { + self.page_buffer.text[y as usize][i] = packet.data[i] as Ucs2Char; + } + } + self.page_buffer.tainted = true; + self.de_ctr -= 1; + } else if (m == self.config.page.get().magazine()) && (y == 26) && self.receiving_data { + // ETS 300 706, chapter 12.3.2: X/26 definition + let mut x26_row: u8 = 0; + + let mut triplets: [u32; 13] = [0; 13]; + for (j, triplet) in triplets.iter_mut().enumerate() { + *triplet = decode_hamming_24_18( + ((packet.data[j * 3 + 3] as u32) << 16) + | ((packet.data[j * 3 + 2] as u32) << 8) + | (packet.data[j * 3 + 1] as u32), + ) + .unwrap_or(0xffffffff); + } + + for triplet in triplets { + // invalid data (HAM24/18 
uncorrectable error detected), skip group + if triplet == 0xffffffff { + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Unrecoverable data error; UNHAM24/18()={:04x}\n", triplet); + continue; + } + + let data = ((triplet & 0x3f800) >> 11) as u8; + let mode = ((triplet & 0x7c0) >> 6) as u8; + let address = (triplet & 0x3f) as u8; + let row_address_group = (40..=63).contains(&address); + + // ETS 300 706, chapter 12.3.1, table 27: set active position + if (mode == 0x04) && row_address_group { + x26_row = address - 40; + if x26_row == 0 { + x26_row = 24; + } + } + + // ETS 300 706, chapter 12.3.1, table 27: termination marker + if (0x11..=0x1f).contains(&mode) && row_address_group { + break; + } + + // ETS 300 706, chapter 12.3.1, table 27: character from G2 set + if (mode == 0x0f) && !row_address_group && data > 31 { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_CHARSET[0][data as usize - 0x20]; + self.page_buffer.g2_char_present[x26_row as usize][address as usize] = true; + } + + // ETS 300 706 v1.2.1, chapter 12.3.4, Table 29: G0 character without diacritical mark (display '@' instead of '*') + if (mode == 0x10) && !row_address_group && data == 64 { + // check for @ symbol + self.g0_charset.remap_g0_charset(0.into()); + self.page_buffer.text[x26_row as usize][address as usize] = 0x40; + } + + // ETS 300 706, chapter 12.3.1, table 27: G0 character with diacritical mark + if (0x11..=0x1f).contains(&mode) && !row_address_group { + // A - Z + if (65..=90).contains(&data) { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_ACCENTS[mode as usize - 0x11][data as usize - 65]; + } + // a - z + else if (97..=122).contains(&data) { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_ACCENTS[mode as usize - 0x11][data as usize - 71]; + // other + } else { + self.page_buffer.text[x26_row as usize][address as usize] = + self.g0_charset.ucs2_char(data); + } + 
self.page_buffer.g2_char_present[x26_row as usize][address as usize] = true; + } + } + } else if (m == self.config.page.get().magazine()) && (y == 28) && self.receiving_data { + // TODO: + // ETS 300 706, chapter 9.4.7: Packet X/28/4 + // Where packets 28/0 and 28/4 are both transmitted as part of a page, packet 28/0 takes precedence over 28/4 for all but the colour map entry coding. + if (designation_code == 0) || (designation_code == 4) { + // ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1 + // ETS 300 706, chapter 9.4.7: Packet X/28/4 + if let Some(triplet0) = decode_hamming_24_18( + ((packet.data[3] as u32) << 16) + | ((packet.data[2] as u32) << 8) + | packet.data[1] as u32, + ) { + // ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1 only + if (triplet0 & 0x0f) == 0x00 { + // ETS 300 706, Table 32 + self.g0_charset + .set_charset(G0CharsetType::from_triplet(triplet0)); // Deciding G0 Character Set + self.g0_charset + .set_g0_x28_latin_subset((((triplet0 & 0x3f80) >> 7) as u8).into()) + } + } else { + // invalid data (HAM24/18 uncorrectable error detected), skip group + debug!(msg_type = DebugMessageFlag::TELETEXT; "! Unrecoverable data error; UNHAM24/18()={:04x}\n", 0xffffffffu32); + } + } + } else if (m == self.config.page.get().magazine()) && (y == 29) { + // TODO: + // ETS 300 706, chapter 9.5.1 Packet M/29/0 + // Where M/29/0 and M/29/4 are transmitted for the same magazine, M/29/0 takes precedence over M/29/4. 
+ if (designation_code == 0) || (designation_code == 4) { + // ETS 300 706, chapter 9.5.1: Packet M/29/0 + // ETS 300 706, chapter 9.5.3: Packet M/29/4 + if let Some(triplet0) = decode_hamming_24_18( + ((packet.data[3] as u32) << 16) + | ((packet.data[2] as u32) << 8) + | packet.data[1] as u32, + ) { + // ETS 300 706, table 11: Coding of Packet M/29/0 + // ETS 300 706, table 13: Coding of Packet M/29/4 + if (triplet0 & 0xff) == 0x00 { + self.g0_charset + .set_charset(G0CharsetType::from_triplet(triplet0)); + self.g0_charset + .set_g0_m29_latin_subset((((triplet0 & 0x3f80) >> 7) as u8).into()) + } + } else { + // invalid data (HAM24/18 uncorrectable error detected), skip group + debug!(msg_type = DebugMessageFlag::TELETEXT; "! Unrecoverable data error; UNHAM24/18()={:04x}\n", 0xffffffffu32); + } + } + } else if (m == 8) && (y == 30) { + // ETS 300 706, chapter 9.8: Broadcast Service Data Packets + if !self.states.programme_info_processed { + // ETS 300 706, chapter 9.8.1: Packet 8/30 Format 1 + if decode_hamming_8_4(packet.data[0]) + .map(|x| x < 2) + .unwrap_or(false) + { + let mut t: u32 = 0; + info!("- Programme Identification Data = "); + for i in 20..40 { + let c = self.g0_charset.ucs2_char(packet.data[i]); + // strip any control codes from PID, eg. TVP station + if c < 0x20 { + continue; + } + + info!("{}", char::from_u32(c as u32).unwrap()); + } + info!("\n"); + + // OMG! ETS 300 706 stores timestamp in 7 bytes in Modified Julian Day in BCD format + HH:MM:SS in BCD format + // + timezone as 5-bit count of half-hours from GMT with 1-bit sign + // In addition all decimals are incremented by 1 before transmission. 
+ // 1st step: BCD to Modified Julian Day + t += ((packet.data[10] & 0x0f) as u32) * 10000; + t += (((packet.data[11] & 0xf0) >> 4) as u32) * 1000; + t += ((packet.data[11] & 0x0f) as u32) * 100; + t += (((packet.data[12] & 0xf0) >> 4) as u32) * 10; + t += (packet.data[12] & 0x0f) as u32; + t -= 11111; + // 2nd step: conversion Modified Julian Day to unix timestamp + t = (t - 40587) * 86400; + // 3rd step: add time + t += 3600 + * (((packet.data[13] & 0xf0) >> 4) as u32 * 10 + + (packet.data[13] & 0x0f) as u32); + t += 60 + * (((packet.data[14] & 0xf0) >> 4) as u32 * 10 + + (packet.data[14] & 0x0f) as u32); + t += ((packet.data[15] & 0xf0) >> 4) as u32 * 10 + + (packet.data[15] & 0x0f) as u32; + t -= 40271; + // 4th step: conversion to time_t + let t0 = Timestamp::from_millis((t as i64) * 1000); + + info!( + "- Universal Time Co-ordinated = {}\n", + t0.to_ctime().unwrap() + ); + + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Transmission mode = {:?}\n", self.transmission_mode); + + if self.config.write_format == OutputFormat::Transcript + && matches!(self.config.date_format, TimestampFormat::Date { .. }) + && !self.config.noautotimeref + { + info!("- Broadcast Service Data Packet received, resetting UTC referential value to {}\n", t0.to_ctime().unwrap()); + *UTC_REFVALUE.write().unwrap() = t as u64; + self.states.pts_initialized = false; + } + + self.states.programme_info_processed = true; + } + } + } + } + + /// Consumes the [`TeletextContext`] and appends the pending extracted subtitles in `subtitles`. 
+ pub fn close(mut self, subtitles: Option<&mut Vec>) { + info!( + "\nTeletext decoder: {} packets processed \n", + self.tlt_packet_counter + ); + if self.config.write_format != OutputFormat::Rcwt { + if let Some(subtitles) = subtitles { + // output any pending close caption + if self.page_buffer.tainted { + // Convert telx to UCS-2 before processing + for yt in 1..=23 { + for it in 0..40 { + if self.page_buffer.text[yt][it] != 0x00 + && !self.page_buffer.g2_char_present[yt][it] + { + self.page_buffer.text[yt][it] = self + .g0_charset + .ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap()); + } + } + } + // this time we do not subtract any frames, there will be no more frames + self.page_buffer.hide_timestamp = self.last_timestamp; + if let Some(sub) = self.process_page() { + subtitles.push(sub); + } + } + + self.telxcc_dump_prev_page(); + } + } + } +} From 2eba5034aae8875876a016b375a953a94ffb811d Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 14:13:59 +0530 Subject: [PATCH 10/13] add options and hardsubx module --- src/rust/Cargo.lock | 67 ++++++ src/rust/lib_ccxr/Cargo.lock | 67 ++++++ src/rust/lib_ccxr/Cargo.toml | 1 + src/rust/lib_ccxr/src/common/mod.rs | 2 + src/rust/lib_ccxr/src/common/options.rs | 306 ++++++++++++++++++++++++ src/rust/lib_ccxr/src/hardsubx.rs | 16 ++ src/rust/lib_ccxr/src/lib.rs | 1 + 7 files changed, 460 insertions(+) create mode 100644 src/rust/lib_ccxr/src/common/options.rs create mode 100644 src/rust/lib_ccxr/src/hardsubx.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 6a5e69b6f..413a1db89 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -239,6 +239,15 @@ dependencies = [ "toml", ] +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + [[package]] name = "glob" version = "0.3.1" @@ -276,6 
+285,16 @@ dependencies = [ "libc", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "2.0.0" @@ -325,6 +344,7 @@ dependencies = [ "socket2", "thiserror", "time", + "url", ] [[package]] @@ -453,6 +473,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + [[package]] name = "phf" version = "0.11.2" @@ -768,6 +794,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "toml" version = "0.5.11" @@ -794,18 +835,44 @@ dependencies = [ "winnow", ] +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index f8442eb8a..f8d1fc1c7 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -39,12 +39,31 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + [[package]] name = "hashbrown" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "2.0.0" @@ -71,6 +90,7 @@ dependencies = [ "socket2", "thiserror", "time", + "url", ] [[package]] @@ -112,6 +132,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -255,6 +281,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "toml_datetime" version = "0.6.3" @@ -272,12 +313,38 @@ dependencies = [ "winnow", ] +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index 1599b43be..c0a92468c 100644 
--- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -12,6 +12,7 @@ time = { version = "0.3.27", features = ["macros", "formatting"] } bitflags = "2.3.1" derive_more = "0.99.17" num_enum = "0.6.1" +url = "2.4.0" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/common/mod.rs b/src/rust/lib_ccxr/src/common/mod.rs index 27aa6cccc..954b6a778 100644 --- a/src/rust/lib_ccxr/src/common/mod.rs +++ b/src/rust/lib_ccxr/src/common/mod.rs @@ -1,3 +1,5 @@ mod constants; +mod options; pub use constants::*; +pub use options::*; diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs new file mode 100644 index 000000000..9cd9b86b6 --- /dev/null +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -0,0 +1,306 @@ +use url::Url; + +use std::path::PathBuf; + +use crate::common::{ + DataSource, Language, OutputFormat, SelectCodec, StreamMode, StreamType, DTVCC_MAX_SERVICES, +}; +use crate::hardsubx::{ColorHue, OcrMode}; +use crate::util::encoding::Encoding; +use crate::util::log::OutputTarget; +use crate::util::time::{Timestamp, TimestampFormat}; + +pub enum DtvccServiceCharset { + Same(String), + Unique(Box<[String; DTVCC_MAX_SERVICES]>), +} + +#[allow(dead_code)] +pub struct DemuxerConfig { + /// Regular TS or M2TS + m2ts: bool, + auto_stream: StreamMode, + + /* subtitle codec type */ + codec: SelectCodec, + nocodec: SelectCodec, + + /// Try to find a stream with captions automatically (no -pn needed) + ts_autoprogram: bool, + ts_allprogram: bool, + /// PID for stream that holds caption information + ts_cappids: Vec, + /// If 1, never mess with the selected PID + ts_forced_cappid: bool, + /// Specific program to process in TS files, if ts_forced_program_selected==1 + ts_forced_program: Option, + /// User WANTED stream type (i.e. 
use the stream that has this type) + ts_datastreamtype: StreamType, + /// User selected (forced) stream type + ts_forced_streamtype: StreamType, +} + +#[allow(dead_code)] +pub struct EncoderConfig { + /// Extract 1st (1), 2nd (2) or both fields (12) + extract: u8, + dtvcc_extract: bool, + // If true, output in stderr progress updates so the GUI can grab them + gui_mode_reports: bool, + output_filename: String, + write_format: OutputFormat, + keep_output_closed: bool, + /// Force flush on content write + force_flush: bool, + /// Append mode for output files + append_mode: bool, + /// true if -UCLA used, false if not + ucla: bool, + + encoding: Encoding, + date_format: TimestampFormat, + /// Add dashes (-) before each speaker automatically? + autodash: bool, + /// Remove spaces at sides? + trim_subs: bool, + /// Fix case? + sentence_cap: bool, + /// Split text into complete sentences and prorate time? + splitbysentence: bool, + + /// If out=curl, where do we send the data to? + #[cfg(feature = "with_libcurl")] + curlposturl: Option, + + /// Censors profane words from subtitles + filter_profanity: bool, + + /// Write a .sem file on file open and delete it on close? + with_semaphore: bool, + /* Credit stuff */ + start_credits_text: String, + end_credits_text: String, + startcreditsnotbefore: Timestamp, // Where to insert start credits, if possible + startcreditsnotafter: Timestamp, + startcreditsforatleast: Timestamp, // How long to display them? + startcreditsforatmost: Timestamp, + endcreditsforatleast: Timestamp, + endcreditsforatmost: Timestamp, + + // Keeps the settings for generating transcript output files. + /* ccx_encoders_transcript_format transcript_settings; */ + send_to_srv: bool, + /// Set to true when no BOM (Byte Order Mark) should be used for files. + /// Note, this might make files unreadable on Windows!
+ no_bom: bool, + first_input_file: String, + multiple_files: bool, + no_font_color: bool, + no_type_setting: bool, + /// If this is set to true, the stdout will be flushed when data was written to the screen during a process_608 call. + cc_to_stdout: bool, + /// false = CRLF, true = LF + line_terminator_lf: bool, + /// ms to delay (or advance) subs + subs_delay: Timestamp, + program_number: u32, + in_format: u8, + // true if we don't want to OCR bitmaps to add the text as comments in the XML file in spupng + nospupngocr: bool, + + // MCC File + /// true if dropframe frame count should be used. defaults to no drop frame. + force_dropframe: bool, + + // text -> png (text render) + /// The font used to render text if needed (e.g. teletext->spupng) + render_font: PathBuf, + render_font_italics: PathBuf, + + //CEA-708 + services_enabled: [bool; DTVCC_MAX_SERVICES], + services_charsets: DtvccServiceCharset, + // true if only 708 subs extraction is enabled + extract_only_708: bool, +} + +/// Options from user parameters +pub struct Options { + /// Extract 1st, 2nd or both fields. Can be 1, 2 or 12 respectively. + pub extract: u8, + /// Disable roll-up emulation (no duplicate output in generated file) + pub no_rollup: bool, + pub noscte20: bool, + pub webvtt_create_css: bool, + /// Channel we want to dump in srt mode + pub cc_channel: u8, + pub buffer_input: bool, + pub nofontcolor: bool, + pub nohtmlescape: bool, + pub notypesetting: bool, + /// The start of the segment we actually process + pub extraction_start: Timestamp, + /// The end of the segment we actually process + pub extraction_end: Timestamp, + pub print_file_reports: bool, + /// Contains the settings for the 608 decoder. 
+ /* ccx_decoder_608_settings settings_608, */ + /// Same for 708 decoder + /* ccx_decoder_dtvcc_settings settings_dtvcc, */ + /// Is 608 enabled by explicitly using flags(-1,-2,-12) + pub is_608_enabled: bool, + /// Is 708 enabled by explicitly using flags(-svc) + pub is_708_enabled: bool, + + /// Disabled by -ve or --videoedited + pub binary_concat: bool, + /// Use GOP instead of PTS timing (None=do as needed, true=always, false=never) + pub use_gop_as_pts: Option, + /// Replace 0000 with 8080 in HDTV (needed for some cards) + pub fix_padding: bool, + /// If true, output in stderr progress updates so the GUI can grab them + pub gui_mode_reports: bool, + /// If true, suppress the output of the progress to stdout + pub no_progress_bar: bool, + /// Extra capitalization word file + pub sentence_cap_file: PathBuf, + /// 0 -> Not a complete file but a live stream, without timeout + /// + /// None -> A regular file + /// + /// \>0 -> Live stream with a timeout of this value in seconds + pub live_stream: Option, + /// Extra profanity word file + pub filter_profanity_file: PathBuf, + pub messages_target: OutputTarget, + /// If true, add WebVTT X-TIMESTAMP-MAP header + pub timestamp_map: bool, + /* Levenshtein's parameters, for string comparison */ + /// false => don't attempt to correct typos with this algorithm + pub dolevdist: bool, + /// Means 2 fails or less is "the same" + pub levdistmincnt: u8, + /// Means 10% or less is also "the same" + pub levdistmaxpct: u8, + /// Look for captions in all packets when everything else fails + pub investigate_packets: bool, + /// Disable pruning of padding cc blocks + pub fullbin: bool, + /// Disable syncing + pub nosync: bool, + /// If true, use PID=1003, process specially and so on + pub hauppauge_mode: bool, + /// Fix broken Windows 7 conversion + pub wtvconvertfix: bool, + pub wtvmpeg2: bool, + /// Use myth-tv mpeg code? 
false=no, true=yes, None=auto + pub auto_myth: Option, + /* MP4 related stuff */ + /// Process the video track even if a CC dedicated track exists. + pub mp4vidtrack: bool, + /// If true, extracts chapters (if present) from MP4 files. + pub extract_chapters: bool, + /* General settings */ + /// Force the use of pic_order_cnt_lsb in AVC/H.264 data streams + pub usepicorder: bool, + /// 1 = full output. 2 = live output. 3 = both + pub xmltv: u8, + /// interval in seconds between writing xmltv output files in live mode + pub xmltvliveinterval: Timestamp, + /// interval in seconds between writing xmltv full file output + pub xmltvoutputinterval: Timestamp, + pub xmltvonlycurrent: bool, + pub keep_output_closed: bool, + /// Force flush on content write + pub force_flush: bool, + /// Append mode for output files + pub append_mode: bool, + /// true if UCLA used, false if not + pub ucla: bool, + /// true if ticker text style burned in subs, false if not + pub tickertext: bool, + /// true if burned-in subtitles are to be extracted + pub hardsubx: bool, + /// true if both burned-in and non-burned-in subtitles need to be extracted + pub hardsubx_and_common: bool, + /// The name of the language stream for DVB + pub dvblang: Option, + /// The name of the .traineddata file to be loaded with tesseract + pub ocrlang: PathBuf, + /// The Tesseract OEM mode, could be 0 (default), 1 or 2 + pub ocr_oem: u8, + /// How to quantize the bitmap before passing it to tesseract + /// (false = no quantization at all, true = CCExtractor's internal) + pub ocr_quantmode: bool, + /// The name of the language stream for MKV + pub mkvlang: Option, + /// If true, the video stream will be processed even if we're using a different one for subtitles.
+ pub analyze_video_stream: bool, + + /* HardsubX related stuff */ + pub hardsubx_ocr_mode: OcrMode, + pub hardsubx_min_sub_duration: Timestamp, + pub hardsubx_detect_italics: bool, + pub hardsubx_conf_thresh: f64, + pub hardsubx_hue: ColorHue, + pub hardsubx_lum_thresh: f64, + + // Keeps the settings for generating transcript output files. + /* ccx_encoders_transcript_format transcript_settings; */ + pub date_format: TimestampFormat, + pub send_to_srv: bool, + pub write_format: OutputFormat, + pub write_format_rewritten: bool, + pub use_ass_instead_of_ssa: bool, + pub use_webvtt_styling: bool, + + /* Networking */ + pub udpsrc: Option, + pub udpaddr: Option, + /// Non-zero => Listen for UDP packets on this port, no files. + pub udpport: u16, + pub tcpport: Option, + pub tcp_password: Option, + pub tcp_desc: Option, + pub srv_addr: Option, + pub srv_port: Option, + /// Do NOT set time automatically? + pub noautotimeref: bool, + /// Files, stdin or network + pub input_source: DataSource, + + pub output_filename: Option, + + /// List of files to process + pub inputfile: Option>, + pub demux_cfg: DemuxerConfig, + pub enc_cfg: EncoderConfig, + /// ms to delay (or advance) subs + pub subs_delay: Timestamp, + /// If true, the stdout will be flushed when data was written to the screen during a process_608 call. + pub cc_to_stdout: bool, + /// If true, the PES Header will be printed to console (debugging purposes) + pub pes_header_to_stdout: bool, + /// If true, the program will ignore PTS jumps.
+ /// Sometimes this parameter is required for DVB subs with > 30s pause time + pub ignore_pts_jumps: bool, + pub multiprogram: bool, + pub out_interval: i32, + pub segment_on_key_frames_only: bool, + + #[cfg(feature = "with_libcurl")] + pub curlposturl: Option, + + //CC sharing + #[cfg(feature = "enable_sharing")] + pub sharing_enabled: bool, + #[cfg(feature = "enable_sharing")] + pub sharing_url: Option, + #[cfg(feature = "enable_sharing")] + //Translating + pub translate_enabled: bool, + #[cfg(feature = "enable_sharing")] + pub translate_langs: Option, + #[cfg(feature = "enable_sharing")] + pub translate_key: Option, +} diff --git a/src/rust/lib_ccxr/src/hardsubx.rs b/src/rust/lib_ccxr/src/hardsubx.rs new file mode 100644 index 000000000..19352ebed --- /dev/null +++ b/src/rust/lib_ccxr/src/hardsubx.rs @@ -0,0 +1,16 @@ +pub enum OcrMode { + Frame, + Letter, + Word, +} + +pub enum ColorHue { + White, + Yellow, + Green, + Cyan, + Blue, + Magenta, + Red, + Custom(f64), +} diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 35d5f87a2..2b6e5db18 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1,4 +1,5 @@ pub mod common; +pub mod hardsubx; pub mod subtitle; pub mod teletext; pub mod util; From a131ce9b98ee6ee5047123bd74510f59c11705a1 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 14:51:34 +0530 Subject: [PATCH 11/13] create libccxr_exports, integrate log and levenshtein into C --- src/lib_ccx/lib_ccx.c | 8 ++ src/lib_ccx/utility.c | 18 +++++ src/rust/Cargo.lock | 10 +++ src/rust/build.rs | 1 + src/rust/lib_ccxr/Cargo.lock | 16 ++++ src/rust/lib_ccxr/Cargo.toml | 1 + src/rust/lib_ccxr/src/util/c_functions.rs | 19 +++++ src/rust/lib_ccxr/src/util/mod.rs | 2 + src/rust/src/lib.rs | 2 + src/rust/src/libccxr_exports/mod.rs | 91 +++++++++++++++++++++++ 10 files changed, 168 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/c_functions.rs create mode 100644 
src/rust/src/libccxr_exports/mod.rs diff --git a/src/lib_ccx/lib_ccx.c b/src/lib_ccx/lib_ccx.c index 74a0bc88a..48a0ed7a6 100644 --- a/src/lib_ccx/lib_ccx.c +++ b/src/lib_ccx/lib_ccx.c @@ -6,6 +6,10 @@ #include "ccx_decoders_708.h" #include "ccx_decoders_isdb.h" +#ifndef DISABLE_RUST +extern void ccxr_init_basic_logger(); +#endif + struct ccx_common_logging_t ccx_common_logging; static struct ccx_decoders_common_settings_t *init_decoder_setting( struct ccx_s_options *opt) @@ -100,6 +104,10 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt) ccx_common_logging.log_ftn = &mprint; ccx_common_logging.gui_ftn = &activity_library_process; +#ifndef DISABLE_RUST + ccxr_init_basic_logger(); +#endif + struct lib_ccx_ctx *ctx = malloc(sizeof(struct lib_ccx_ctx)); if (!ctx) ccx_common_logging.fatal_ftn(EXIT_NOT_ENOUGH_MEMORY, "init_libraries: Not enough memory allocating lib_ccx_ctx context."); diff --git a/src/lib_ccx/utility.c b/src/lib_ccx/utility.c index cb3cb6152..c110a1ec3 100644 --- a/src/lib_ccx/utility.c +++ b/src/lib_ccx/utility.c @@ -9,6 +9,12 @@ int temp_debug = 0; // This is a convenience variable used to enable/disable debug on variable conditions. Find references to understand. 
volatile sig_atomic_t change_filename_requested = 0; +#ifndef DISABLE_RUST +extern int ccxr_verify_crc32(uint8_t *buf, int len); +extern int ccxr_levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, unsigned s2len); +extern int ccxr_levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsigned s2len); +#endif + static uint32_t crc32_table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, @@ -77,6 +83,10 @@ static uint32_t crc32_table[] = { int verify_crc32(uint8_t *buf, int len) { +#ifndef DISABLE_RUST + return ccxr_verify_crc32(buf, len); +#endif + int i = 0; int32_t crc = -1; for (i = 0; i < len; i++) @@ -151,6 +161,10 @@ void timestamp_to_vtttime(uint64_t timestamp, char *buffer) int levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, unsigned s2len) { +#ifndef DISABLE_RUST + return ccxr_levenshtein_dist(s1, s2, s1len, s2len); +#endif + unsigned int x, y, v, lastdiag, olddiag; unsigned int *column = (unsigned *)malloc((s1len + 1) * sizeof(unsigned int)); for (y = 1; y <= s1len; y++) @@ -172,6 +186,10 @@ int levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, uns int levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsigned s2len) { +#ifndef DISABLE_RUST + return ccxr_levenshtein_dist_char(s1, s2, s1len, s2len); +#endif + unsigned int x, y, v, lastdiag, olddiag; unsigned int *column = (unsigned *)malloc((s1len + 1) * sizeof(unsigned int)); for (y = 1; y <= s1len; y++) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 413a1db89..2e6525f29 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -180,6 +180,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "deranged" version = "0.3.8" @@ -339,6 +348,7 @@ name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags 2.4.0", + "crc32fast", "derive_more", "num_enum", "socket2", diff --git a/src/rust/build.rs b/src/rust/build.rs index f8ecc04c8..4df835845 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -26,6 +26,7 @@ fn main() { "lib_cc_decode", "cc_subtitle", "ccx_output_format", + "ccx_s_options", ]); #[cfg(feature = "hardsubx_ocr")] diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index f8d1fc1c7..cd6c3aa33 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -8,12 +8,27 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "convert_case" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "deranged" version = "0.3.8" @@ -85,6 +100,7 @@ name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags", + "crc32fast", "derive_more", "num_enum", "socket2", diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index c0a92468c..713c372ab 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" socket2 = "0.5.3" thiserror = "1.0.39" time = { version = 
"0.3.27", features = ["macros", "formatting"] } +crc32fast = "1.3.2" bitflags = "2.3.1" derive_more = "0.99.17" num_enum = "0.6.1" diff --git a/src/rust/lib_ccxr/src/util/c_functions.rs b/src/rust/lib_ccxr/src/util/c_functions.rs new file mode 100644 index 000000000..e3fcc7923 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/c_functions.rs @@ -0,0 +1,19 @@ +//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. + +use super::*; +use crc32fast::hash; + +/// Rust equivalent for `verify_crc32` function in C. Uses Rust-native types as input and output. +pub fn verify_crc32(buf: &[u8]) -> bool { + hash(buf) == 0 +} + +/// Rust equivalent for `levenshtein_dist` function in C. Uses Rust-native types as input and output. +pub fn levenshtein_dist(s1: &[u64], s2: &[u64]) -> usize { + levenshtein(s1, s2) +} + +/// Rust equivalent for `levenshtein_dist_char` function in C. Uses Rust-native types as input and output. +pub fn levenshtein_dist_char(s1: &[T], s2: &[T]) -> usize { + levenshtein(s1, s2) +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index 7b2316da2..74e9258ac 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -7,5 +7,7 @@ pub mod log; pub mod net; pub mod time; +pub mod c_functions; + pub use bits::*; pub use levenshtein::*; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index a76faa419..f2d7ba171 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -15,6 +15,7 @@ pub mod bindings { pub mod decoder; #[cfg(feature = "hardsubx_ocr")] pub mod hardsubx; +pub mod libccxr_exports; pub mod utils; #[cfg(windows)] @@ -32,6 +33,7 @@ extern "C" { static mut cb_708: c_int; static mut cb_field1: c_int; static mut cb_field2: c_int; + static mut ccx_options: ccx_s_options; } /// Initialize env logger with custom format, using stdout as target diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs new file mode 100644 
index 000000000..68faa5dfd --- /dev/null +++ b/src/rust/src/libccxr_exports/mod.rs @@ -0,0 +1,91 @@ +//! Provides C-FFI functions that are direct equivalents of functions available in C. + +use crate::ccx_options; +use lib_ccxr::util::c_functions::*; +use lib_ccxr::util::log::*; +use std::convert::TryInto; +use std::os::raw::{c_char, c_int, c_uint}; + +/// Initializes the logger on the Rust side. +/// +/// # Safety +/// +/// `ccx_options` in C must be initialized properly before calling this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_init_basic_logger() { + let debug_mask = + DebugMessageFlag::from_bits(ccx_options.debug_mask.try_into().unwrap()).unwrap(); + let debug_mask_on_debug = + DebugMessageFlag::from_bits(ccx_options.debug_mask_on_debug.try_into().unwrap()).unwrap(); + let mask = DebugMessageMask::new(debug_mask, debug_mask_on_debug); + let gui_mode_reports = ccx_options.gui_mode_reports != 0; + let messages_target = match ccx_options.messages_target { + 0 => OutputTarget::Stdout, + 1 => OutputTarget::Stderr, + 2 => OutputTarget::Quiet, + _ => panic!("incorrect value for messages_target"), + }; + set_logger(CCExtractorLogger::new( + messages_target, + mask, + gui_mode_reports, + )) + .unwrap(); +} + +/// Rust equivalent for `verify_crc32` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `buf` must not be a NULL pointer, `len` must not be negative, and the buffer pointed to by +/// `buf` must be at least `len` bytes long. +#[no_mangle] +pub unsafe extern "C" fn ccxr_verify_crc32(buf: *const u8, len: c_int) -> c_int { + let buf = std::slice::from_raw_parts(buf, len as usize); + if verify_crc32(buf) { + 1 + } else { + 0 + } +} + +/// Rust equivalent for `levenshtein_dist` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s1` and `s2` must be valid slices of data with lengths of `s1len` and `s2len` respectively.
+#[no_mangle] +pub unsafe extern "C" fn ccxr_levenshtein_dist( + s1: *const u64, + s2: *const u64, + s1len: c_uint, + s2len: c_uint, +) -> c_int { + let s1 = std::slice::from_raw_parts(s1, s1len.try_into().unwrap()); + let s2 = std::slice::from_raw_parts(s2, s2len.try_into().unwrap()); + + let ans = levenshtein_dist(s1, s2); + + ans.try_into().unwrap() +} + +/// Rust equivalent for `levenshtein_dist_char` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s1` and `s2` must be valid slices of data and therefore not be null. They must have lengths +/// of `s1len` and `s2len` respectively. +#[no_mangle] +pub unsafe extern "C" fn ccxr_levenshtein_dist_char( + s1: *const c_char, + s2: *const c_char, + s1len: c_uint, + s2len: c_uint, +) -> c_int { + let s1 = std::slice::from_raw_parts(s1, s1len.try_into().unwrap()); + let s2 = std::slice::from_raw_parts(s2, s2len.try_into().unwrap()); + + let ans = levenshtein_dist_char(s1, s2); + + ans.try_into().unwrap() +} From dd362a8923142228064b3d0c3f0443beba924525 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 15:05:57 +0530 Subject: [PATCH 12/13] integrate time into C --- src/lib_ccx/ccx_common_timing.c | 48 ++ src/lib_ccx/utility.c | 30 + src/rust/build.rs | 10 +- .../lib_ccxr/src/util/time/c_functions.rs | 93 +++ src/rust/lib_ccxr/src/util/time/mod.rs | 2 + src/rust/src/lib.rs | 358 ++++++------ src/rust/src/libccxr_exports/mod.rs | 4 + src/rust/src/libccxr_exports/time.rs | 529 ++++++++++++++++++ 8 files changed, 900 insertions(+), 174 deletions(-) create mode 100644 src/rust/lib_ccxr/src/util/time/c_functions.rs create mode 100644 src/rust/src/libccxr_exports/time.rs diff --git a/src/lib_ccx/ccx_common_timing.c b/src/lib_ccx/ccx_common_timing.c index d1fca4282..e30e9fc3f 100644 --- a/src/lib_ccx/ccx_common_timing.c +++ b/src/lib_ccx/ccx_common_timing.c @@ -30,6 +30,18 @@ int gop_rollover = 0; struct ccx_common_timing_settings_t ccx_common_timing_settings; +#ifndef
DISABLE_RUST +void ccxr_add_current_pts(struct ccx_common_timing_ctx *ctx, LLONG pts); +void ccxr_set_current_pts(struct ccx_common_timing_ctx *ctx, LLONG pts); +int ccxr_set_fts(struct ccx_common_timing_ctx *ctx); +LLONG ccxr_get_fts(struct ccx_common_timing_ctx *ctx, int current_field); +LLONG ccxr_get_fts_max(struct ccx_common_timing_ctx *ctx); +char *ccxr_print_mstime_static(LLONG mstime, char *buf); +void ccxr_print_debug_timing(struct ccx_common_timing_ctx *ctx); +void ccxr_calculate_ms_gop_time(struct gop_time_code *g); +int ccxr_gop_accepted(struct gop_time_code *g); +#endif + void ccx_common_timing_init(LLONG *file_position, int no_sync) { ccx_common_timing_settings.disable_sync_check = 0; @@ -73,11 +85,19 @@ struct ccx_common_timing_ctx *init_timing_ctx(struct ccx_common_timing_settings_ void add_current_pts(struct ccx_common_timing_ctx *ctx, LLONG pts) { +#ifndef DISABLE_RUST + return ccxr_add_current_pts(ctx, pts); +#endif + set_current_pts(ctx, ctx->current_pts + pts); } void set_current_pts(struct ccx_common_timing_ctx *ctx, LLONG pts) { +#ifndef DISABLE_RUST + return ccxr_set_current_pts(ctx, pts); +#endif + LLONG prev_pts = ctx->current_pts; ctx->current_pts = pts; if (ctx->pts_set == 0) @@ -95,6 +115,10 @@ void set_current_pts(struct ccx_common_timing_ctx *ctx, LLONG pts) int set_fts(struct ccx_common_timing_ctx *ctx) { +#ifndef DISABLE_RUST + return ccxr_set_fts(ctx); +#endif + int pts_jump = 0; // ES don't have PTS unless GOP timing is used @@ -266,6 +290,10 @@ int set_fts(struct ccx_common_timing_ctx *ctx) LLONG get_fts(struct ccx_common_timing_ctx *ctx, int current_field) { +#ifndef DISABLE_RUST + return ccxr_get_fts(ctx, current_field); +#endif + LLONG fts; switch (current_field) @@ -290,6 +318,10 @@ LLONG get_fts(struct ccx_common_timing_ctx *ctx, int current_field) LLONG get_fts_max(struct ccx_common_timing_ctx *ctx) { +#ifndef DISABLE_RUST + return ccxr_get_fts_max(ctx); +#endif + // This returns the maximum FTS that belonged to a frame. 
Caption block // counters are not applicable. return ctx->fts_max + ctx->fts_global; @@ -322,6 +354,10 @@ size_t print_mstime_buff(LLONG mstime, char *fmt, char *buf) char *print_mstime_static(LLONG mstime) { static char buf[15]; // 14 should be long enough +#ifndef DISABLE_RUST + return ccxr_print_mstime_static(mstime, buf); +#endif + print_mstime_buff(mstime, "%02u:%02u:%02u:%03u", buf); return buf; } @@ -329,6 +365,10 @@ char *print_mstime_static(LLONG mstime) /* Helper function for to display debug timing info. */ void print_debug_timing(struct ccx_common_timing_ctx *ctx) { +#ifndef DISABLE_RUST + return ccxr_print_debug_timing(ctx); +#endif + // Avoid wrong "Calc. difference" and "Asynchronous by" numbers // for uninitialized min_pts LLONG tempmin_pts = (ctx->min_pts == 0x01FFFFFFFFLL ? ctx->sync_pts : ctx->min_pts); @@ -355,6 +395,10 @@ void print_debug_timing(struct ccx_common_timing_ctx *ctx) void calculate_ms_gop_time(struct gop_time_code *g) { +#ifndef DISABLE_RUST + return ccxr_calculate_ms_gop_time(g); +#endif + int seconds = (g->time_code_hours * 3600) + (g->time_code_minutes * 60) + g->time_code_seconds; g->ms = (LLONG)(1000 * (seconds + g->time_code_pictures / current_fps)); if (gop_rollover) @@ -363,6 +407,10 @@ void calculate_ms_gop_time(struct gop_time_code *g) int gop_accepted(struct gop_time_code *g) { +#ifndef DISABLE_RUST + return ccxr_gop_accepted(g); +#endif + if (!((g->time_code_hours <= 23) && (g->time_code_minutes <= 59) && (g->time_code_seconds <= 59) && (g->time_code_pictures <= 59))) return 0; diff --git a/src/lib_ccx/utility.c b/src/lib_ccx/utility.c index c110a1ec3..0e92b2d15 100644 --- a/src/lib_ccx/utility.c +++ b/src/lib_ccx/utility.c @@ -11,6 +11,10 @@ volatile sig_atomic_t change_filename_requested = 0; #ifndef DISABLE_RUST extern int ccxr_verify_crc32(uint8_t *buf, int len); +extern void ccxr_timestamp_to_srttime(uint64_t timestamp, char *buffer); +extern void ccxr_timestamp_to_vtttime(uint64_t timestamp, char *buffer); +extern 
void ccxr_millis_to_date(uint64_t timestamp, char *buffer, enum ccx_output_date_format date_format, char millis_separator); +extern int ccxr_stringztoms(const char *s, struct ccx_boundary_time *bt); extern int ccxr_levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, unsigned s2len); extern int ccxr_levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsigned s2len); #endif @@ -96,6 +100,10 @@ int verify_crc32(uint8_t *buf, int len) int stringztoms(const char *s, struct ccx_boundary_time *bt) { +#ifndef DISABLE_RUST + return ccxr_stringztoms(s, bt); +#endif + unsigned ss = 0, mm = 0, hh = 0; int value = -1; int colons = 0; @@ -140,6 +148,10 @@ int stringztoms(const char *s, struct ccx_boundary_time *bt) } void timestamp_to_srttime(uint64_t timestamp, char *buffer) { +#ifndef DISABLE_RUST + return ccxr_timestamp_to_srttime(timestamp, buffer); +#endif + uint64_t p = timestamp; uint8_t h = (uint8_t)(p / 3600000); uint8_t m = (uint8_t)(p / 60000 - 60 * h); @@ -149,6 +161,10 @@ void timestamp_to_srttime(uint64_t timestamp, char *buffer) } void timestamp_to_vtttime(uint64_t timestamp, char *buffer) { +#ifndef DISABLE_RUST + return ccxr_timestamp_to_vtttime(timestamp, buffer); +#endif + uint64_t p = timestamp; uint8_t h = (uint8_t)(p / 3600000); uint8_t m = (uint8_t)(p / 60000 - 60 * h); @@ -211,6 +227,20 @@ int levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsign void millis_to_date(uint64_t timestamp, char *buffer, enum ccx_output_date_format date_format, char millis_separator) { +#ifndef DISABLE_RUST + switch (date_format) + { + case ODF_NONE: + case ODF_HHMMSS: + case ODF_HHMMSSMS: + case ODF_SECONDS: + case ODF_DATE: + return ccxr_millis_to_date(timestamp, buffer, date_format, millis_separator); + default: + fatal(CCX_COMMON_EXIT_BUG_BUG, "Invalid value for date_format in millis_to_date()\n"); + } +#endif + time_t secs; unsigned int millis; char c_temp[80]; diff --git a/src/rust/build.rs 
b/src/rust/build.rs index 4df835845..82772f29b 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -26,6 +26,9 @@ fn main() { "lib_cc_decode", "cc_subtitle", "ccx_output_format", + "ccx_boundary_time", + "gop_time_code", + "ccx_common_timing_settings_t", "ccx_s_options", ]); @@ -72,4 +75,9 @@ fn main() { .expect("Couldn't write bindings!"); } -const RUSTIFIED_ENUMS: &[&str] = &["dtvcc_(window|pen)_.*", "ccx_output_format"]; +const RUSTIFIED_ENUMS: &[&str] = &[ + "dtvcc_(window|pen)_.*", + "ccx_output_format", + "ccx_output_date_format", + "ccx_frame_type", +]; diff --git a/src/rust/lib_ccxr/src/util/time/c_functions.rs b/src/rust/lib_ccxr/src/util/time/c_functions.rs new file mode 100644 index 000000000..4460e0175 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/c_functions.rs @@ -0,0 +1,93 @@ +//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. + +use super::*; + +/// Rust equivalent for `timestamp_to_srttime` function in C. Uses Rust-native types as input and +/// output. +pub fn timestamp_to_srttime( + timestamp: Timestamp, + buffer: &mut String, +) -> Result<(), TimestampError> { + timestamp.write_srt_time(buffer) +} + +/// Rust equivalent for `timestamp_to_vtttime` function in C. Uses Rust-native types as input and +/// output. +pub fn timestamp_to_vtttime( + timestamp: Timestamp, + buffer: &mut String, +) -> Result<(), TimestampError> { + timestamp.write_vtt_time(buffer) +} + +/// Rust equivalent for `millis_to_date` function in C. Uses Rust-native types as input and output. +pub fn millis_to_date( + timestamp: Timestamp, + buffer: &mut String, + date_format: TimestampFormat, +) -> Result<(), TimestampError> { + timestamp.write_formatted_time(buffer, date_format) +} + +/// Rust equivalent for `stringztoms` function in C. Uses Rust-native types as input and output. 
+pub fn stringztoms(s: &str) -> Option { + Timestamp::parse_optional_hhmmss_from_str(s).ok() +} + +/// Rust equivalent for `add_current_pts` function in C. Uses Rust-native types as input and output. +pub fn add_current_pts(ctx: &mut TimingContext, pts: MpegClockTick) { + ctx.add_current_pts(pts) +} + +/// Rust equivalent for `set_current_pts` function in C. Uses Rust-native types as input and output. +pub fn set_current_pts(ctx: &mut TimingContext, pts: MpegClockTick) { + ctx.set_current_pts(pts) +} + +/// Rust equivalent for `set_fts` function in C. Uses Rust-native types as input and output. +pub fn set_fts(ctx: &mut TimingContext) -> bool { + ctx.set_fts() +} + +/// Rust equivalent for `get_fts` function in C. Uses Rust-native types as input and output. +pub fn get_fts(ctx: &mut TimingContext, current_field: CaptionField) -> Timestamp { + ctx.get_fts(current_field) +} + +/// Rust equivalent for `get_fts_max` function in C. Uses Rust-native types as input and output. +pub fn get_fts_max(ctx: &mut TimingContext) -> Timestamp { + ctx.get_fts_max() +} + +/// Rust equivalent for `print_mstime_static` function in C. Uses Rust-native types as input and output. +pub fn print_mstime_static(mstime: Timestamp, sep: char) -> String { + mstime.to_hms_millis_time(sep).unwrap() +} + +/// Rust equivalent for `print_debug_timing` function in C. Uses Rust-native types as input and output. +pub fn print_debug_timing(ctx: &mut TimingContext) { + ctx.print_debug_timing() +} + +/// Rust equivalent for `calculate_ms_gop_time` function in C. Uses Rust-native types as input and output. +pub fn calculate_ms_gop_time(g: GopTimeCode) -> Timestamp { + g.timestamp() +} + +/// Rust equivalent for `gop_accepted` function in C. Uses Rust-native types as input and output. 
+pub fn gop_accepted(g: GopTimeCode) -> bool { + let mut timing_info = GLOBAL_TIMING_INFO.write().unwrap(); + + let gop_time = if let Some(gt) = timing_info.gop_time { + gt + } else { + return true; + }; + + if g.did_rollover(&gop_time) { + timing_info.gop_rollover = true; + true + } else { + gop_time.timestamp() <= g.timestamp() + } +} diff --git a/src/rust/lib_ccxr/src/util/time/mod.rs b/src/rust/lib_ccxr/src/util/time/mod.rs index ce090781b..ebee657ed 100644 --- a/src/rust/lib_ccxr/src/util/time/mod.rs +++ b/src/rust/lib_ccxr/src/util/time/mod.rs @@ -13,5 +13,7 @@ mod timing; mod units; +pub mod c_functions; + pub use timing::*; pub use units::*; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index f2d7ba171..9fbce6bb3 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,173 +1,185 @@ -//! Rust library for CCExtractor -//! -//! Currently we are in the process of porting the 708 decoder to rust. See [decoder] - -// Allow C naming style -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] - -/// CCExtractor C bindings generated by bindgen -#[allow(clippy::all)] -pub mod bindings { - include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -} -pub mod decoder; -#[cfg(feature = "hardsubx_ocr")] -pub mod hardsubx; -pub mod libccxr_exports; -pub mod utils; - -#[cfg(windows)] -use std::os::windows::io::{FromRawHandle, RawHandle}; -use std::{io::Write, os::raw::c_int}; - -use bindings::*; -use decoder::Dtvcc; -use utils::is_true; - -use env_logger::{builder, Target}; -use log::{warn, LevelFilter}; - -extern "C" { - static mut cb_708: c_int; - static mut cb_field1: c_int; - static mut cb_field2: c_int; - static mut ccx_options: ccx_s_options; -} - -/// Initialize env logger with custom format, using stdout as target -#[no_mangle] -pub extern "C" fn ccxr_init_logger() { - builder() - .format(|buf, record| writeln!(buf, "[CEA-708] {}", record.args())) - .filter_level(LevelFilter::Debug) - .target(Target::Stdout) - 
.init(); -} - -/// Process cc_data -/// -/// # Safety -/// dec_ctx should not be a null pointer -/// data should point to cc_data of length cc_count -#[no_mangle] -extern "C" fn ccxr_process_cc_data( - dec_ctx: *mut lib_cc_decode, - data: *const ::std::os::raw::c_uchar, - cc_count: c_int, -) -> c_int { - let mut ret = -1; - let mut cc_data: Vec = (0..cc_count * 3) - .map(|x| unsafe { *data.add(x as usize) }) - .collect(); - let dec_ctx = unsafe { &mut *dec_ctx }; - let dtvcc_ctx = unsafe { &mut *dec_ctx.dtvcc }; - let mut dtvcc = Dtvcc::new(dtvcc_ctx); - for cc_block in cc_data.chunks_exact_mut(3) { - if !validate_cc_pair(cc_block) { - continue; - } - let success = do_cb(dec_ctx, &mut dtvcc, cc_block); - if success { - ret = 0; - } - } - ret -} - -/// Returns `true` if cc_block pair is valid -/// -/// For CEA-708 data, only cc_valid is checked -/// For CEA-608 data, parity is also checked -pub fn validate_cc_pair(cc_block: &mut [u8]) -> bool { - let cc_valid = (cc_block[0] & 4) >> 2; - let cc_type = cc_block[0] & 3; - if cc_valid == 0 { - return false; - } - if cc_type == 0 || cc_type == 1 { - // For CEA-608 data we verify parity. - if verify_parity(cc_block[2]) { - // If the second byte doesn't pass parity, ignore pair - return false; - } - if verify_parity(cc_block[1]) { - // If the first byte doesn't pass parity, - // we replace it with a solid blank and process the pair. - cc_block[1] = 0x7F; - } - } - true -} - -/// Returns `true` if data has odd parity -/// -/// CC uses odd parity (i.e., # of 1's in byte is odd.) 
-pub fn verify_parity(data: u8) -> bool { - if data.count_ones() & 1 == 1 { - return true; - } - false -} - -/// Process CC data according to its type -pub fn do_cb(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { - let cc_valid = (cc_block[0] & 4) >> 2; - let cc_type = cc_block[0] & 3; - let mut timeok = true; - - if ctx.write_format != ccx_output_format::CCX_OF_DVDRAW - && ctx.write_format != ccx_output_format::CCX_OF_RAW - && (cc_block[0] == 0xFA || cc_block[0] == 0xFC || cc_block[0] == 0xFD) - && (cc_block[1] & 0x7F) == 0 - && (cc_block[2] & 0x7F) == 0 - { - return true; - } - - if cc_valid == 1 || cc_type == 3 { - ctx.cc_stats[cc_type as usize] += 1; - match cc_type { - // Type 0 and 1 are for CEA-608 data. Handled by C code, do nothing - 0 | 1 => {} - // Type 2 and 3 are for CEA-708 data. - 2 | 3 => { - let current_time = unsafe { (*ctx.timing).get_fts(ctx.current_field as u8) }; - ctx.current_field = 3; - - // Check whether current time is within start and end bounds - if is_true(ctx.extraction_start.set) - && current_time < ctx.extraction_start.time_in_ms - { - timeok = false; - } - if is_true(ctx.extraction_end.set) && current_time > ctx.extraction_end.time_in_ms { - timeok = false; - ctx.processed_enough = 1; - } - - if timeok && ctx.write_format != ccx_output_format::CCX_OF_RAW { - dtvcc.process_cc_data(cc_valid, cc_type, cc_block[1], cc_block[2]); - } - unsafe { cb_708 += 1 } - } - _ => warn!("Invalid cc_type"), - } - } - true -} - -#[cfg(windows)] -#[no_mangle] -extern "C" fn ccxr_close_handle(handle: RawHandle) { - use std::fs::File; - - if handle.is_null() { - return; - } - unsafe { - // File will close automatically (due to Drop) once it goes out of scope - let _file = File::from_raw_handle(handle); - } -} +//! Rust library for CCExtractor +//! +//! Currently we are in the process of porting the 708 decoder to rust. 
See [decoder] + +// Allow C naming style +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/// CCExtractor C bindings generated by bindgen +#[allow(clippy::all)] +pub mod bindings { + include!(concat!(env!("OUT_DIR"), "/bindings.rs")); +} +pub mod decoder; +#[cfg(feature = "hardsubx_ocr")] +pub mod hardsubx; +pub mod libccxr_exports; +pub mod utils; + +#[cfg(windows)] +use std::os::windows::io::{FromRawHandle, RawHandle}; +use std::{ + io::Write, + os::raw::{c_double, c_int, c_long, c_uint}, +}; + +use bindings::*; +use decoder::Dtvcc; +use utils::is_true; + +use env_logger::{builder, Target}; +use log::{warn, LevelFilter}; + +extern "C" { + static mut cb_708: c_int; + static mut cb_field1: c_int; + static mut cb_field2: c_int; + static mut pts_big_change: c_uint; + static mut current_fps: c_double; + static mut frames_since_ref_time: c_int; + static mut total_frames_count: c_uint; + static mut gop_time: gop_time_code; + static mut first_gop_time: gop_time_code; + static mut fts_at_gop_start: c_long; + static mut gop_rollover: c_int; + static mut ccx_common_timing_settings: ccx_common_timing_settings_t; + static mut ccx_options: ccx_s_options; +} + +/// Initialize env logger with custom format, using stdout as target +#[no_mangle] +pub extern "C" fn ccxr_init_logger() { + builder() + .format(|buf, record| writeln!(buf, "[CEA-708] {}", record.args())) + .filter_level(LevelFilter::Debug) + .target(Target::Stdout) + .init(); +} + +/// Process cc_data +/// +/// # Safety +/// dec_ctx should not be a null pointer +/// data should point to cc_data of length cc_count +#[no_mangle] +extern "C" fn ccxr_process_cc_data( + dec_ctx: *mut lib_cc_decode, + data: *const ::std::os::raw::c_uchar, + cc_count: c_int, +) -> c_int { + let mut ret = -1; + let mut cc_data: Vec = (0..cc_count * 3) + .map(|x| unsafe { *data.add(x as usize) }) + .collect(); + let dec_ctx = unsafe { &mut *dec_ctx }; + let dtvcc_ctx = unsafe { &mut *dec_ctx.dtvcc 
}; + let mut dtvcc = Dtvcc::new(dtvcc_ctx); + for cc_block in cc_data.chunks_exact_mut(3) { + if !validate_cc_pair(cc_block) { + continue; + } + let success = do_cb(dec_ctx, &mut dtvcc, cc_block); + if success { + ret = 0; + } + } + ret +} + +/// Returns `true` if cc_block pair is valid +/// +/// For CEA-708 data, only cc_valid is checked +/// For CEA-608 data, parity is also checked +pub fn validate_cc_pair(cc_block: &mut [u8]) -> bool { + let cc_valid = (cc_block[0] & 4) >> 2; + let cc_type = cc_block[0] & 3; + if cc_valid == 0 { + return false; + } + if cc_type == 0 || cc_type == 1 { + // For CEA-608 data we verify parity. + if verify_parity(cc_block[2]) { + // If the second byte doesn't pass parity, ignore pair + return false; + } + if verify_parity(cc_block[1]) { + // If the first byte doesn't pass parity, + // we replace it with a solid blank and process the pair. + cc_block[1] = 0x7F; + } + } + true +} + +/// Returns `true` if data has odd parity +/// +/// CC uses odd parity (i.e., # of 1's in byte is odd.) +pub fn verify_parity(data: u8) -> bool { + if data.count_ones() & 1 == 1 { + return true; + } + false +} + +/// Process CC data according to its type +pub fn do_cb(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { + let cc_valid = (cc_block[0] & 4) >> 2; + let cc_type = cc_block[0] & 3; + let mut timeok = true; + + if ctx.write_format != ccx_output_format::CCX_OF_DVDRAW + && ctx.write_format != ccx_output_format::CCX_OF_RAW + && (cc_block[0] == 0xFA || cc_block[0] == 0xFC || cc_block[0] == 0xFD) + && (cc_block[1] & 0x7F) == 0 + && (cc_block[2] & 0x7F) == 0 + { + return true; + } + + if cc_valid == 1 || cc_type == 3 { + ctx.cc_stats[cc_type as usize] += 1; + match cc_type { + // Type 0 and 1 are for CEA-608 data. Handled by C code, do nothing + 0 | 1 => {} + // Type 2 and 3 are for CEA-708 data. 
+ 2 | 3 => { + let current_time = unsafe { (*ctx.timing).get_fts(ctx.current_field as u8) }; + ctx.current_field = 3; + + // Check whether current time is within start and end bounds + if is_true(ctx.extraction_start.set) + && current_time < ctx.extraction_start.time_in_ms + { + timeok = false; + } + if is_true(ctx.extraction_end.set) && current_time > ctx.extraction_end.time_in_ms { + timeok = false; + ctx.processed_enough = 1; + } + + if timeok && ctx.write_format != ccx_output_format::CCX_OF_RAW { + dtvcc.process_cc_data(cc_valid, cc_type, cc_block[1], cc_block[2]); + } + unsafe { cb_708 += 1 } + } + _ => warn!("Invalid cc_type"), + } + } + true +} + +#[cfg(windows)] +#[no_mangle] +extern "C" fn ccxr_close_handle(handle: RawHandle) { + use std::fs::File; + + if handle.is_null() { + return; + } + unsafe { + // File will close automatically (due to Drop) once it goes out of scope + let _file = File::from_raw_handle(handle); + } +} diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index 68faa5dfd..2209d4b31 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -6,6 +6,10 @@ use lib_ccxr::util::log::*; use std::convert::TryInto; use std::os::raw::{c_char, c_int, c_uint}; +mod time; + +pub use time::*; + /// Initializes the logger at the rust side. 
/// /// # Safety diff --git a/src/rust/src/libccxr_exports/time.rs b/src/rust/src/libccxr_exports/time.rs new file mode 100644 index 000000000..0a96e2be9 --- /dev/null +++ b/src/rust/src/libccxr_exports/time.rs @@ -0,0 +1,529 @@ +#![allow(clippy::useless_conversion)] + +use crate::{ + bindings::*, cb_708, cb_field1, cb_field2, ccx_common_timing_settings as timing_settings, + current_fps, first_gop_time, frames_since_ref_time, fts_at_gop_start, gop_rollover, gop_time, + pts_big_change, total_frames_count, +}; + +use std::convert::TryInto; +use std::ffi::CStr; +use std::os::raw::{c_char, c_int, c_long}; + +use lib_ccxr::common::FrameType; +use lib_ccxr::util::time::{c_functions as c, *}; + +/// Helper function that converts a Rust-String (`string`) to C-String (`buffer`). +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for `string` to fit. +unsafe fn write_string_into_pointer(buffer: *mut c_char, string: &str) { + let buffer = std::slice::from_raw_parts_mut(buffer as *mut u8, string.len() + 1); + buffer[..string.len()].copy_from_slice(string.as_bytes()); + buffer[string.len()] = b'\0'; +} + +/// Rust equivalent for `timestamp_to_srttime` function in C. Uses C-native types as input and +/// output. +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. +#[no_mangle] +pub unsafe extern "C" fn ccxr_timestamp_to_srttime(timestamp: u64, buffer: *mut c_char) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + + let _ = c::timestamp_to_srttime(timestamp, &mut s); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `timestamp_to_vtttime` function in C. Uses C-native types as input and +/// output. +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_timestamp_to_vtttime(timestamp: u64, buffer: *mut c_char) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + + let _ = c::timestamp_to_vtttime(timestamp, &mut s); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `millis_to_date` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. +#[no_mangle] +pub unsafe extern "C" fn ccxr_millis_to_date( + timestamp: u64, + buffer: *mut c_char, + date_format: ccx_output_date_format, + millis_separator: c_char, +) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + let date_format = match date_format { + ccx_output_date_format::ODF_NONE => TimestampFormat::None, + ccx_output_date_format::ODF_HHMMSS => TimestampFormat::HHMMSS, + ccx_output_date_format::ODF_HHMMSSMS => TimestampFormat::HHMMSSFFF, + ccx_output_date_format::ODF_SECONDS => TimestampFormat::Seconds { + millis_separator: millis_separator as u8 as char, + }, + ccx_output_date_format::ODF_DATE => TimestampFormat::Date { + millis_separator: millis_separator as u8 as char, + }, + }; + + let _ = c::millis_to_date(timestamp, &mut s, date_format); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `stringztoms` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s` must contain valid utf-8 data and have a nul terminator at the end of the string. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_stringztoms(s: *const c_char, bt: *mut ccx_boundary_time) -> c_int { + let s = CStr::from_ptr(s).to_str().unwrap(); + + let option_timestamp = c::stringztoms(s); + + if let Some(timestamp) = option_timestamp { + if let Ok((h, m, s, _)) = timestamp.as_hms_millis() { + (*bt).set = 1; + (*bt).hh = h.into(); + (*bt).mm = m.into(); + (*bt).ss = s.into(); + (*bt).time_in_ms = (timestamp.millis() / 1000) * 1000; + return 0; + } + }; + + -1 +} + +/// Construct a [`TimingContext`] from a pointer of `ccx_common_timing_ctx`. +/// +/// It is used to move data of [`TimingContext`] from C to Rust. +/// +/// # Safety +/// +/// `ctx` should not be null. +unsafe fn generate_timing_context(ctx: *const ccx_common_timing_ctx) -> TimingContext { + let pts_set = match (*ctx).pts_set { + 0 => PtsSet::No, + 1 => PtsSet::Received, + 2 => PtsSet::MinPtsSet, + _ => panic!("incorrect value for pts_set"), + }; + + let min_pts_adjusted = (*ctx).min_pts_adjusted != 0; + let current_pts = MpegClockTick::new((*ctx).current_pts); + + let current_picture_coding_type = match (*ctx).current_picture_coding_type { + ccx_frame_type::CCX_FRAME_TYPE_RESET_OR_UNKNOWN => FrameType::ResetOrUnknown, + ccx_frame_type::CCX_FRAME_TYPE_I_FRAME => FrameType::IFrame, + ccx_frame_type::CCX_FRAME_TYPE_P_FRAME => FrameType::PFrame, + ccx_frame_type::CCX_FRAME_TYPE_B_FRAME => FrameType::BFrame, + ccx_frame_type::CCX_FRAME_TYPE_D_FRAME => FrameType::DFrame, + }; + + let current_tref = FrameCount::new((*ctx).current_tref.try_into().unwrap()); + let min_pts = MpegClockTick::new((*ctx).min_pts); + let sync_pts = MpegClockTick::new((*ctx).sync_pts); + let minimum_fts = Timestamp::from_millis((*ctx).minimum_fts); + let fts_now = Timestamp::from_millis((*ctx).fts_now); + let fts_offset = Timestamp::from_millis((*ctx).fts_offset); + let fts_fc_offset = Timestamp::from_millis((*ctx).fts_fc_offset); + let fts_max = Timestamp::from_millis((*ctx).fts_max); + let fts_global = 
Timestamp::from_millis((*ctx).fts_global); + let sync_pts2fts_set = (*ctx).sync_pts2fts_set != 0; + let sync_pts2fts_fts = Timestamp::from_millis((*ctx).sync_pts2fts_fts); + let sync_pts2fts_pts = MpegClockTick::new((*ctx).sync_pts2fts_pts); + let pts_reset = (*ctx).pts_reset != 0; + + TimingContext::from_raw_parts( + pts_set, + min_pts_adjusted, + current_pts, + current_picture_coding_type, + current_tref, + min_pts, + sync_pts, + minimum_fts, + fts_now, + fts_offset, + fts_fc_offset, + fts_max, + fts_global, + sync_pts2fts_set, + sync_pts2fts_fts, + sync_pts2fts_pts, + pts_reset, + ) +} + +/// Copy the contents [`TimingContext`] to a `ccx_common_timing_ctx`. +/// +/// It is used to move data of [`TimingContext`] from Rust to C. +/// +/// # Safety +/// +/// `ctx` should not be null. +unsafe fn write_back_to_common_timing_ctx( + ctx: *mut ccx_common_timing_ctx, + timing_ctx: &TimingContext, +) { + let ( + pts_set, + min_pts_adjusted, + current_pts, + current_picture_coding_type, + current_tref, + min_pts, + sync_pts, + minimum_fts, + fts_now, + fts_offset, + fts_fc_offset, + fts_max, + fts_global, + sync_pts2fts_set, + sync_pts2fts_fts, + sync_pts2fts_pts, + pts_reset, + ) = timing_ctx.as_raw_parts(); + + (*ctx).pts_set = match pts_set { + PtsSet::No => 0, + PtsSet::Received => 1, + PtsSet::MinPtsSet => 2, + }; + + (*ctx).min_pts_adjusted = if min_pts_adjusted { 1 } else { 0 }; + (*ctx).current_pts = current_pts.as_i64(); + + (*ctx).current_picture_coding_type = match current_picture_coding_type { + FrameType::ResetOrUnknown => ccx_frame_type::CCX_FRAME_TYPE_RESET_OR_UNKNOWN, + FrameType::IFrame => ccx_frame_type::CCX_FRAME_TYPE_I_FRAME, + FrameType::PFrame => ccx_frame_type::CCX_FRAME_TYPE_P_FRAME, + FrameType::BFrame => ccx_frame_type::CCX_FRAME_TYPE_B_FRAME, + FrameType::DFrame => ccx_frame_type::CCX_FRAME_TYPE_D_FRAME, + }; + + (*ctx).current_tref = current_tref.as_u64().try_into().unwrap(); + (*ctx).min_pts = min_pts.as_i64(); + (*ctx).sync_pts = 
sync_pts.as_i64(); + (*ctx).minimum_fts = minimum_fts.millis(); + (*ctx).fts_now = fts_now.millis(); + (*ctx).fts_offset = fts_offset.millis(); + (*ctx).fts_fc_offset = fts_fc_offset.millis(); + (*ctx).fts_max = fts_max.millis(); + (*ctx).fts_global = fts_global.millis(); + (*ctx).sync_pts2fts_set = if sync_pts2fts_set { 1 } else { 0 }; + (*ctx).sync_pts2fts_fts = sync_pts2fts_fts.millis(); + (*ctx).sync_pts2fts_pts = sync_pts2fts_pts.as_i64(); + (*ctx).pts_reset = if pts_reset { 1 } else { 0 }; +} + +/// Write to [`GLOBAL_TIMING_INFO`] from the equivalent static variables in C. +/// +/// It is used to move data of [`GLOBAL_TIMING_INFO`] from C to Rust. +/// +/// # Safety +/// +/// All the static variables should be initialized and in valid state. +unsafe fn apply_timing_info() { + let mut timing_info = GLOBAL_TIMING_INFO.write().unwrap(); + + timing_info.cb_field1 = cb_field1.try_into().unwrap(); + timing_info.cb_field2 = cb_field2.try_into().unwrap(); + timing_info.cb_708 = cb_708.try_into().unwrap(); + timing_info.pts_big_change = pts_big_change != 0; + timing_info.current_fps = current_fps; + timing_info.frames_since_ref_time = FrameCount::new(frames_since_ref_time.try_into().unwrap()); + timing_info.total_frames_count = FrameCount::new(total_frames_count.try_into().unwrap()); + timing_info.gop_time = generate_gop_time_code(gop_time); + timing_info.first_gop_time = generate_gop_time_code(first_gop_time); + timing_info.fts_at_gop_start = Timestamp::from_millis(fts_at_gop_start.try_into().unwrap()); + timing_info.gop_rollover = gop_rollover != 0; + timing_info.timing_settings.disable_sync_check = timing_settings.disable_sync_check != 0; + timing_info.timing_settings.no_sync = timing_settings.no_sync != 0; + timing_info.timing_settings.is_elementary_stream = timing_settings.is_elementary_stream != 0; +} + +/// Write from [`GLOBAL_TIMING_INFO`] to the equivalent static variables in C. +/// +/// It is used to move data of [`GLOBAL_TIMING_INFO`] from Rust to C. 
+/// +/// # Safety +/// +/// All the static variables should be initialized and in valid state. +unsafe fn write_back_from_timing_info() { + let timing_info = GLOBAL_TIMING_INFO.read().unwrap(); + + cb_field1 = timing_info.cb_field1.try_into().unwrap(); + cb_field2 = timing_info.cb_field2.try_into().unwrap(); + cb_708 = timing_info.cb_708.try_into().unwrap(); + pts_big_change = if timing_info.pts_big_change { 1 } else { 0 }; + current_fps = timing_info.current_fps; + frames_since_ref_time = timing_info + .frames_since_ref_time + .as_u64() + .try_into() + .unwrap(); + total_frames_count = timing_info.total_frames_count.as_u64().try_into().unwrap(); + gop_time = write_gop_time_code(timing_info.gop_time); + first_gop_time = write_gop_time_code(timing_info.first_gop_time); + fts_at_gop_start = timing_info.fts_at_gop_start.millis().try_into().unwrap(); + gop_rollover = if timing_info.gop_rollover { 1 } else { 0 }; + timing_settings.disable_sync_check = if timing_info.timing_settings.disable_sync_check { + 1 + } else { + 0 + }; + timing_settings.no_sync = if timing_info.timing_settings.no_sync { + 1 + } else { + 0 + }; + timing_settings.is_elementary_stream = if timing_info.timing_settings.is_elementary_stream { + 1 + } else { + 0 + }; +} + +/// Construct a [`GopTimeCode`] from `gop_time_code`. +unsafe fn generate_gop_time_code(g: gop_time_code) -> Option { + if g.inited == 0 { + None + } else { + Some(GopTimeCode::from_raw_parts( + g.drop_frame_flag != 0, + g.time_code_hours.try_into().unwrap(), + g.time_code_minutes.try_into().unwrap(), + g.time_code_seconds.try_into().unwrap(), + g.time_code_pictures.try_into().unwrap(), + Timestamp::from_millis(g.ms), + )) + } +} + +/// Construct a `gop_time_code` from [`GopTimeCode`]. 
+unsafe fn write_gop_time_code(g: Option) -> gop_time_code { + if let Some(gop) = g { + let ( + drop_frame, + time_code_hours, + time_code_minutes, + time_code_seconds, + time_code_pictures, + timestamp, + ) = gop.as_raw_parts(); + + gop_time_code { + drop_frame_flag: if drop_frame { 1 } else { 0 }, + time_code_hours: time_code_hours.try_into().unwrap(), + time_code_minutes: time_code_minutes.try_into().unwrap(), + marker_bit: 0, + time_code_seconds: time_code_seconds.try_into().unwrap(), + time_code_pictures: time_code_pictures.try_into().unwrap(), + inited: 1, + ms: timestamp.millis(), + } + } else { + gop_time_code { + drop_frame_flag: 0, + time_code_hours: 0, + time_code_minutes: 0, + marker_bit: 0, + time_code_seconds: 0, + time_code_pictures: 0, + inited: 0, + ms: 0, + } + } +} + +/// Rust equivalent for `add_current_pts` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. +#[no_mangle] +pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + c::add_current_pts(&mut context, MpegClockTick::new(pts.try_into().unwrap())); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); +} + +/// Rust equivalent for `set_current_pts` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. +#[no_mangle] +pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + c::set_current_pts(&mut context, MpegClockTick::new(pts.try_into().unwrap())); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); +} + +/// Rust equivalent for `set_fts` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_set_fts(ctx: *mut ccx_common_timing_ctx) -> c_int { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + let ans = c::set_fts(&mut context); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); + + if ans { + 1 + } else { + 0 + } +} + +/// Rust equivalent for `get_fts` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. `current_field` must be 1, 2 or 3. +#[no_mangle] +pub unsafe extern "C" fn ccxr_get_fts( + ctx: *mut ccx_common_timing_ctx, + current_field: c_int, +) -> c_long { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + let caption_field = match current_field { + 1 => CaptionField::Field1, + 2 => CaptionField::Field2, + 3 => CaptionField::Cea708, + _ => panic!("incorrect value for caption field"), + }; + + let ans = c::get_fts(&mut context, caption_field); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); + + ans.millis().try_into().unwrap() +} + +/// Rust equivalent for `get_fts_max` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. +#[no_mangle] +pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_long { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + let ans = c::get_fts_max(&mut context); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); + + ans.millis().try_into().unwrap() +} + +/// Rust equivalent for `print_mstime_static` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `buf` must not be null. It must have sufficient length to hold the time in string form. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: c_long, buf: *mut c_char) -> *mut c_char { + let time = Timestamp::from_millis(mstime.try_into().unwrap()); + let ans = c::print_mstime_static(time, ':'); + write_string_into_pointer(buf, &ans); + buf +} + +/// Rust equivalent for `print_debug_timing` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `ctx` must not be null. +#[no_mangle] +pub unsafe extern "C" fn ccxr_print_debug_timing(ctx: *mut ccx_common_timing_ctx) { + apply_timing_info(); + let mut context = generate_timing_context(ctx); + + c::print_debug_timing(&mut context); + + write_back_to_common_timing_ctx(ctx, &context); + write_back_from_timing_info(); +} + +/// Rust equivalent for `calculate_ms_gop_time` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `g` must not be null. +#[no_mangle] +pub unsafe extern "C" fn ccxr_calculate_ms_gop_time(g: *mut gop_time_code) { + apply_timing_info(); + let timing_info = GLOBAL_TIMING_INFO.read().unwrap(); + + (*g).ms = GopTimeCode::new( + (*g).drop_frame_flag != 0, + (*g).time_code_hours.try_into().unwrap(), + (*g).time_code_minutes.try_into().unwrap(), + (*g).time_code_seconds.try_into().unwrap(), + (*g).time_code_pictures.try_into().unwrap(), + timing_info.current_fps, + timing_info.gop_rollover, + ) + .unwrap() + .timestamp() + .millis() +} + +/// Rust equivalent for `gop_accepted` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `g` must not be null. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_gop_accepted(g: *mut gop_time_code) -> c_int { + if let Some(gop) = generate_gop_time_code(*g) { + let ans = c::gop_accepted(gop); + if ans { + 1 + } else { + 0 + } + } else { + 0 + } +} From ff0658925fa248d5b1d05b4e44e323777abee73f Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 15:08:19 +0530 Subject: [PATCH 13/13] integrate net into C --- src/lib_ccx/networking.c | 54 ++++ src/rust/lib_ccxr/src/util/net/c_functions.rs | 95 +++++++ src/rust/lib_ccxr/src/util/net/mod.rs | 2 + src/rust/src/libccxr_exports/mod.rs | 2 + src/rust/src/libccxr_exports/net.rs | 232 ++++++++++++++++++ 5 files changed, 385 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/net/c_functions.rs create mode 100644 src/rust/src/libccxr_exports/net.rs diff --git a/src/lib_ccx/networking.c b/src/lib_ccx/networking.c index 4cafdb7fd..0fbc076e5 100644 --- a/src/lib_ccx/networking.c +++ b/src/lib_ccx/networking.c @@ -36,6 +36,24 @@ #define NO_RESPONCE_INTERVAL 20 #define PING_INTERVAL 3 +#ifndef DISABLE_RUST +extern void ccxr_connect_to_srv(const char *addr, const char *port, const char *cc_desc, const char *pwd); +extern void ccxr_net_send_header(const unsigned char *data, size_t len); +extern int ccxr_net_send_cc(const unsigned char *data, int length, void *private_data, struct cc_subtitle *sub); +extern void ccxr_net_check_conn(); +extern void ccxr_net_send_epg( + const char *start, + const char *stop, + const char *title, + const char *desc, + const char *lang, + const char *category); +extern int ccxr_net_tcp_read(int socket, void *buffer, size_t length); +extern int ccxr_net_udp_read(int socket, void *buffer, size_t length, const char *src_str, const char *addr_str); +extern int ccxr_start_tcp_srv(const char *port, const char *pwd); +extern int ccxr_start_udp_srv(const char *src, const char *addr, unsigned port); +#endif + int srv_sd = -1; /* Server socket descriptor */ const char *srv_addr; @@ -84,6 +102,10 @@ int 
set_nonblocking(int fd); void connect_to_srv(const char *addr, const char *port, const char *cc_desc, const char *pwd) { +#ifndef DISABLE_RUST + return ccxr_connect_to_srv(addr, port, cc_desc, pwd); +#endif + if (NULL == addr) { mprint("Server address is not set\n"); @@ -115,6 +137,10 @@ void connect_to_srv(const char *addr, const char *port, const char *cc_desc, con void net_send_header(const unsigned char *data, size_t len) { +#ifndef DISABLE_RUST + return ccxr_net_send_header(data, len); +#endif + assert(srv_sd > 0); #if DEBUG_OUT @@ -141,6 +167,10 @@ void net_send_header(const unsigned char *data, size_t len) int net_send_cc(const unsigned char *data, int len, void *private_data, struct cc_subtitle *sub) { +#ifndef DISABLE_RUST + return ccxr_net_send_cc(data, len, private_data, sub); +#endif + assert(srv_sd > 0); #if DEBUG_OUT @@ -160,6 +190,10 @@ int net_send_cc(const unsigned char *data, int len, void *private_data, struct c void net_check_conn() { +#ifndef DISABLE_RUST + return ccxr_net_check_conn(); +#endif + time_t now; static time_t last_ping = 0; char c = 0; @@ -221,6 +255,10 @@ void net_send_epg( const char *lang, const char *category) { +#ifndef DISABLE_RUST + return ccxr_net_send_epg(start, stop, title, desc, lang, category); +#endif + size_t st; size_t sp; size_t t; @@ -301,6 +339,10 @@ void net_send_epg( int net_tcp_read(int socket, void *buffer, size_t length) { +#ifndef DISABLE_RUST + return ccxr_net_tcp_read(socket, buffer, length); +#endif + assert(buffer != NULL); assert(length > 0); @@ -333,6 +375,10 @@ int net_tcp_read(int socket, void *buffer, size_t length) int net_udp_read(int socket, void *buffer, size_t length, const char *src_str, const char *addr_str) { +#ifndef DISABLE_RUST + return ccxr_net_udp_read(socket, buffer, length, src_str, addr_str); +#endif + assert(buffer != NULL); assert(length > 0); @@ -519,6 +565,10 @@ int tcp_connect(const char *host, const char *port) int start_tcp_srv(const char *port, const char *pwd) { +#ifndef 
DISABLE_RUST + return ccxr_start_tcp_srv(port, pwd); +#endif + if (NULL == port) port = DFT_PORT; @@ -974,6 +1024,10 @@ ssize_t read_byte(int fd, char *ch) int start_upd_srv(const char *src_str, const char *addr_str, unsigned port) { +#ifndef DISABLE_RUST + return ccxr_start_udp_srv(src_str, addr_str, port); +#endif + init_sockets(); in_addr_t src; diff --git a/src/rust/lib_ccxr/src/util/net/c_functions.rs b/src/rust/lib_ccxr/src/util/net/c_functions.rs new file mode 100644 index 000000000..e0155a97a --- /dev/null +++ b/src/rust/lib_ccxr/src/util/net/c_functions.rs @@ -0,0 +1,95 @@ +use crate::util::net::*; +use std::sync::RwLock; + +static TARGET: RwLock> = RwLock::new(None); +static SOURCE: RwLock> = RwLock::new(None); + +/// Rust equivalent for `connect_to_srv` function in C. Uses Rust-native types as input and output. +pub fn connect_to_srv( + addr: &'static str, + port: Option, + cc_desc: Option<&'static str>, + pwd: Option<&'static str>, +) { + let mut send_target = TARGET.write().unwrap(); + *send_target = Some(SendTarget::new(SendTargetConfig { + target_addr: addr, + port, + password: pwd, + description: cc_desc, + })); +} + +/// Rust equivalent for `net_send_header` function in C. Uses Rust-native types as input and output. +pub fn net_send_header(data: &[u8]) { + let mut send_target = TARGET.write().unwrap(); + send_target.as_mut().unwrap().send_header(data); +} + +/// Rust equivalent for `net_send_cc` function in C. Uses Rust-native types as input and output. +pub fn net_send_cc(data: &[u8]) -> bool { + let mut send_target = TARGET.write().unwrap(); + send_target.as_mut().unwrap().send_cc(data) +} + +/// Rust equivalent for `net_check_conn` function in C. Uses Rust-native types as input and output. +pub fn net_check_conn() { + let mut send_target = TARGET.write().unwrap(); + send_target.as_mut().unwrap().check_connection(); +} + +/// Rust equivalent for `net_send_epg` function in C. Uses Rust-native types as input and output. 
+pub fn net_send_epg( + start: &str, + stop: &str, + title: Option<&str>, + desc: Option<&str>, + lang: Option<&str>, + category: Option<&str>, +) { + let mut send_target = TARGET.write().unwrap(); + send_target + .as_mut() + .unwrap() + .send_epg_data(start, stop, title, desc, lang, category); +} + +/// Rust equivalent for `net_tcp_read` function in C. Uses Rust-native types as input and output. +pub fn net_tcp_read(buffer: &mut [u8]) -> Option { + let mut recv_source = SOURCE.write().unwrap(); + if let Ok(b) = recv_source.as_mut().unwrap().recv_header_or_cc() { + if let Some(block) = b { + buffer[..block.data().len()].copy_from_slice(block.data()); + Some(block.data().len()) + } else { + Some(0) + } + } else { + None + } +} + +/// Rust equivalent for `net_udp_read` function in C. Uses Rust-native types as input and output. +pub fn net_udp_read(buffer: &mut [u8]) -> Option { + let mut recv_source = SOURCE.write().unwrap(); + recv_source.as_mut().unwrap().recv(buffer).ok() +} + +/// Rust equivalent for `start_tcp_srv` function in C. Uses Rust-native types as input and output. +pub fn start_tcp_srv(port: Option, pwd: Option<&'static str>) { + let mut recv_source = SOURCE.write().unwrap(); + *recv_source = Some(RecvSource::new(RecvSourceConfig::Tcp { + port, + password: pwd, + })); +} + +/// Rust equivalent for `start_udp_srv` function in C. Uses Rust-native types as input and output. 
+pub fn start_udp_srv(src: Option<&'static str>, addr: Option<&'static str>, port: u16) { + let mut recv_source = SOURCE.write().unwrap(); + *recv_source = Some(RecvSource::new(RecvSourceConfig::Udp { + source: src, + address: addr, + port, + })); +} diff --git a/src/rust/lib_ccxr/src/util/net/mod.rs b/src/rust/lib_ccxr/src/util/net/mod.rs index 9d0c817d5..9a9f03aa4 100644 --- a/src/rust/lib_ccxr/src/util/net/mod.rs +++ b/src/rust/lib_ccxr/src/util/net/mod.rs @@ -15,6 +15,8 @@ mod common; mod source; mod target; +pub mod c_functions; + pub use common::*; pub use source::*; pub use target::*; diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index 2209d4b31..13019f13c 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -6,8 +6,10 @@ use lib_ccxr::util::log::*; use std::convert::TryInto; use std::os::raw::{c_char, c_int, c_uint}; +mod net; mod time; +pub use net::*; pub use time::*; /// Initializes the logger at the rust side. diff --git a/src/rust/src/libccxr_exports/net.rs b/src/rust/src/libccxr_exports/net.rs new file mode 100644 index 000000000..a9014613b --- /dev/null +++ b/src/rust/src/libccxr_exports/net.rs @@ -0,0 +1,232 @@ +use crate::bindings::*; + +use std::convert::TryInto; +use std::ffi::CStr; +use std::os::raw::{c_char, c_int, c_uchar, c_uint, c_void}; + +use lib_ccxr::util::net::c_functions::*; + +/// Rust equivalent for `connect_to_srv` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `addr` must not be null. All the strings must end with a nul character and be valid UTF-8, and `port`, when not null, must parse as a port number; otherwise the `unwrap` calls panic. The strings are handed to `connect_to_srv` as `&'static str`. NOTE(review): they presumably have to stay valid and unmodified for as long as the send target is in use; confirm against `SendTarget`.
+#[no_mangle] +pub unsafe extern "C" fn ccxr_connect_to_srv( + addr: *const c_char, + port: *const c_char, + cc_desc: *const c_char, + pwd: *const c_char, +) { + let addr = CStr::from_ptr(addr).to_str().unwrap(); + + let port = if !port.is_null() { + Some(CStr::from_ptr(port).to_str().unwrap().parse().unwrap()) + } else { + None + }; + + let cc_desc = if !cc_desc.is_null() { + Some(CStr::from_ptr(cc_desc).to_str().unwrap()) + } else { + None + }; + + let pwd = if !pwd.is_null() { + Some(CStr::from_ptr(pwd).to_str().unwrap()) + } else { + None + }; + + connect_to_srv(addr, port, cc_desc, pwd); +} + +/// Rust equivalent for `net_send_header` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `data` must not be null and must point to at least `len` readable bytes. +/// [`ccxr_connect_to_srv`] or `connect_to_srv` must have been called before this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_net_send_header(data: *const c_uchar, len: usize) { + let buffer = std::slice::from_raw_parts(data, len); + net_send_header(buffer); +} + +/// Rust equivalent for `net_send_cc` function in C. Uses C-native types as input and output. Returns 1 when `net_send_cc` returns `true` and -1 otherwise; `_private_data` and `_sub` are ignored. +/// +/// # Safety +/// +/// `data` must not be null and must point to at least `len` readable bytes. +/// [`ccxr_connect_to_srv`] or `connect_to_srv` must have been called before this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_net_send_cc( + data: *const c_uchar, + len: usize, + _private_data: *const c_void, + _sub: *const cc_subtitle, +) -> c_int { + let buffer = std::slice::from_raw_parts(data, len); + if net_send_cc(buffer) { + 1 + } else { + -1 + } +} + +/// Rust equivalent for `net_check_conn` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// [`ccxr_connect_to_srv`] or `connect_to_srv` must have been called before this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_net_check_conn() { + net_check_conn() +} + +/// Rust equivalent for `net_send_epg` function in C.
Uses C-native types as input and output. +/// +/// # Safety +/// +/// `start` and `stop` must not be null. All the strings must end with a nul character and be valid UTF-8, otherwise the `unwrap` calls panic. +/// [`ccxr_connect_to_srv`] or `connect_to_srv` must have been called before this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_net_send_epg( + start: *const c_char, + stop: *const c_char, + title: *const c_char, + desc: *const c_char, + lang: *const c_char, + category: *const c_char, +) { + let start = CStr::from_ptr(start).to_str().unwrap(); + let stop = CStr::from_ptr(stop).to_str().unwrap(); + + let title = if !title.is_null() { + Some(CStr::from_ptr(title).to_str().unwrap()) + } else { + None + }; + + let desc = if !desc.is_null() { + Some(CStr::from_ptr(desc).to_str().unwrap()) + } else { + None + }; + + let lang = if !lang.is_null() { + Some(CStr::from_ptr(lang).to_str().unwrap()) + } else { + None + }; + + let category = if !category.is_null() { + Some(CStr::from_ptr(category).to_str().unwrap()) + } else { + None + }; + + net_send_epg(start, stop, title, desc, lang, category) +} + +/// Rust equivalent for `net_tcp_read` function in C. Uses C-native types as input and output. Returns the value produced by `net_tcp_read` converted to `c_int`, or -1 when it returns `None`; `_socket` is ignored. +/// +/// # Safety +/// +/// `buffer` must not be null. It must point to at least `length` writable bytes. +/// [`ccxr_start_tcp_srv`] or `start_tcp_srv` must have been called before this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_net_tcp_read( + _socket: c_int, + buffer: *mut c_void, + length: usize, +) -> c_int { + let buffer = std::slice::from_raw_parts_mut(buffer as *mut u8, length); + let ans = net_tcp_read(buffer); + match ans { + Some(x) => x.try_into().unwrap(), + None => -1, + } +} + +/// Rust equivalent for `net_udp_read` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `buffer` must not be null. It must point to at least `length` writable bytes. +/// [`ccxr_start_udp_srv`] or `start_udp_srv` must have been called before this function.
+#[no_mangle] +pub unsafe extern "C" fn ccxr_net_udp_read( + _socket: c_int, + buffer: *mut c_void, + length: usize, + _src_str: *const c_char, + _addr_str: *const c_char, +) -> c_int { + let buffer = std::slice::from_raw_parts_mut(buffer as *mut u8, length); + let ans = net_udp_read(buffer); + match ans { + Some(x) => x.try_into().unwrap(), + None => -1, + } +} + +/// Rust equivalent for `start_tcp_srv` function in C. Uses C-native types as input and output. +/// +/// Note that this function always returns 1 as an fd, since it will not be used anyway. +/// +/// # Safety +/// +/// `port`, when not null, should hold a numerical 16-bit value. All the strings must end with a nul character and be valid UTF-8, otherwise the `unwrap` calls panic. +/// The output file descriptor should not be used. +#[no_mangle] +pub unsafe extern "C" fn ccxr_start_tcp_srv(port: *const c_char, pwd: *const c_char) -> c_int { + let port = if !port.is_null() { + Some(CStr::from_ptr(port).to_str().unwrap().parse().unwrap()) + } else { + None + }; + + let pwd = if !pwd.is_null() { + Some(CStr::from_ptr(pwd).to_str().unwrap()) + } else { + None + }; + + start_tcp_srv(port, pwd); + + 1 +} + +/// Rust equivalent for `start_udp_srv` function in C. Uses C-native types as input and output. +/// +/// Note that this function always returns 1 as an fd, since it will not be used anyway. +/// +/// # Safety +/// +/// `port` should be a 16-bit value; the conversion panics if it does not fit. All the strings must end with a nul character and be valid UTF-8. +/// The output file descriptor should not be used. +#[no_mangle] +pub unsafe extern "C" fn ccxr_start_udp_srv( + src: *const c_char, + addr: *const c_char, + port: c_uint, +) -> c_int { + let src = if !src.is_null() { + Some(CStr::from_ptr(src).to_str().unwrap()) + } else { + None + }; + + let addr = if !addr.is_null() { + Some(CStr::from_ptr(addr).to_str().unwrap()) + } else { + None + }; + + let port = port.try_into().unwrap(); + + start_udp_srv(src, addr, port); + + 1 +}