-
Notifications
You must be signed in to change notification settings - Fork 15
Extract style tags and non dialogue words. #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 17 commits
3e285ab
8c97741
b2e35c5
72560fd
24fbf16
7defad5
617bde8
e1511e6
779f07b
80b60bc
7fde062
fdc9212
c9ce550
796ea56
4241a68
8c67266
d89de38
586106b
ff4d226
2d70316
1a67859
b3a503d
c6ca75c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,6 @@ class SubtitleItem | |
std::vector<std::string> getNonDialogueWords(); //return string vector of non dialogue words | ||
std::vector<std::string> getStyleTags(); //return string vector of style tags | ||
|
||
|
||
void setStartTime(long int startTime); //set starting time | ||
void setEndTime(long int endTime); //set ending time | ||
void setText(std::string text); //set subtitle text | ||
|
@@ -382,17 +381,12 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue | |
//stripping HTML tags | ||
if(!keepHTML) | ||
{ | ||
/* | ||
* TODO : Before erasing, extract the words. | ||
* std::vector<std::string> getStyleTags(); | ||
* int getStyleTagCount() const; | ||
* std::vector<std::string> _styleTag; | ||
* int _styleTagCount; | ||
*/ | ||
|
||
int countP = 0; | ||
std::string tag; | ||
for(char& c : output) // replacing <...> with ~~~~ | ||
{ | ||
|
||
if(c=='<') | ||
{ | ||
countP++; | ||
|
@@ -403,34 +397,37 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue | |
{ | ||
if(countP!=0) | ||
{ | ||
if(c != '>') | ||
c = '~'; | ||
|
||
if(c != '>'){ | ||
tag += c; | ||
c = '~'; | ||
} | ||
else if(c == '>') | ||
{ | ||
c = '~'; | ||
countP--; | ||
_styleTagCount++; | ||
if(tag[0] == '/'){ | ||
tag.erase(0,1); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This |
||
_nonDialogue.push_back(tag); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will insert tags into the nonDialogue vector. |
||
tag=""; | ||
} | ||
} | ||
} | ||
|
||
} | ||
|
||
} | ||
|
||
//stripping non dialogue data e.g. (applause) | ||
|
||
if(!doNotIgnoreNonDialogues) | ||
{ | ||
/* | ||
* TODO : Before erasing, extract the words. | ||
* std::vector<std::string> getNonDialogueWords(); | ||
* int getNonDialogueCount() const; | ||
* std::vector<std::string> _nonDialogue; | ||
* int _nonDialogueCount; | ||
*/ | ||
|
||
int countP = 0; | ||
std::string tag; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this called |
||
for(char& c : output) // replacing (...) with ~~~~ | ||
{ | ||
|
||
if(c=='(') | ||
{ | ||
countP++; | ||
|
@@ -441,13 +438,17 @@ inline void SubtitleItem::extractInfo(bool keepHTML, bool doNotIgnoreNonDialogue | |
{ | ||
if(countP!=0) | ||
{ | ||
if(c != ')') | ||
if(c != ')'){ | ||
tag.push_back(c); | ||
c = '~'; | ||
|
||
} | ||
else if(c == ')') | ||
{ | ||
c = '~'; | ||
countP--; | ||
_nonDialogueCount++; | ||
_nonDialogue.push_back(tag); | ||
tag=""; | ||
} | ||
} | ||
} | ||
|
@@ -654,4 +655,4 @@ inline SubtitleWord::~SubtitleWord(void) | |
} | ||
|
||
|
||
#endif //SRTPARSER_H | ||
#endif //SRTPARSER_H |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will not erase the complete enclosing tag. If it starts with `</`, it need not be stored.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Additionally, `<font></font>` and all such pairs should be counted as a single unit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not adding the < and > characters. I'm ignoring them and only adding characters until I reach >. The problem with that approach was that the / character would get added, and since I'm not adding <, the tag is /font, for example, so the / will always be first.