Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mejorar script para encontrar diferencias de formato #1783

Open
wants to merge 5 commits into
base: 3.13
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 102 additions & 47 deletions scripts/format_differences.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,111 @@
import collections
import os
import glob

import re
import sys
from pathlib import Path
from pprint import pprint

import polib # fades

# Absolute path of the directory one level above the one holding this script.
PO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))



# Markup delimiters looked for in message text: inline literals and emphasis.
DELIMITERS = ("``", "*")


def has_delimiters(x):
    """Return True when *x* contains at least one of the DELIMITERS.

    Parameters:
        x: text to inspect (a whole message or a single word).

    Returns:
        True if any delimiter occurs as a substring of *x*, False otherwise.
    """
    return any(d in x for d in DELIMITERS)

# Entry point of the earlier revision of the script. NOTE(review): this text
# comes from a diff view, so the rest of this function's body appears further
# down, interleaved with the newer code.
def main():
# Collects, per .po file key, the entries whose delimiter counts differ.
files_with_differences = collections.defaultdict(list)

# NOTE(review): PO_DIR is built with os.path.abspath(), which yields no
# trailing separator, so this pattern is "<PO_DIR>**/**/*.po" rather than
# "<PO_DIR>/**/*.po" -- confirm that this is the intended glob.
for i, pofilename in enumerate(glob.glob(PO_DIR + '**/**/*.po')):
from typing import List

import polib

# Regular-expression sources for the inline reST/Sphinx markup the script
# looks for. The list starts with one entry per role (":role:`target`") and
# continues with literals, hyperlink references, bold and italic text.
_patterns = [
":c:func:`[^`]+`",
":c:type:`[^`]+`",
":c:macro:`[^`]+`",
":c:member:`[^`]+`",
":c:data:`[^`]+`",
":py:data:`[^`]+`",
":py:mod:`[^`]+`",
":func:`[^`]+`",
":mod:`[^`]+`",
Comment on lines +11 to +19
Copy link
Collaborator

@humitos humitos Oct 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yo creo que esto se podría escribir de una forma más genérica usando más regex:

: +[a-z]+:`[a-z]+`

Esa, por ejemplo, encuentra "uno o más espacios luego del primer :"

:[a-z]+: +`[a-z]+`

Encuentra "un espacio luego del segundo :"

De esta forma, solo tenemos que escribir "un par de regex" para encontrar los espacios en todas las posibles variaciones de todos los roles de Sphinx. ¿Qué te parece? Lo mismo se puede hacer para cursiva y negrita.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

me parece super :D

":ref:`[^`]+`",
":class:`[^`]+`",
":pep:`[^`]+`",
":data:`[^`]+`",
":exc:`[^`]+`",
":term:`[^`]+`",
":meth:`[^`]+`",
":envvar:`[^`]+`",
":file:`[^`]+`",
":attr:`[^`]+`",
":const:`[^`]+`",
":issue:`[^`]+`",
":opcode:`[^`]+`",
":option:`[^`]+`",
":program:`[^`]+`",
":keyword:`[^`]+`",
":RFC:`[^`]+`",
":rfc:`[^`]+`",
":doc:`[^`]+`",
"``[^`]+``",
"`[^`]+`__",
"`[^`]+`_",
# Raw strings: "\*" is not a valid str escape sequence and triggers a warning
# on current Python versions; the resulting pattern text is unchanged.
r"\*\*[^\*]+\*\*",  # bold text between **
r"\*[^\*]+\*",  # italic text between *
cmaureir marked this conversation as resolved.
Show resolved Hide resolved
]

_exps = [re.compile(e) for e in _patterns]


def get_sphinx_directives(s: str) -> List[str]:
    """Collect the inline reST/Sphinx markup found in *s*.

    Every compiled expression in ``_exps`` is applied in order. Each match is
    recorded and then removed from the working copy of the text, so that a
    later expression (e.g. italic ``*text*``) cannot match inside markup that
    an earlier one already collected (e.g. bold ``**text**``).

    Parameters:
        s: message text to scan (a msgid or a msgstr).

    Returns:
        List of the matched markup strings, grouped in ``_exps`` order rather
        than by their position in the text.
    """
    output: List[str] = []
    for exp in _exps:
        matches = exp.findall(s)
        output.extend(matches)
        for match in matches:
            # remove the found pattern from the original string
            s = s.replace(match, "")
    return output

def ind(level=0):
    """Return the indentation prefix for *level*: four spaces per level.

    Parameters:
        level: nesting depth; 0 (the default) yields an empty string.

    Returns:
        A string made only of spaces.
    """
    return " " * 4 * level

# Script entry point.
# NOTE(review): this region comes from a diff view: indentation is lost and
# lines of the previous and the new revision are interleaved. Read it as two
# overlapping versions, not as one runnable body.
if __name__ == "__main__":
# Parent of the directory that contains this script.
PO_DIR = Path(__file__).resolve().parent.parent
# .po files located under this directory are excluded from the scan.
VENV_DIR = PO_DIR / "venv"

# An optional first command-line argument names a directory (its *.po files
# are used) or a single file; a path that is neither ends the run.
if len(sys.argv) > 1:
filename = sys.argv[1]
files = []
if filename:
if Path(filename).is_dir():
files = [i for i in PO_DIR.glob(f"{filename}/*.po") if not i.is_relative_to(VENV_DIR)]
elif not Path(filename).is_file():
print(f"File not found: '{filename}'")
sys.exit(-1)
else:
files = [filename]
# NOTE(review): presumably this branch pairs with the sys.argv check above
# (no argument given: scan every .po file) -- indentation is lost, confirm.
else:
files = [i for i in PO_DIR.glob("**/**/*.po") if not i.is_relative_to(VENV_DIR)]

for i, pofilename in enumerate(files):
cmaureir marked this conversation as resolved.
Show resolved Hide resolved
print(f"\n> Processing {pofilename}")
po = polib.pofile(pofilename)
# Files that are less than 85% translated are skipped.
if po.percent_translated() < 85:
continue

for entry in po:
# NOTE(review): the word-based comparison below (through the
# files_with_differences update) appears to be the previous revision's logic.
words = []
wordsid = wordsstr = list()

if has_delimiters(entry.msgid):
wordsid = [word for word in entry.msgid.split() if has_delimiters(word)]

if has_delimiters(entry.msgstr):
wordsstr = [word for word in entry.msgstr.split() if has_delimiters(word)]

# Only the number of delimiter-bearing words is compared, not their content.
if len(wordsid) != len(wordsstr):
key = pofilename.replace(PO_DIR, '')
files_with_differences[key].append({
'occurrences': entry.occurrences,
'words': {
'original': wordsid,
'translated': wordsstr,
},
})
# Markup found in the original text and in its translation.
directives_id = get_sphinx_directives(entry.msgid)
directives_str = get_sphinx_directives(entry.msgstr)

return files_with_differences
# Check if any of them is not empty
if directives_id or directives_str:

# Check if the directives are the same
# NOTE(review): zip() stops at the shorter list, so markup present on only
# one side (e.g. missing from the translation) is never compared here.
for ori, dst in zip(directives_id, directives_str):
if ori == dst:
continue

pprint(main())
if ori != dst:
# Each occurrence is printed as "<t[0]>:<t[1]>"; presumably these are the
# source file and line of the message -- confirm against polib.
occs = [f"{ind(2)}{t[0]}:{t[1]}" for t in entry.occurrences]
print(f"\n{ind(1)}{pofilename}:{entry.linenum}")
print(f"\n".join(occs))
print(f"{ind(3)}{ori}")
print(f"{ind(3)}{dst}")