رُکُن:RishabhBot/source-code/gendered-langs.py
Applies the headword and etymology templates to entries in gendered (masculine/feminine) languages. To run it for another language, change that language's name and ISO code in the code. It was last run for Spanish.
import pywikibot
import re
# Target wiki: language code "ks", family "wiktionary".
site = pywikibot.Site("ks", "wiktionary")
# Article generator for the category whose entries are to be processed.
cat = pywikibot.Category(site, "زٲژ:ہِسپٲنوی عام ناوٕتؠ").articles()
# Materialise the generator once so the pages can be iterated as a list.
lst = list(cat)
def cont_elem(t, lst):
    """Return True if at least one element of ``lst`` occurs in ``t``.

    Args:
        t: Object searched with the ``in`` operator (a string in this script,
            so the check is a substring test).
        lst: Iterable of candidate elements to look for.

    Returns:
        True as soon as one candidate is found in ``t``; False otherwise,
        including when ``lst`` is empty.
    """
    return any(i in t for i in lst)
def contains_lang(t):
    """Return True if ``t`` contains any of the values stored in ``code_dict``.

    ``code_dict`` is looked up at call time, so it only needs to exist by the
    time this function is first called, not when it is defined.
    """
    return cont_elem(t, code_dict.values())
# Lookup table: language code -> language name string.
# The values are matched as substrings against page lines, and the keys are
# the codes written into the generated templates.
code_dict = {
    "ks": "کٲشُر",
    "en": "اَنٛگریٖزی",
    "it": "اِطٲلوی",
    "ur": "اُردوٗ",
    "bn": "بَنٛگٲلؠ",
    "ps": "پَشتوٗ",
    "pa": "پَنٛجٲبؠ",
    "inc-pra": "پرٛاکرِت",
    "ang": "پرٛون اَنٛگریٖزی",
    "roa-opt": "پرٛون پُرتَگٲلؠ",
    "goh": "پرٛون تھۆد جَرمَن",
    "ojp": "پرٛون جاپٲنؠ",
    "odt": "پرٛون ڈَچ",
    "fro": "پرٛون فرانسیٖسی",
    "peo": "پرٛون فارسی",
    "osp": "پرٛون ہِسپٲنوی",
    "grc": "پرٛون یوٗنٲنؠ",
    "pt": "پُرتَگٲلؠ",
    "ta": "تٲمِل",
    "tr": "تُرکی",
    "de": "جَرمَن",
    "ja": "جاپٲنؠ",
    "nl": "ڈَچ",
    "ru": "روٗسی",
    "sa": "سَنَسکرٕٛت",
    "sd": "سِندی",
    "ar": "عَربی",
    "ota": "عُثمٲنؠ تُرکی",
    "fr": "فرانسیٖسی",
    "fa": "فارسی",
    "kn": "کَنَڑ",
    "gu": "گُجرٲتؠ",
    "la": "لاطیٖنی",
    "mr": "مَرٲٹھؠ",
    "ml": "مَلیٲلؠ",
    "enm": "مَنٛز اَنٛگریٖزی",
    "gmh": "مَنٛز تھۆد جَرمَن",
    "dum": "مَنٛز ڈَچ",
    "frm": "مَنٛز فرانسیٖسی",
    "pal": "مَنٛز فارسی",
    "es": "ہِسپٲنوی",
    "hi": "ہِندی",
    "el": "یوٗنٲنؠ",
}
# Run-wide bookkeeping.
#   err_lst   -- "title : error" strings for pages whose processing raised.
#   to_change -- titles of pages whose rewritten text differs from the current text.
err_lst = []
to_change = []
to_change_g = True
for page in lst:
    # Any failure while handling one page (e.g. an expected pattern missing,
    # which surfaces as IndexError on a `[0]` lookup) is recorded and the run
    # moves on to the next page.
    try:
        # Pages whose title contains this marker are not processed
        # (presumably the template namespace prefix -- confirm).
        if "فرما" in page.title():
            continue
        print(page.title())
        text = page.text

        # Every "[...]" span in the page; the lazy match stops at the first "]".
        bracketed = re.findall(r"\[(.*?)\]", text)
        # Spans with neither ':' nor a nested '[' are used as the IPA value.
        ipa = [i for i in bracketed if ':' not in i and '[' not in i]
        summary = "Update with templates: headword, etymology"

        # Gender: exactly one italic marker in the text decides it; otherwise
        # fall back to the character following an existing "g=" parameter.
        if text.count("''نَر''") == 1:
            g = "m"
            to_change_g = True
        elif text.count("''مادٕ''") == 1:
            g = "f"
            to_change_g = True
        else:
            to_change_g = False
            g = re.findall("g=.", text)[0].replace("g=", "")

        # Bracketed spans containing "زٲژ"; lines containing any of them are
        # dropped from the output below.
        # NOTE: the condition used to read `'فرانسیٖسی' and "زٲژ" in i`. The
        # non-empty literal on the left is always truthy, so only the "زٲژ"
        # test ever had an effect; the dead operand is removed and the
        # effective behaviour is unchanged.
        cat_name = [i for i in bracketed if "زٲژ" in i]

        # A headword template is only emitted when an IPA value was found.
        to_change_head = bool(ipa)
        if to_change_head:
            out_head = '{{es-noun|ipa=' + ipa[0] + "|g=" + g + "}}\n"

        lines = text.split('\n')
        out_parts = []
        for l in lines:
            if to_change_head and ipa[0] in l:
                # Line carrying the IPA value -> replaced by the headword template.
                out_parts.append(out_head + '\n')
            elif cont_elem(l, cat_name):
                # Line carrying one of the collected "زٲژ" spans -> dropped.
                pass
            elif cont_elem(l, code_dict.values()) and "==" in l:
                # Line with a language name and "==" -> kept unchanged.
                out_parts.append(l + '\n')
            elif "پؠٹھٕ آمُت" in l:
                # Etymology line of the form "<language> پؠٹھٕ آمُت <word> (<tr>)"
                # -> rewritten as a {{from|...}} template.
                anc_w = re.findall(r"پؠٹھٕ آمُت(.*?)\(", l)[0].strip()
                anc_lang = re.findall(r"(.*?)\ پؠٹھٕ آمُت", l)[0].strip()
                tr = re.findall(r"\((.*?)\)", l)[0].strip()
                # Reverse lookup: language name -> code (IndexError if unknown).
                anc_code = [k for k, v in code_dict.items() if anc_lang == v][0]
                out_parts.append("{{from|es|" + anc_code + "|" + anc_w + "|" + tr + "}}\n")
            elif contains_lang(l):
                # Any other line mentioning a known language name -> dropped.
                pass
            else:
                out_parts.append(l + '\n')
        out = ''.join(out_parts)

        if out.strip() != page.text.strip():
            to_change.append(page.title())
            print(out)
            # NOTE(review): the assignment below is disabled, so `out` is only
            # printed and never written to the page before save() is called.
            # Re-enable it deliberately when the run is meant to edit pages.
            #page.text = out
            page.save(summary)
        else:
            print(page.title(), "not changed")
        print("BREAK")
    except Exception as e:
        err_lst.append(page.title() + " : " + str(e))

# End-of-run report: collected errors, changed titles, and their count.
print("Errors :" + str(err_lst))
print(str(to_change))
print(len(to_change))