# -*- coding: utf-8 -*-
import pymysql as mdb
from colorama import Fore, Back, Style
from playwright.sync_api import sync_playwright
from anticaptchaofficial.imagecaptcha import *
import time, subprocess, sys, os, json, re
from multiprocessing import Process, Pool
from datetime import datetime, timedelta
from random import randint
import random, requests
import smtplib
from concurrent.futures import ThreadPoolExecutor


def get_proxy():
    """Pick a random proxy ("host:port") from the shared proxy list file.

    Returns:
        str: one randomly chosen, stripped line from
        /opt/aparser/files/proxy/proxy.txt.

    Raises:
        FileNotFoundError: if the proxy file is missing.
        IndexError: if the proxy file is empty.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on the garbage collector to close it).
    with open('/opt/aparser/files/proxy/proxy.txt') as fh:
        lines = fh.read().splitlines()
    return random.choice(lines).strip()


def begin(project_id):
    """Submit one batch of pirate URLs for *project_id* to Bing's DMCA
    content-removal form via Playwright, then record the result.

    End-to-end flow:
      1. Open a DB connection; sanity-check user-group settings and the
         stored Bing user cookies, writing the cookies to a temporary
         Playwright storage-state file.
      2. Pick one grouped ``google_form_bing`` row for the project, filter
         its reference links against extension/site exclusion tables, and
         collect the remaining pirate URLs plus up to two official
         ("white") URLs from the rightholder page.
      3. Write the pirate URLs to a text file (percent-encoding a handful
         of accented characters), launch headless Chromium through a
         random proxy with the stored cookies, fill the Bing form
         field-by-field and submit it.
      4. Persist the outcome to ``google_bing_form``, delete the processed
         ``google_form_bing`` rows on success, and flag the processed
         ``reference`` rows.

    Called from ``__main__`` through a ThreadPoolExecutor, one call per
    project id. Always returns ``None``; failures are reported by printing
    and returning early.

    NOTE(review): several early returns inside the Playwright context
    leave the browser/context and the DB connection open — confirm whether
    the ``with sync_playwright()`` teardown is considered sufficient.
    """
    project_id = str(project_id)
    try:
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration / secrets management.
        con = mdb.connect(host="195.201.99.130", user="axg_crm_new1", password="Yyv1QT2JUMLyxt71cYLG",
                          database="axg_crm_new", autocommit=True)
        cur = con.cursor(mdb.cursors.DictCursor)
    except:
        # Bare except: any connection failure makes this worker a silent no-op.
        return

    # Gate: the user group (fixed id 7) must exist and carry JSON settings
    # that include the auto_send_bing_form flag.
    cur.execute("SELECT * FROM user_groups WHERE user_group_id = '7'")
    if not cur.rowcount:
        print("User group is empty")
        con.close()
        return
    user_group = cur.fetchall()
    settings = user_group[0]['settings']
    if settings is None:
        print("User group settings is empty.")
        con.close()
        return
    settings = json.loads(settings)

    try:
        if 'auto_send_bing_form' not in settings:
            print(Fore.YELLOW + "auto_send_bing_form is empty.")
            print(Style.RESET_ALL)
            con.close()
            return
    except BaseException as e:
        print('Error: ' + str(e))
    # Load the stored Bing account cookies (fixed bing_user id 15).
    cur.execute("SELECT * FROM bing_user WHERE bing_user_id = '15'")
    if not cur.rowcount:
        print("Missing google user")
        con.close()
        return
    google_user = cur.fetchall()
    cookies = google_user[0]['cookies']
    # Wrap the stored cookie array into Playwright's storage_state JSON shape.
    # Assumes the `cookies` column holds a JSON array — TODO confirm.
    new_cookies = '{"cookies": ' + cookies + '}'

    # cookies_file = str(project_id) + '.json'
    # Per-run unique file name so concurrent workers don't clobber each other.
    cookies_file = str(project_id) + str(int(time.time())) + '_bing_cookies.json'
    with open(cookies_file, 'w') as fh:
        fh.write(new_cookies)

    # Accumulators across the (at most one, due to LIMIT 1) selected form row.
    link_ids = []
    form_ids = []
    skip_form_ids = []
    project_titles = []
    all_dmcaDescription = ""
    all_pirate_urls = []
    all_white_urls = []
    initKeys = ['date1', 'date2', 'date_form_sent']
    copyright_holder = ""
    project_content_type = ""

    # Pick one random grouped batch (<= 1000 links) for this project.
    cur.execute("SELECT * FROM google_form_bing WHERE project_id = %s and link_count < 1001 and grouped = 1 ORDER BY RAND() LIMIT 1", [project_id])
    running_projects = cur.fetchall()
    for running_project in running_projects:
        publisher = running_project['publisher']

        print(Fore.YELLOW + "Publisher: " + publisher)
        print("Project ID: " + project_id)
        print(Style.RESET_ALL)

        copyright_holder = publisher
        google_form_id = running_project['id']
        form_ids.append(google_form_id)

        project_name = running_project['project_name']
        current_link_ids = running_project['link_ids'].split('*')

        current_link_ids = list(set(current_link_ids))
        added_link_ids = []
        linksToSend = []
        pirateUrls = []
        whiteUrls = []

        project_official_rightholder_page = running_project['project_official_rightholder_page']
        project_author = running_project['project_author']
        project_title = running_project['project_title']
        # This becomes the form's "description of copyrighted work" text.
        all_dmcaDescription = project_name + " " + str(project_author)

        # Map the project's content-type name onto one of Bing's fixed
        # "type of work" dropdown labels.
        content_type = getContentType(con, cur, project_id)
        content_type_name = content_type['name']

        if 'Ebook' in content_type_name or 'ebook' in content_type_name:
            project_content_type = 'Ebook'
        if 'Film' in content_type_name:
            project_content_type = 'Film/TV'
        if 'text' in content_type_name or "Text" in content_type_name:
            project_content_type = 'Text'
        if 'Artwork' in content_type_name:
            project_content_type = 'Artwork'
        if 'Software' in content_type_name:
            project_content_type = 'Software'
        if 'Photograph' in content_type_name:
            project_content_type = 'Photograph'
        if 'Video' in content_type_name:
            project_content_type = 'Video'
        if 'Music' in content_type_name:
            project_content_type = 'Music'

        # Fetch the full reference rows for this batch's link ids.
        link_ids_str = ','.join(['%s'] * len(current_link_ids))
        sql = f"SELECT * FROM reference WHERE id IN ({link_ids_str})"
        cur.execute(sql, current_link_ids)
        reference_links = cur.fetchall()

        added_link_ids = []
        for link in reference_links:
            # print(link['id'])
            # Skip suspiciously small ids (< 4 digits) — presumably legacy
            # or placeholder rows; TODO confirm intent.
            if len(str(link['id'])) < 4:
                continue
            # Check if the link should be processed
            """
            if link['google_form_sent'] == 1:
                continue
            """
            linksToSend.append(link)
            added_link_ids.append(link['id'])

        # Filter each candidate link through the exclusion rules; anything
        # filtered out still has its id recorded in link_ids so it gets
        # flagged as processed at the end.
        for link in linksToSend:
            displayLink = link['displayLink']
            # Truncated URLs ("...") can't be submitted.
            if '...' in link['link']:
                link_ids.append(link['id'])
                continue

            # NOTE(review): this exclusion-extension query runs once per
            # link; it could be hoisted out of the loop.
            cur.execute("SELECT google_file_extention FROM google_extension_exclusions")
            excluded_extensions = [row['google_file_extention'] for row in cur.fetchall()]

            if any(link['link'].endswith(ext) for ext in excluded_extensions):
                link_ids.append(link['id'])
                continue

            # Site-level exclusion: try the exact host first, then a
            # wildcard form of its registrable domain ("*.example.com").
            cur.execute("SELECT * FROM google_exclusions_bing WHERE google_exclusion_site_url = %s", [displayLink])
            if not cur.rowcount:
                o = displayLink.split('.')
                if len(o) > 2:
                    new_display = '*.' + o[-2].strip() + '.' + o[-1].strip()
                else:
                    new_display = '*.' + displayLink
                cur.execute("SELECT * FROM google_exclusions_bing WHERE google_exclusion_site_url = %s", [new_display])
                if not cur.rowcount:
                    if 'blogspot.com' in link['link']:
                        link_ids.append(link['id'])
                        continue
                    if '.jpg' not in link['link']:
                        # print(link['link'])
                        pirateUrls.append(link['link'])
                else:
                    print("exclusion link: " + link['link'])
                    link_ids.append(link['id'])
            else:
                print("exclusion link: " + link['link'])
            # NOTE(review): this append runs for every non-continued link,
            # so ids hitting the branches above can be appended twice —
            # confirm whether duplicates in link_ids are harmless.
            link_ids.append(link['id'])
        linksToSend = []
        # Nothing submittable in this batch: drop the batch row and move on.
        if len(pirateUrls) < 1:
            sql = "DELETE FROM google_form_bing WHERE id=%s"
            cur.execute(sql, [google_form_id])
            con.commit()
            continue

        # Extract up to two official URLs from the rightholder page after
        # stripping HTML tags.
        if project_official_rightholder_page is not None:
            cleanr = re.compile('<.*?>')
            project_official_rightholder_page = re.sub(cleanr, '', project_official_rightholder_page)
            all_urls = re.findall(r'(https?://[^\s]+)', project_official_rightholder_page)

            for j in range(2):
                try:
                    whiteUrls.append(all_urls[j])
                except:
                    pass

        whiteUrls = list(set(whiteUrls))

        print("Count links to send: " + str(len(pirateUrls)))
        all_pirate_urls.append(pirateUrls)
        all_white_urls.append(whiteUrls)

        print("#######################################################################")

    if len(all_pirate_urls) < 1:
        # NOTE(review): if running_projects was empty, google_form_id was
        # never assigned and this DELETE raises NameError — confirm.
        sql = "DELETE FROM google_form_bing WHERE id=%s"
        cur.execute(sql, [google_form_id])
        con.close()
        return

    # Write the bulk-submission URL file served from /var/www/html.
    the_file_name = str(randint(1001, 209991)) + '_bing.txt'
    file_name = '/var/www/html/' + the_file_name
    bing_load_form_url = 'https://www.bing.com/webmaster/tools/contentremovalform'
    with open(file_name, 'w', encoding="utf-8") as fh:
        for a_pirate_urls in all_pirate_urls[0]:
            # Manual percent-encoding of the accented characters seen in
            # practice (presumably the form rejects raw non-ASCII URLs).
            a_pirate_urls = a_pirate_urls.replace('è', '%C3%A8')
            a_pirate_urls = a_pirate_urls.replace('é', '%C3%A9')
            a_pirate_urls = a_pirate_urls.replace('ù', '%C3%B9')
            a_pirate_urls = a_pirate_urls.replace('ú', '%C3%BA')
            a_pirate_urls = a_pirate_urls.replace('à', '%C3%A0')
            a_pirate_urls = a_pirate_urls.replace('á', '%C3%A1')
            a_pirate_urls = a_pirate_urls.replace('í', '%C3%AD')
            a_pirate_urls = a_pirate_urls.replace('ò', '%C3%B2')
            a_pirate_urls = a_pirate_urls.replace('ó', '%C3%B3')
            a_pirate_urls = a_pirate_urls.replace('ö', '%C3%B6')
            a_pirate_urls = a_pirate_urls.replace('й', '%D0%B9')
            a_pirate_urls = a_pirate_urls.replace('ц', '%D1%86')
            fh.write(a_pirate_urls + "\n")

    # Random proxy + random user agent (generated by an external node script).
    proxy = get_proxy()
    proxy_to_use = {
        'server': 'http://' + proxy,
    }
    command = "node /home/moses/scripts/user_agents.js"

    try:
        useragent = subprocess.check_output(command, shell=True)
    except BaseException as e:
        print(str(e))
        con.close()
        return
    useragent = useragent.decode().rstrip("\n")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, proxy=proxy_to_use)
        context = browser.new_context(
            user_agent=useragent,
            storage_state=cookies_file,
            record_video_dir="/var/www/html/google_videos",
            locale='en-US',
            timezone_id="Europe/Tallinn"
        )
        context.tracing.start(screenshots=True, snapshots=True, sources=True)
        page = context.new_page()
        page.set_default_timeout(120000)
        try:
            page.goto(bing_load_form_url)
            time.sleep(10)
        except BaseException as e:
            print(str(e))
            pirateUrls = []
            linksToSend = []
            # NOTE(review): returns without con.close()/browser.close().
            return

        video_name = os.path.basename(page.video.path())
        content = page.content()

        # Cookie expiry detection: the form demands sign-in.
        if page.locator('//div[contains(text(),"Please Sign In to submit an Infringement Notice")]').is_visible():
            print("Cookies Expired")
            try:
                send_mail_for_cookies(cur, "cookies expired")
            except Exception as e:
                print(e)
            page.screenshot(path=str(int(time.time())) + "_screenshot.png")
            return

        # The Bing form is addressed positionally by its 'formrow' divs;
        # all the fixed indexes below depend on Bing's current page layout.
        try:
            formrows = page.query_selector_all("//div[@class='formrow']")
            search_type = formrows[0]
        except BaseException as e:
            print(str(e))
            if "list index out of range" in str(e):
                # NOTE(review): this condition is a bare non-empty string
                # literal, so it is ALWAYS true — almost certainly meant
                # `... in content`. As written, the send_mail_for_cookies
                # call below it is unreachable.
                if "This page contains the following errors":
                    page.screenshot(path=str(int(time.time())) + "_screenshot.png")
                    return
                send_mail_for_cookies(cur, "Cannot find form row")
            if 'Bing Webmaster services could not be reached' in content:
                page.screenshot(path=str(int(time.time())) + "_screenshot.png")
                return
            if 'Please Sign In to submit an Infringement' in content:
                send_mail_for_cookies(cur, str(e))
                page.screenshot(path=str(int(time.time())) + "_screenshot.png")
                return
            page.screenshot(path=str(int(time.time())) + "_screenshot.png")
            return

        # formrows[0]: which Bing property the notice targets.
        form_columns = search_type.query_selector_all("//div[@class='formcolumn']")
        search_column = form_columns[1]
        select = search_column.query_selector('select')
        select.select_option(label='Web')

        # formrows[1..6]: reporter identity / contact details.
        first_name_field = formrows[1]
        first_name_field.query_selector('input').fill("Oleksandr")

        last_name_field = formrows[2]
        last_name_field.query_selector('input').fill("Honcharenko")

        org_field = formrows[3]
        org_field.query_selector('input').fill("AXGHOUSE ANTIPIRACY OÜ")

        copyright_field = formrows[4]
        copyright_field.query_selector('input').fill(copyright_holder)

        holder_country = formrows[5]
        form_columns = holder_country.query_selector_all("//div[@class='formcolumn']")
        search_column = form_columns[1]
        select = search_column.query_selector('select')
        select.select_option(label='Estonia')

        email_address = formrows[6]
        email_address.query_selector('input').fill("removals@axghouse.com")

        # formrows[7]: type of copyrighted work.
        type_of_work = formrows[7]
        form_columns = type_of_work.query_selector_all("//div[@class='formcolumn']")
        search_column = form_columns[1]
        select = search_column.query_selector('select')
        select.select_option(label=project_content_type)

        white_url = all_white_urls[0][0]

        # Copyrighted work --add first official link
        example_copyrighted_work = formrows[11]
        form_columns = example_copyrighted_work.query_selector_all("//div[@class='formcolumn']")
        search_column = form_columns[1]
        search_column.query_selector('input').fill(white_url)

        # Description --add Title and Author
        description = formrows[13]
        desc = all_dmcaDescription
        description.query_selector('textarea').fill(desc)

        # formrows[23]: bulk-submission file upload (the pirate URL list).
        bulk_sub = formrows[23]
        inputs = bulk_sub.query_selector_all("input")

        page.set_input_files('#bulkSubmission', file_name)

        # formrows[27..29]: the three legal attestation checkboxes.
        good_faith_belief = formrows[27]
        form_columns = good_faith_belief.query_selector_all("//div[@class='formcolumn']")
        accept_column = form_columns[1]
        accept_column.query_selector('input').click()
        time.sleep(2)

        authority_to_act = formrows[28]
        form_columns = authority_to_act.query_selector_all("//div[@class='formcolumn']")
        accept_column = form_columns[1]
        accept_column.query_selector('input').click()
        time.sleep(2)

        ack = formrows[29]
        form_columns = ack.query_selector_all("//div[@class='formcolumn']")
        accept_column = form_columns[1]
        accept_column.query_selector('input').click()
        time.sleep(2)

        # formrows[31]: electronic signature.
        sign = formrows[31]
        form_columns = sign.query_selector_all("//div[@class='formcolumn']")
        accept_column = form_columns[1]
        accept_column.query_selector('input').fill('Oleksandr Honcharenko')

        page.keyboard.press("PageDown")
        page.keyboard.press("PageDown")

        page.query_selector("//input[@class='submitbutton']").click()
        print(Fore.GREEN + "Sending form ...")
        print(Style.RESET_ALL)
        time.sleep(10)

        content = page.content()

        # Success is detected by the confirmation ticket page; the second
        # <h2> inside formcolumn_ticketdetails holds the report id.
        result_form = content
        report_id = ""
        success_form = False
        warning_msg = ""
        if 'Submission Date (UTC)' in content:
            report_id_field = page.query_selector("//div[@class='formcolumn_ticketdetails']")
            report_id_parts = report_id_field.query_selector_all("h2")
            report_id = report_id_parts[1].inner_text().strip()
            error_msg = 'Your removal notice was submitted successfully'
            form_state = "5"
            print(report_id)
            success_form = True
        else:
            error_msg = "Submission Failed"
            form_state = "-1"
            warning_msg = "Error"
        context.tracing.stop(path="trace.zip")

        # Persist the submission result (success or failure) for auditing.
        data = {
            'state': form_state,
            'error_msg': error_msg,
            'warning_msg': warning_msg,
            'google_user': report_id,
            'google_user_email': 'removals@axghouse.com',
            'copyright_holder': copyright_holder,
            'white_urls': json.dumps(all_white_urls[0]),
            'pirate_urls': json.dumps(all_pirate_urls[0]),
            'description': all_dmcaDescription,
            'project_id': project_id,
            'link_ids': ','.join(str(e) for e in link_ids),
            'project_name': project_name,
            'result_form': result_form,
            'load_form_error_count': '0',
            'text_file': the_file_name,
            'video': video_name
        }
        print(Fore.YELLOW + "Saving to DB ")
        print(Style.RESET_ALL)
        try:
            keys = ','.join(data.keys())
            values = ','.join(['%s'] * len(data))
            sql = 'INSERT INTO {thetable}({keys}) VALUES({values})'.format(thetable='google_bing_form', keys=keys,
                                                                           values=values)
            if cur.execute(sql, tuple(data.values())):
                con.commit()

        except BaseException as e:
            print("Error: " + str(e))

        # On success, the consumed batch rows are removed.
        if success_form:
            for a_form_id in form_ids:
                try:
                    sql = "DELETE FROM google_form_bing WHERE id=%s"
                    cur.execute(sql, [a_form_id])
                except:
                    print("Something went wrong ... ")
        try:
            # NOTE(review): the cookies file was created in the current
            # working directory, but removal targets /home/moses/ — confirm
            # the script always runs from that directory.
            os.remove('/home/moses/' + cookies_file)
        except:
            pass
        # NOTE(review): no WHERE clause — increments count on every
        # bing_user row; presumably the table holds a single row.
        sql = "UPDATE bing_user SET count = count + 1"
        cur.execute(sql)
        context.close()
        browser.close()

    # Mark every touched reference row (sent or excluded) as processed.
    cur.execute("START TRANSACTION")

    try:
        placeholders = ",".join(["%s"] * len(link_ids))
        sql_update = f"UPDATE reference SET sub_parent_id = '1' WHERE id IN ({placeholders})"

        cur.execute(sql_update, tuple(link_ids))

        con.commit()

    except Exception as e:
        con.rollback()
        print(f"Error updating records: {e}")

    try:
        con.close()
    except:
        return
    return


def regroup_projects(con, cur):
    """Re-batch ``google_form_bing`` rows so each row holds at most 1000 links.

    For every project: concatenate all stored ``link_ids`` lists,
    de-duplicate them, re-insert them as grouped rows (``grouped = 1``) in
    chunks of up to 1000, then delete the original ungrouped rows.

    Args:
        con: open pymysql connection (commit happens here at the end).
        cur: DictCursor on the same connection.
    """
    # GROUP_CONCAT silently truncates at 1024 bytes by default; raise the
    # limit so large link lists survive aggregation.
    cur.execute("set group_concat_max_len = 1045576")
    con.commit()
    print("Started grouping projects")
    cur.execute("""
                SELECT project_id, 
                       GROUP_CONCAT(id SEPARATOR ',') AS id_list,
                       GROUP_CONCAT(link_ids SEPARATOR '*') AS combined_links,
                       publisher, 
                       project_name, 
                       project_title, 
                       project_author, 
                       project_official_rightholder_page
                FROM google_form_bing 
                GROUP BY project_id;
            """)
    if cur.rowcount == 0:
        print("No entries")
        return
    projects = cur.fetchall()

    insert_sql = """
                INSERT INTO google_form_bing (project_id, link_ids, link_count, publisher, project_name,
                project_title, project_author, project_official_rightholder_page, grouped)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            """

    for project in projects:
        project_id = project['project_id']
        id_list = project['id_list']
        publisher = project['publisher']
        project_name = project['project_name']
        project_title = project['project_title']
        project_author = project['project_author']
        project_official_rightholder_page = project['project_official_rightholder_page']

        print(f'Project ID: {project_id}')

        # De-duplicate link ids across all original rows for this project.
        links = list(set(project['combined_links'].split('*')))

        batch_links = []
        for link in links:
            batch_links.append(link)
            if len(batch_links) == 1000:
                _insert_link_batch(cur, insert_sql, project_id, batch_links,
                                   publisher, project_name, project_title,
                                   project_author,
                                   project_official_rightholder_page)
                batch_links = []

        # Flush the final partial batch, if any.
        if batch_links:
            _insert_link_batch(cur, insert_sql, project_id, batch_links,
                               publisher, project_name, project_title,
                               project_author,
                               project_official_rightholder_page)

        # Remove the original ungrouped rows. Parameterized IN clause
        # (bugfix: the original interpolated id_list directly into the SQL).
        ids = id_list.split(',')
        placeholders = ','.join(['%s'] * len(ids))
        cur.execute(f"DELETE FROM google_form_bing WHERE id IN ({placeholders})", ids)

    con.commit()


def _insert_link_batch(cur, insert_sql, project_id, batch_links, publisher,
                       project_name, project_title, project_author,
                       project_official_rightholder_page):
    """Insert one grouped google_form_bing row for up to 1000 link ids."""
    # Count only non-empty ids (bugfix: the original counted empty strings
    # whenever a full batch of 1000 was flushed, but filtered them for the
    # final partial batch, producing inconsistent link_count values).
    link_count = len([x for x in batch_links if x != ''])
    cur.execute(insert_sql, (
        project_id, '*'.join(batch_links), link_count, publisher, project_name,
        project_title, project_author, project_official_rightholder_page, 1))


def getContentType(con, cur, project_id):
    """Resolve the content-type row for *project_id*.

    Looks up the project's ``content_type_id`` in ``project_content_types``,
    then fetches the matching row from ``content_types``.

    Args:
        con: open pymysql connection (unused here, kept for interface
             compatibility with callers).
        cur: DictCursor on the database.
        project_id: project identifier (str or int).

    Returns:
        dict: the matching ``content_types`` row, or "" when the project has
        no mapping or the mapping is dangling (callers must check before
        indexing the result).
    """
    sql = "SELECT * FROM project_content_types WHERE project_id = (%s)"
    cur.execute(sql, [project_id])
    if not cur.rowcount:
        return ""
    rows = cur.fetchall()
    content_type_id = rows[0]['content_type_id']

    sql = "SELECT * FROM content_types WHERE content_type_id = (%s)"
    cur.execute(sql, [str(content_type_id)])
    if not cur.rowcount:
        # Bugfix: the original indexed the result unconditionally and raised
        # IndexError when content_type_id had no matching content_types row.
        return ""
    rowss = cur.fetchall()
    return rowss[0]


def getCookies(cur):
    """Fetch the single bing_user row (cookies, status, counters).

    Args:
        cur: DictCursor on the database.

    Returns:
        list[dict]: at most one bing_user row.
    """
    cur.execute("SELECT * FROM bing_user LIMIT 1")
    return cur.fetchall()


def send_mail_for_cookies(cur, subject):
    """Notify the operators by e-mail that the stored Bing cookies expired.

    Rate-limited: only proceeds when the bing_cookies_expiry_email row(s)
    were added more than three hours ago. Also disables the Bing user
    (``status = '0'``) so the main script stops running until the cookies
    are refreshed.

    Args:
        cur: open DictCursor on the database (autocommit connection).
        subject: short text appended to the e-mail subject line.
    """
    # Three-hour rate-limit window (the original misleadingly named this
    # local "one_hour").
    cutoff = datetime.now() - timedelta(hours=3)
    ti_ = cutoff.strftime('%Y-%m-%d %H:%M:%S')

    sql = "SELECT * FROM bing_cookies_expiry_email WHERE date_added < %s"
    cur.execute(sql, [ti_])
    if not cur.rowcount:  # do not send email
        return

    # Bugfix: the original executed this same UPDATE a second time at the
    # end of the function, double-incrementing the counter per notification.
    sql = "UPDATE bing_cookies_expiry_email SET count = count + 1"
    cur.execute(sql)

    # Disable the Bing user so the worker loop stops until cookies refresh.
    sql = "UPDATE bing_user SET status = '0'"
    cur.execute(sql)

    # SECURITY NOTE(review): SMTP credentials are hard-coded; move them to
    # configuration / secrets management.
    username = 'removals@axghouse.com'
    password = 'Y7sL9C2LNfWJ67mP@'

    sender = 'removals@axghouse.com'
    receivers = ['pandimoses@gmail.com', 'support@axghouse.com','santhoshkasturi7@gmail.com']
    body = "Please update the cookies, bing script stopped running."

    # Raw RFC-822 style message assembled by hand (headers + blank line + body).
    message = f"""\
From: Axghouse Antipiracy <removals@axghouse.com>
MIME-Version: 1.0
Content-type: text/html
Subject: Cookies Expired. Bing Form {subject}

{body}
"""
    try:
        server = smtplib.SMTP('mail.your-server.de', 587)
        server.starttls()
        server.login(username, password)
        server.sendmail(sender, receivers, message)
        server.quit()
        print("Email sent successfully!")
    except smtplib.SMTPException as e:
        print("Error sending email:", e)


if __name__ == "__main__":
    # Entry point: connect to the CRM database, check the Bing user is
    # enabled, regroup pending batches, then fan out up to 100 random
    # projects across a thread pool.
    try:
        # NOTE(review): hard-coded DB credentials, duplicated from begin();
        # consider centralizing in configuration.
        con = mdb.connect(host="195.201.99.130", user="axg_crm_new1", password="Yyv1QT2JUMLyxt71cYLG",
                          database="axg_crm_new", autocommit=True)
        cur = con.cursor(mdb.cursors.DictCursor)
    except:
        # Bare except: any connection failure exits silently.
        sys.exit()

    # Abort when the Bing user was disabled (e.g. after cookie expiry).
    bing_user = getCookies(cur)
    status = bing_user[0]['status']

    if int(status) == 0:
        print("Bing User disabled")
        sys.exit()

    # Coalesce pending rows into <=1000-link batches before dispatching.
    regroup_projects(con, cur)

    sql = "SELECT DISTINCT(project_id) FROM google_form_bing ORDER BY RAND() LIMIT 100"
    cur.execute(sql)
    if not cur.rowcount:
        print("Nothing found --quitting")

        cur.close()
        con.close()
        sys.exit()

    project_ids = cur.fetchall()
    themids = []
    for project in project_ids:
        project_id_ = project['project_id']
        themids.append(project_id_)

    # Close the shared connection; each begin() worker opens its own.
    cur.close()
    con.close()

    # NOTE(review): 25 concurrent headless-Chromium workers is memory
    # heavy — confirm the host is sized for this.
    with ThreadPoolExecutor(max_workers=25) as executor:
        futures = [executor.submit(begin, project_id) for project_id in themids]
        for future in futures:
            stat = future.result()
