example_input.jsonl

{"repo": "soimort/you-get", "path": "src/you_get/extractors/miomio.py", "func_name": "sina_xml_to_url_list", "original_string": "def sina_xml_to_url_list(xml_data):\n    \"\"\"str->list\n    Convert XML to URL List.\n    From Biligrab.\n    \"\"\"\n    rawurl = []\n    dom = parseString(xml_data)\n    for node in dom.getElementsByTagName('durl'):\n        url = node.getElementsByTagName('url')[0]\n        rawurl.append(url.childNodes[0].data)\n    return rawurl", "language": "python", "code": "def sina_xml_to_url_list(xml_data):\n    \"\"\"str->list\n    Convert XML to URL List.\n    From Biligrab.\n    \"\"\"\n    rawurl = []\n    dom = parseString(xml_data)\n    for node in dom.getElementsByTagName('durl'):\n        url = node.getElementsByTagName('url')[0]\n        rawurl.append(url.childNodes[0].data)\n    return rawurl", "code_tokens": ["def", "sina_xml_to_url_list", "(", "xml_data", ")", ":", "rawurl", "=", "[", "]", "dom", "=", "parseString", "(", "xml_data", ")", "for", "node", "in", "dom", ".", "getElementsByTagName", "(", "'durl'", ")", ":", "url", "=", "node", ".", "getElementsByTagName", "(", "'url'", ")", "[", "0", "]", "rawurl", ".", "append", "(", "url", ".", "childNodes", "[", "0", "]", ".", "data", ")", "return", "rawurl"], "docstring": "str->list\n    Convert XML to URL List.\n    From Biligrab.", "docstring_tokens": ["str", "-", ">", "list", "Convert", "XML", "to", "URL", "List", ".", "From", "Biligrab", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/miomio.py#L41-L51", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/dailymotion.py", "func_name": "dailymotion_download", "original_string": "def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads Dailymotion videos by URL.\n    \"\"\"\n\n    html = get_content(rebuilt_url(url))\n    info = json.loads(match1(html, r'qualities\":({.+?}),\"'))\n    title = match1(html, r'\"video_title\"\\s*:\\s*\"([^\"]+)\"') or \\\n            match1(html, r'\"title\"\\s*:\\s*\"([^\"]+)\"')\n    title = unicodize(title)\n\n    for quality in ['1080','720','480','380','240','144','auto']:\n        try:\n            real_url = info[quality][1][\"url\"]\n            if real_url:\n                break\n        except KeyError:\n            pass\n\n    mime, ext, size = url_info(real_url)\n\n    print_info(site_info, title, mime, size)\n    if not info_only:\n        download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge)", "language": "python", "code": "def dailymotion_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads Dailymotion videos by URL.\n    \"\"\"\n\n    html = get_content(rebuilt_url(url))\n    info = json.loads(match1(html, r'qualities\":({.+?}),\"'))\n    title = match1(html, r'\"video_title\"\\s*:\\s*\"([^\"]+)\"') or \\\n            match1(html, r'\"title\"\\s*:\\s*\"([^\"]+)\"')\n    title = unicodize(title)\n\n    for quality in ['1080','720','480','380','240','144','auto']:\n        try:\n            real_url = info[quality][1][\"url\"]\n            if real_url:\n                break\n        except KeyError:\n            pass\n\n    mime, ext, size = url_info(real_url)\n\n    print_info(site_info, title, mime, size)\n    if not info_only:\n        download_urls([real_url], title, ext, size, output_dir=output_dir, merge=merge)", "code_tokens": ["def", "dailymotion_download", "(", "url", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "html", "=", "get_content", "(", "rebuilt_url", "(", "url", ")", ")", "info", "=", "json", ".", "loads", "(", "match1", "(", "html", ",", "r'qualities\":({.+?}),\"'", ")", ")", "title", "=", "match1", "(", "html", ",", "r'\"video_title\"\\s*:\\s*\"([^\"]+)\"'", ")", "or", "match1", "(", "html", ",", "r'\"title\"\\s*:\\s*\"([^\"]+)\"'", ")", "title", "=", "unicodize", "(", "title", ")", "for", "quality", "in", "[", "'1080'", ",", "'720'", ",", "'480'", ",", "'380'", ",", "'240'", ",", "'144'", ",", "'auto'", "]", ":", "try", ":", "real_url", "=", "info", "[", "quality", "]", "[", "1", "]", "[", "\"url\"", "]", "if", "real_url", ":", "break", "except", "KeyError", ":", "pass", "mime", ",", "ext", ",", "size", "=", "url_info", "(", "real_url", ")", "print_info", "(", "site_info", ",", "title", ",", "mime", ",", "size", ")", "if", "not", "info_only", ":", "download_urls", "(", "[", "real_url", "]", ",", "title", ",", "ext", ",", "size", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ")"], "docstring": "Downloads Dailymotion videos by URL.", "docstring_tokens": ["Downloads", "Dailymotion", "videos", "by", "URL", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/dailymotion.py#L13-L35", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/sina.py", "func_name": "sina_download", "original_string": "def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads Sina videos by URL.\n    \"\"\"\n    if 'news.sina.com.cn/zxt' in url:\n        sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)\n        return\n\n    vid = match1(url, r'vid=(\\d+)')\n    if vid is None:\n        video_page = get_content(url)\n        vid = hd_vid = match1(video_page, r'hd_vid\\s*:\\s*\\'([^\\']+)\\'')\n        if hd_vid == '0':\n            vids = match1(video_page, r'[^\\w]vid\\s*:\\s*\\'([^\\']+)\\'').split('|')\n            vid = vids[-1]\n\n    if vid is None:\n        vid = match1(video_page, r'vid:\"?(\\d+)\"?')\n    if vid:\n        #title = match1(video_page, r'title\\s*:\\s*\\'([^\\']+)\\'')\n        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)\n    else:\n        vkey = match1(video_page, r'vkey\\s*:\\s*\"([^\"]+)\"')\n        if vkey is None:\n            vid = match1(url, r'#(\\d+)')\n            sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)\n            return\n        title = match1(video_page, r'title\\s*:\\s*\"([^\"]+)\"')\n        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)", "language": "python", "code": "def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads Sina videos by URL.\n    \"\"\"\n    if 'news.sina.com.cn/zxt' in url:\n        sina_zxt(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)\n        return\n\n    vid = match1(url, r'vid=(\\d+)')\n    if vid is None:\n        video_page = get_content(url)\n        vid = hd_vid = match1(video_page, r'hd_vid\\s*:\\s*\\'([^\\']+)\\'')\n        if hd_vid == '0':\n            vids = match1(video_page, r'[^\\w]vid\\s*:\\s*\\'([^\\']+)\\'').split('|')\n            vid = vids[-1]\n\n    if vid is None:\n        vid = match1(video_page, r'vid:\"?(\\d+)\"?')\n    if vid:\n        #title = match1(video_page, r'title\\s*:\\s*\\'([^\\']+)\\'')\n        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)\n    else:\n        vkey = match1(video_page, r'vkey\\s*:\\s*\"([^\"]+)\"')\n        if vkey is None:\n            vid = match1(url, r'#(\\d+)')\n            sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)\n            return\n        title = match1(video_page, r'title\\s*:\\s*\"([^\"]+)\"')\n        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)", "code_tokens": ["def", "sina_download", "(", "url", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "if", "'news.sina.com.cn/zxt'", "in", "url", ":", "sina_zxt", "(", "url", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ",", "*", "*", "kwargs", ")", "return", "vid", "=", "match1", "(", "url", ",", "r'vid=(\\d+)'", ")", "if", "vid", "is", "None", ":", "video_page", "=", "get_content", "(", "url", ")", "vid", "=", "hd_vid", "=", "match1", "(", "video_page", ",", "r'hd_vid\\s*:\\s*\\'([^\\']+)\\''", ")", "if", "hd_vid", "==", "'0'", ":", "vids", "=", "match1", "(", "video_page", ",", "r'[^\\w]vid\\s*:\\s*\\'([^\\']+)\\''", ")", ".", "split", "(", "'|'", ")", "vid", "=", "vids", "[", "-", "1", "]", "if", "vid", "is", "None", ":", "vid", "=", "match1", "(", "video_page", ",", "r'vid:\"?(\\d+)\"?'", ")", "if", "vid", ":", "#title = match1(video_page, r'title\\s*:\\s*\\'([^\\']+)\\'')", "sina_download_by_vid", "(", "vid", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "else", ":", "vkey", "=", "match1", "(", "video_page", ",", "r'vkey\\s*:\\s*\"([^\"]+)\"'", ")", "if", "vkey", "is", "None", ":", "vid", "=", "match1", "(", "url", ",", "r'#(\\d+)'", ")", "sina_download_by_vid", "(", "vid", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "return", "title", "=", "match1", "(", "video_page", ",", "r'title\\s*:\\s*\"([^\"]+)\"'", ")", "sina_download_by_vkey", "(", "vkey", ",", "title", "=", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")"], "docstring": "Downloads Sina videos by URL.", "docstring_tokens": ["Downloads", "Sina", "videos", "by", "URL", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/sina.py#L94-L121", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/log.py", "func_name": "sprint", "original_string": "def sprint(text, *colors):\n    \"\"\"Format text with color or other effects into ANSI escaped string.\"\"\"\n    return \"\\33[{}m{content}\\33[{}m\".format(\";\".join([str(color) for color in colors]), RESET, content=text) if IS_ANSI_TERMINAL and colors else text", "language": "python", "code": "def sprint(text, *colors):\n    \"\"\"Format text with color or other effects into ANSI escaped string.\"\"\"\n    return \"\\33[{}m{content}\\33[{}m\".format(\";\".join([str(color) for color in colors]), RESET, content=text) if IS_ANSI_TERMINAL and colors else text", "code_tokens": ["def", "sprint", "(", "text", ",", "*", "colors", ")", ":", "return", "\"\\33[{}m{content}\\33[{}m\"", ".", "format", "(", "\";\"", ".", "join", "(", "[", "str", "(", "color", ")", "for", "color", "in", "colors", "]", ")", ",", "RESET", ",", "content", "=", "text", ")", "if", "IS_ANSI_TERMINAL", "and", "colors", "else", "text"], "docstring": "Format text with color or other effects into ANSI escaped string.", "docstring_tokens": ["Format", "text", "with", "color", "or", "other", "effects", "into", "ANSI", "escaped", "string", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/log.py#L60-L62", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/log.py", "func_name": "print_log", "original_string": "def print_log(text, *colors):\n    \"\"\"Print a log message to standard error.\"\"\"\n    sys.stderr.write(sprint(\"{}: {}\".format(script_name, text), *colors) + \"\\n\")", "language": "python", "code": "def print_log(text, *colors):\n    \"\"\"Print a log message to standard error.\"\"\"\n    sys.stderr.write(sprint(\"{}: {}\".format(script_name, text), *colors) + \"\\n\")", "code_tokens": ["def", "print_log", "(", "text", ",", "*", "colors", ")", ":", "sys", ".", "stderr", ".", "write", "(", "sprint", "(", "\"{}: {}\"", ".", "format", "(", "script_name", ",", "text", ")", ",", "*", "colors", ")", "+", "\"\\n\"", ")"], "docstring": "Print a log message to standard error.", "docstring_tokens": ["Print", "a", "log", "message", "to", "standard", "error", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/log.py#L72-L74", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/log.py", "func_name": "e", "original_string": "def e(message, exit_code=None):\n    \"\"\"Print an error log message.\"\"\"\n    print_log(message, YELLOW, BOLD)\n    if exit_code is not None:\n        sys.exit(exit_code)", "language": "python", "code": "def e(message, exit_code=None):\n    \"\"\"Print an error log message.\"\"\"\n    print_log(message, YELLOW, BOLD)\n    if exit_code is not None:\n        sys.exit(exit_code)", "code_tokens": ["def", "e", "(", "message", ",", "exit_code", "=", "None", ")", ":", "print_log", "(", "message", ",", "YELLOW", ",", "BOLD", ")", "if", "exit_code", "is", "not", "None", ":", "sys", ".", "exit", "(", "exit_code", ")"], "docstring": "Print an error log message.", "docstring_tokens": ["Print", "an", "error", "log", "message", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/log.py#L88-L92", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/log.py", "func_name": "wtf", "original_string": "def wtf(message, exit_code=1):\n    \"\"\"What a Terrible Failure!\"\"\"\n    print_log(message, RED, BOLD)\n    if exit_code is not None:\n        sys.exit(exit_code)", "language": "python", "code": "def wtf(message, exit_code=1):\n    \"\"\"What a Terrible Failure!\"\"\"\n    print_log(message, RED, BOLD)\n    if exit_code is not None:\n        sys.exit(exit_code)", "code_tokens": ["def", "wtf", "(", "message", ",", "exit_code", "=", "1", ")", ":", "print_log", "(", "message", ",", "RED", ",", "BOLD", ")", "if", "exit_code", "is", "not", "None", ":", "sys", ".", "exit", "(", "exit_code", ")"], "docstring": "What a Terrible Failure!", "docstring_tokens": ["What", "a", "Terrible", "Failure!"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/log.py#L94-L98", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/os.py", "func_name": "detect_os", "original_string": "def detect_os():\n    \"\"\"Detect operating system.\n    \"\"\"\n\n    # Inspired by:\n    # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py\n\n    syst = system().lower()\n    os = 'unknown'\n\n    if 'cygwin' in syst:\n        os = 'cygwin'\n    elif 'darwin' in syst:\n        os = 'mac'\n    elif 'linux' in syst:\n        os = 'linux'\n        # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423\n        try:\n            with open('/proc/version', 'r') as f:\n                if 'microsoft' in f.read().lower():\n                    os = 'wsl'\n        except: pass\n    elif 'windows' in syst:\n        os = 'windows'\n    elif 'bsd' in syst:\n        os = 'bsd'\n\n    return os", "language": "python", "code": "def detect_os():\n    \"\"\"Detect operating system.\n    \"\"\"\n\n    # Inspired by:\n    # https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py\n\n    syst = system().lower()\n    os = 'unknown'\n\n    if 'cygwin' in syst:\n        os = 'cygwin'\n    elif 'darwin' in syst:\n        os = 'mac'\n    elif 'linux' in syst:\n        os = 'linux'\n        # detect WSL https://github.com/Microsoft/BashOnWindows/issues/423\n        try:\n            with open('/proc/version', 'r') as f:\n                if 'microsoft' in f.read().lower():\n                    os = 'wsl'\n        except: pass\n    elif 'windows' in syst:\n        os = 'windows'\n    elif 'bsd' in syst:\n        os = 'bsd'\n\n    return os", "code_tokens": ["def", "detect_os", "(", ")", ":", "# Inspired by:", "# https://github.com/scivision/pybashutils/blob/78b7f2b339cb03b1c37df94015098bbe462f8526/pybashutils/windows_linux_detect.py", "syst", "=", "system", "(", ")", ".", "lower", "(", ")", "os", "=", "'unknown'", "if", "'cygwin'", "in", "syst", ":", "os", "=", "'cygwin'", "elif", "'darwin'", "in", "syst", ":", "os", "=", "'mac'", "elif", "'linux'", "in", "syst", ":", "os", "=", "'linux'", "# detect WSL https://github.com/Microsoft/BashOnWindows/issues/423", "try", ":", "with", "open", "(", "'/proc/version'", ",", "'r'", ")", "as", "f", ":", "if", "'microsoft'", "in", "f", ".", "read", "(", ")", ".", "lower", "(", ")", ":", "os", "=", "'wsl'", "except", ":", "pass", "elif", "'windows'", "in", "syst", ":", "os", "=", "'windows'", "elif", "'bsd'", "in", "syst", ":", "os", "=", "'bsd'", "return", "os"], "docstring": "Detect operating system.", "docstring_tokens": ["Detect", "operating", "system", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/os.py#L5-L32", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/vimeo.py", "func_name": "vimeo_download_by_channel", "original_string": "def vimeo_download_by_channel(url, output_dir='.', merge=False, info_only=False, **kwargs):\n    \"\"\"str->None\"\"\"\n    # https://vimeo.com/channels/464686\n    channel_id = match1(url, r'http://vimeo.com/channels/(\\w+)')\n    vimeo_download_by_channel_id(channel_id, output_dir, merge, info_only, **kwargs)", "language": "python", "code": "def vimeo_download_by_channel(url, output_dir='.', merge=False, info_only=False, **kwargs):\n    \"\"\"str->None\"\"\"\n    # https://vimeo.com/channels/464686\n    channel_id = match1(url, r'http://vimeo.com/channels/(\\w+)')\n    vimeo_download_by_channel_id(channel_id, output_dir, merge, info_only, **kwargs)", "code_tokens": ["def", "vimeo_download_by_channel", "(", "url", ",", "output_dir", "=", "'.'", ",", "merge", "=", "False", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "# https://vimeo.com/channels/464686", "channel_id", "=", "match1", "(", "url", ",", "r'http://vimeo.com/channels/(\\w+)'", ")", "vimeo_download_by_channel_id", "(", "channel_id", ",", "output_dir", ",", "merge", ",", "info_only", ",", "*", "*", "kwargs", ")"], "docstring": "str->None", "docstring_tokens": ["str", "-", ">", "None"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/vimeo.py#L15-L19", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/ckplayer.py", "func_name": "ckplayer_get_info_by_xml", "original_string": "def ckplayer_get_info_by_xml(ckinfo):\n    \"\"\"str->dict\n    Information for CKPlayer API content.\"\"\"\n    e = ET.XML(ckinfo)\n    video_dict = {'title': '',\n                  #'duration': 0,\n                  'links': [],\n                  'size': 0,\n                  'flashvars': '',}\n    dictified = dictify(e)['ckplayer']\n    if 'info' in dictified:\n        if '_text' in dictified['info'][0]['title'][0]:  #title\n            video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip()\n\n    #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip():  #duration\n        #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()\n\n    if '_text' in dictified['video'][0]['size'][0]:  #size exists for 1 piece\n        video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']])\n\n    if '_text' in dictified['video'][0]['file'][0]:  #link exist\n        video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']]\n\n    if '_text' in dictified['flashvars'][0]:\n        video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip()\n\n    return video_dict", "language": "python", "code": "def ckplayer_get_info_by_xml(ckinfo):\n    \"\"\"str->dict\n    Information for CKPlayer API content.\"\"\"\n    e = ET.XML(ckinfo)\n    video_dict = {'title': '',\n                  #'duration': 0,\n                  'links': [],\n                  'size': 0,\n                  'flashvars': '',}\n    dictified = dictify(e)['ckplayer']\n    if 'info' in dictified:\n        if '_text' in dictified['info'][0]['title'][0]:  #title\n            video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip()\n\n    #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip():  #duration\n        #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()\n\n    if '_text' in dictified['video'][0]['size'][0]:  #size exists for 1 piece\n        video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']])\n\n    if '_text' in dictified['video'][0]['file'][0]:  #link exist\n        video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']]\n\n    if '_text' in dictified['flashvars'][0]:\n        video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip()\n\n    return video_dict", "code_tokens": ["def", "ckplayer_get_info_by_xml", "(", "ckinfo", ")", ":", "e", "=", "ET", ".", "XML", "(", "ckinfo", ")", "video_dict", "=", "{", "'title'", ":", "''", ",", "#'duration': 0,", "'links'", ":", "[", "]", ",", "'size'", ":", "0", ",", "'flashvars'", ":", "''", ",", "}", "dictified", "=", "dictify", "(", "e", ")", "[", "'ckplayer'", "]", "if", "'info'", "in", "dictified", ":", "if", "'_text'", "in", "dictified", "[", "'info'", "]", "[", "0", "]", "[", "'title'", "]", "[", "0", "]", ":", "#title", "video_dict", "[", "'title'", "]", "=", "dictified", "[", "'info'", "]", "[", "0", "]", "[", "'title'", "]", "[", "0", "]", "[", "'_text'", "]", ".", "strip", "(", ")", "#if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip():  #duration", "#video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()", "if", "'_text'", "in", "dictified", "[", "'video'", "]", "[", "0", "]", "[", "'size'", "]", "[", "0", "]", ":", "#size exists for 1 piece", "video_dict", "[", "'size'", "]", "=", "sum", "(", "[", "int", "(", "i", "[", "'size'", "]", "[", "0", "]", "[", "'_text'", "]", ")", "for", "i", "in", "dictified", "[", "'video'", "]", "]", ")", "if", "'_text'", "in", "dictified", "[", "'video'", "]", "[", "0", "]", "[", "'file'", "]", "[", "0", "]", ":", "#link exist", "video_dict", "[", "'links'", "]", "=", "[", "i", "[", "'file'", "]", "[", "0", "]", "[", "'_text'", "]", ".", "strip", "(", ")", "for", "i", "in", "dictified", "[", "'video'", "]", "]", "if", "'_text'", "in", "dictified", "[", "'flashvars'", "]", "[", "0", "]", ":", "video_dict", "[", "'flashvars'", "]", "=", "dictified", "[", "'flashvars'", "]", "[", "0", "]", "[", "'_text'", "]", ".", "strip", "(", ")", "return", "video_dict"], "docstring": "str->dict\n    Information for CKPlayer API content.", "docstring_tokens": ["str", "-", ">", "dict", "Information", "for", "CKPlayer", "API", "content", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/ckplayer.py#L13-L39", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/ixigua.py", "func_name": "get_video_url_from_video_id", "original_string": "def get_video_url_from_video_id(video_id):\n    \"\"\"Splicing URLs according to video ID to get video details\"\"\"\n    # from js\n    data = [\"\"] * 256\n    for index, _ in enumerate(data):\n        t = index\n        for i in range(8):\n            t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)\n        data[index] = t\n\n    def tmp():\n        rand_num = random.random()\n        path = \"/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}\".format(video_id=video_id,\n                                                                              random_num=str(rand_num)[2:])\n        e = o = r = -1\n        i, a = 0, len(path)\n        while i < a:\n            e = ord(path[i])\n            i += 1\n            if e < 128:\n                r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]\n            else:\n                if e < 2048:\n                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]\n                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]\n                else:\n                    if 55296 <= e < 57344:\n                        e = (1023 & e) + 64\n                        i += 1\n                        o = 1023 & t.url(i)\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]\n                    else:\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]\n\n        return \"https://ib.365yg.com{path}&s={param}\".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))\n\n    while 1:\n        url = tmp()\n        if url.split(\"=\")[-1][0] != \"-\":  # \u53c2\u6570s\u4e0d\u80fd\u4e3a\u8d1f\u6570\n            return url", "language": "python", "code": "def get_video_url_from_video_id(video_id):\n    \"\"\"Splicing URLs according to video ID to get video details\"\"\"\n    # from js\n    data = [\"\"] * 256\n    for index, _ in enumerate(data):\n        t = index\n        for i in range(8):\n            t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)\n        data[index] = t\n\n    def tmp():\n        rand_num = random.random()\n        path = \"/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}\".format(video_id=video_id,\n                                                                              random_num=str(rand_num)[2:])\n        e = o = r = -1\n        i, a = 0, len(path)\n        while i < a:\n            e = ord(path[i])\n            i += 1\n            if e < 128:\n                r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]\n            else:\n                if e < 2048:\n                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]\n                    r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]\n                else:\n                    if 55296 <= e < 57344:\n                        e = (1023 & e) + 64\n                        i += 1\n                        o = 1023 & t.url(i)\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]\n                    else:\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]\n                        r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]\n\n        return \"https://ib.365yg.com{path}&s={param}\".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))\n\n    while 1:\n        url = tmp()\n        if url.split(\"=\")[-1][0] != \"-\":  # \u53c2\u6570s\u4e0d\u80fd\u4e3a\u8d1f\u6570\n            return url", "code_tokens": ["def", "get_video_url_from_video_id", "(", "video_id", ")", ":", "# from js", "data", "=", "[", "\"\"", "]", "*", "256", "for", "index", ",", "_", "in", "enumerate", "(", "data", ")", ":", "t", "=", "index", "for", "i", "in", "range", "(", "8", ")", ":", "t", "=", "-", "306674912", "^", "unsigned_right_shitf", "(", "t", ",", "1", ")", "if", "1", "&", "t", "else", "unsigned_right_shitf", "(", "t", ",", "1", ")", "data", "[", "index", "]", "=", "t", "def", "tmp", "(", ")", ":", "rand_num", "=", "random", ".", "random", "(", ")", "path", "=", "\"/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}\"", ".", "format", "(", "video_id", "=", "video_id", ",", "random_num", "=", "str", "(", "rand_num", ")", "[", "2", ":", "]", ")", "e", "=", "o", "=", "r", "=", "-", "1", "i", ",", "a", "=", "0", ",", "len", "(", "path", ")", "while", "i", "<", "a", ":", "e", "=", "ord", "(", "path", "[", "i", "]", ")", "i", "+=", "1", "if", "e", "<", "128", ":", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "e", ")", "]", "else", ":", "if", "e", "<", "2048", ":", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "192", "|", "e", ">>", "6", "&", "31", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "63", "&", "e", ")", ")", "]", "else", ":", "if", "55296", "<=", "e", "<", "57344", ":", "e", "=", "(", "1023", "&", "e", ")", "+", "64", "i", "+=", "1", "o", "=", "1023", "&", "t", ".", "url", "(", "i", ")", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "240", "|", "e", ">>", "8", "&", "7", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "e", ">>", "2", "&", "63", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "o", ">>", "6", "&", "15", "|", "(", "3", "&", "e", ")", "<<", "4", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "63", "&", "o", ")", ")", "]", "else", ":", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "224", "|", "e", ">>", "12", "&", "15", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "e", ">>", "6", "&", "63", ")", ")", "]", "r", "=", "unsigned_right_shitf", "(", "r", ",", "8", ")", "^", "data", "[", "255", "&", "(", "r", "^", "(", "128", "|", "63", "&", "e", ")", ")", "]", "return", "\"https://ib.365yg.com{path}&s={param}\"", ".", "format", "(", "path", "=", "path", ",", "param", "=", "unsigned_right_shitf", "(", "r", "^", "-", "1", ",", "0", ")", ")", "while", "1", ":", "url", "=", "tmp", "(", ")", "if", "url", ".", "split", "(", "\"=\"", ")", "[", "-", "1", "]", "[", "0", "]", "!=", "\"-\"", ":", "# \u53c2\u6570s\u4e0d\u80fd\u4e3a\u8d1f\u6570", "return", "url"], "docstring": "Splicing URLs according to video ID to get video details", "docstring_tokens": ["Splicing", "URLs", "according", "to", "video", "ID", "to", "get", "video", "details"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/ixigua.py#L34-L78", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/mgtv.py", "func_name": "MGTV.get_mgtv_real_url", "original_string": "def get_mgtv_real_url(url):\n        \"\"\"str->list of str\n        Give you the real URLs.\"\"\"\n        content = loads(get_content(url))\n        m3u_url = content['info']\n        split = urlsplit(m3u_url)\n        \n        base_url = \"{scheme}://{netloc}{path}/\".format(scheme = split[0],\n                                                      netloc = split[1],\n                                                      path = dirname(split[2]))\n\n        content = get_content(content['info'])  #get the REAL M3U url, maybe to be changed later?\n        segment_list = []\n        segments_size = 0\n        for i in content.split():\n            if not i.startswith('#'):  #not the best way, better we use the m3u8 package\n                segment_list.append(base_url + i)\n            # use ext-info for fast size calculate\n            elif i.startswith('#EXT-MGTV-File-SIZE:'):\n                segments_size += int(i[i.rfind(':')+1:])\n\n        return m3u_url, segments_size, segment_list", "language": "python", "code": "def get_mgtv_real_url(url):\n        \"\"\"str->list of str\n        Give you the real URLs.\"\"\"\n        content = loads(get_content(url))\n        m3u_url = content['info']\n        split = urlsplit(m3u_url)\n        \n        base_url = \"{scheme}://{netloc}{path}/\".format(scheme = split[0],\n                                                      netloc = split[1],\n                                                      path = dirname(split[2]))\n\n        content = get_content(content['info'])  #get the REAL M3U url, maybe to be changed later?\n        segment_list = []\n        segments_size = 0\n        for i in content.split():\n            if not i.startswith('#'):  #not the best way, better we use the m3u8 package\n                segment_list.append(base_url + i)\n            # use ext-info for fast size calculate\n            elif i.startswith('#EXT-MGTV-File-SIZE:'):\n                segments_size += int(i[i.rfind(':')+1:])\n\n        return m3u_url, segments_size, segment_list", "code_tokens": ["def", "get_mgtv_real_url", "(", "url", ")", ":", "content", "=", "loads", "(", "get_content", "(", "url", ")", ")", "m3u_url", "=", "content", "[", "'info'", "]", "split", "=", "urlsplit", "(", "m3u_url", ")", "base_url", "=", "\"{scheme}://{netloc}{path}/\"", ".", "format", "(", "scheme", "=", "split", "[", "0", "]", ",", "netloc", "=", "split", "[", "1", "]", ",", "path", "=", "dirname", "(", "split", "[", "2", "]", ")", ")", "content", "=", "get_content", "(", "content", "[", "'info'", "]", ")", "#get the REAL M3U url, maybe to be changed later?", "segment_list", "=", "[", "]", "segments_size", "=", "0", "for", "i", "in", "content", ".", "split", "(", ")", ":", "if", "not", "i", ".", "startswith", "(", "'#'", ")", ":", "#not the best way, better we use the m3u8 package", "segment_list", ".", "append", "(", "base_url", "+", "i", ")", "# use ext-info for fast size calculate", "elif", "i", ".", "startswith", "(", "'#EXT-MGTV-File-SIZE:'", ")", ":", "segments_size", "+=", "int", "(", "i", "[", "i", ".", "rfind", "(", "':'", ")", "+", "1", ":", "]", ")", "return", "m3u_url", ",", "segments_size", ",", "segment_list"], "docstring": "str->list of str\n        Give you the real URLs.", "docstring_tokens": ["str", "-", ">", "list", "of", "str", "Give", "you", "the", "real", "URLs", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/mgtv.py#L37-L58", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/util/fs.py", "func_name": "legitimize", "original_string": "def legitimize(text, os=detect_os()):\n    \"\"\"Converts a string to a valid filename.\n    \"\"\"\n\n    # POSIX systems\n    text = text.translate({\n        0: None,\n        ord('/'): '-',\n        ord('|'): '-',\n    })\n\n    # FIXME: do some filesystem detection\n    if os == 'windows' or os == 'cygwin' or os == 'wsl':\n        # Windows (non-POSIX namespace)\n        text = text.translate({\n            # Reserved in Windows VFAT and NTFS\n            ord(':'): '-',\n            ord('*'): '-',\n            ord('?'): '-',\n            ord('\\\\'): '-',\n            ord('\\\"'): '\\'',\n            # Reserved in Windows VFAT\n            ord('+'): '-',\n            ord('<'): '-',\n            ord('>'): '-',\n            ord('['): '(',\n            ord(']'): ')',\n            ord('\\t'): ' ',\n        })\n    else:\n        # *nix\n        if os == 'mac':\n            # Mac OS HFS+\n            text = text.translate({\n                ord(':'): '-',\n            })\n\n        # Remove leading .\n        if text.startswith(\".\"):\n            text = text[1:]\n\n    text = text[:80] # Trim to 82 Unicode characters long\n    return text", "language": "python", "code": "def legitimize(text, os=detect_os()):\n    \"\"\"Converts a string to a valid filename.\n    \"\"\"\n\n    # POSIX systems\n    text = text.translate({\n        0: None,\n        ord('/'): '-',\n        ord('|'): '-',\n    })\n\n    # FIXME: do some filesystem detection\n    if os == 'windows' or os == 'cygwin' or os == 'wsl':\n        # Windows (non-POSIX namespace)\n        text = text.translate({\n            # Reserved in Windows VFAT and NTFS\n            ord(':'): '-',\n            ord('*'): '-',\n            ord('?'): '-',\n            ord('\\\\'): '-',\n            ord('\\\"'): '\\'',\n            # Reserved in Windows VFAT\n            ord('+'): '-',\n            ord('<'): '-',\n            ord('>'): '-',\n            ord('['): '(',\n            ord(']'): ')',\n            ord('\\t'): ' ',\n        })\n    else:\n        # *nix\n        if os == 'mac':\n            # Mac OS HFS+\n            text = text.translate({\n                ord(':'): '-',\n            })\n\n        # Remove leading .\n        if text.startswith(\".\"):\n            text = text[1:]\n\n    text = text[:80] # Trim to 82 Unicode characters long\n    return text", "code_tokens": ["def", "legitimize", "(", "text", ",", "os", "=", "detect_os", "(", ")", ")", ":", "# POSIX systems", "text", "=", "text", ".", "translate", "(", "{", "0", ":", "None", ",", "ord", "(", "'/'", ")", ":", "'-'", ",", "ord", "(", "'|'", ")", ":", "'-'", ",", "}", ")", "# FIXME: do some filesystem detection", "if", "os", "==", "'windows'", "or", "os", "==", "'cygwin'", "or", "os", "==", "'wsl'", ":", "# Windows (non-POSIX namespace)", "text", "=", "text", ".", "translate", "(", "{", "# Reserved in Windows VFAT and NTFS", "ord", "(", "':'", ")", ":", "'-'", ",", "ord", "(", "'*'", ")", ":", "'-'", ",", "ord", "(", "'?'", ")", ":", "'-'", ",", "ord", "(", "'\\\\'", ")", ":", "'-'", ",", "ord", "(", "'\\\"'", ")", ":", "'\\''", ",", "# Reserved in Windows VFAT", "ord", "(", "'+'", ")", ":", "'-'", ",", "ord", "(", "'<'", ")", ":", "'-'", ",", "ord", "(", "'>'", ")", ":", "'-'", ",", "ord", "(", "'['", ")", ":", "'('", ",", "ord", "(", "']'", ")", ":", "')'", ",", "ord", "(", "'\\t'", ")", ":", "' '", ",", "}", ")", "else", ":", "# *nix", "if", "os", "==", "'mac'", ":", "# Mac OS HFS+", "text", "=", "text", ".", "translate", "(", "{", "ord", "(", "':'", ")", ":", "'-'", ",", "}", ")", "# Remove leading .", "if", "text", ".", "startswith", "(", "\".\"", ")", ":", "text", "=", "text", "[", "1", ":", "]", "text", "=", "text", "[", ":", "80", "]", "# Trim to 82 Unicode characters long", "return", "text"], "docstring": "Converts a string to a valid filename.", "docstring_tokens": ["Converts", "a", "string", "to", "a", "valid", "filename", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/util/fs.py#L5-L47", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/cbs.py", "func_name": "cbs_download", "original_string": "def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads CBS videos by URL.\n    \"\"\"\n\n    html = get_content(url)\n    pid = match1(html, r'video\\.settings\\.pid\\s*=\\s*\\'([^\\']+)\\'')\n    title = match1(html, r'video\\.settings\\.title\\s*=\\s*\\\"([^\\\"]+)\\\"')\n\n    theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only)", "language": "python", "code": "def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"Downloads CBS videos by URL.\n    \"\"\"\n\n    html = get_content(url)\n    pid = match1(html, r'video\\.settings\\.pid\\s*=\\s*\\'([^\\']+)\\'')\n    title = match1(html, r'video\\.settings\\.title\\s*=\\s*\\\"([^\\\"]+)\\\"')\n\n    theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only)", "code_tokens": ["def", "cbs_download", "(", "url", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "html", "=", "get_content", "(", "url", ")", "pid", "=", "match1", "(", "html", ",", "r'video\\.settings\\.pid\\s*=\\s*\\'([^\\']+)\\''", ")", "title", "=", "match1", "(", "html", ",", "r'video\\.settings\\.title\\s*=\\s*\\\"([^\\\"]+)\\\"'", ")", "theplatform_download_by_pid", "(", "pid", ",", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")"], "docstring": "Downloads CBS videos by URL.", "docstring_tokens": ["Downloads", "CBS", "videos", "by", "URL", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/cbs.py#L9-L17", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/iqiyi.py", "func_name": "Iqiyi.download", "original_string": "def download(self, **kwargs):\n        \"\"\"Override the original one\n        Ugly ugly dirty hack\"\"\"\n        if 'json_output' in kwargs and kwargs['json_output']:\n            json_output.output(self)\n        elif 'info_only' in kwargs and kwargs['info_only']:\n            if 'stream_id' in kwargs and kwargs['stream_id']:\n                # Display the stream\n                stream_id = kwargs['stream_id']\n                if 'index' not in kwargs:\n                    self.p(stream_id)\n                else:\n                    self.p_i(stream_id)\n            else:\n                # Display all available streams\n                if 'index' not in kwargs:\n                    self.p([])\n                else:\n                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']\n                    self.p_i(stream_id)\n\n        else:\n            if 'stream_id' in kwargs and kwargs['stream_id']:\n                # Download the stream\n                stream_id = kwargs['stream_id']\n            else:\n                # Download stream with the best quality\n                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']\n\n            if 'index' not in kwargs:\n                self.p(stream_id)\n            else:\n                self.p_i(stream_id)\n\n            if stream_id in self.streams:\n                urls = self.streams[stream_id]['src']\n                ext = self.streams[stream_id]['container']\n                total_size = self.streams[stream_id]['size']\n            else:\n                urls = self.dash_streams[stream_id]['src']\n                ext = self.dash_streams[stream_id]['container']\n                total_size = self.dash_streams[stream_id]['size']\n\n            if not urls:\n                log.wtf('[Failed] Cannot extract video source.')\n            # For legacy main()\n            \n            #Here's the change!!\n            download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)\n\n            if not kwargs['caption']:\n                print('Skipping captions.')\n                return\n            for lang in self.caption_tracks:\n                filename = '%s.%s.srt' % (get_filename(self.title), lang)\n                print('Saving %s ... ' % filename, end=\"\", flush=True)\n                srt = self.caption_tracks[lang]\n                with open(os.path.join(kwargs['output_dir'], filename),\n                          'w', encoding='utf-8') as x:\n                    x.write(srt)\n                print('Done.')", "language": "python", "code": "def download(self, **kwargs):\n        \"\"\"Override the original one\n        Ugly ugly dirty hack\"\"\"\n        if 'json_output' in kwargs and kwargs['json_output']:\n            json_output.output(self)\n        elif 'info_only' in kwargs and kwargs['info_only']:\n            if 'stream_id' in kwargs and kwargs['stream_id']:\n                # Display the stream\n                stream_id = kwargs['stream_id']\n                if 'index' not in kwargs:\n                    self.p(stream_id)\n                else:\n                    self.p_i(stream_id)\n            else:\n                # Display all available streams\n                if 'index' not in kwargs:\n                    self.p([])\n                else:\n                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']\n                    self.p_i(stream_id)\n\n        else:\n            if 'stream_id' in kwargs and kwargs['stream_id']:\n                # Download the stream\n                stream_id = kwargs['stream_id']\n            else:\n                # Download stream with the best quality\n                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']\n\n            if 'index' not in kwargs:\n                self.p(stream_id)\n            else:\n                self.p_i(stream_id)\n\n            if stream_id in self.streams:\n                urls = self.streams[stream_id]['src']\n                ext = self.streams[stream_id]['container']\n                total_size = self.streams[stream_id]['size']\n            else:\n                urls = self.dash_streams[stream_id]['src']\n                ext = self.dash_streams[stream_id]['container']\n                total_size = self.dash_streams[stream_id]['size']\n\n            if not urls:\n                log.wtf('[Failed] Cannot extract video source.')\n            # For legacy main()\n            \n            #Here's the change!!\n            download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)\n\n            if not kwargs['caption']:\n                print('Skipping captions.')\n                return\n            for lang in self.caption_tracks:\n                filename = '%s.%s.srt' % (get_filename(self.title), lang)\n                print('Saving %s ... ' % filename, end=\"\", flush=True)\n                srt = self.caption_tracks[lang]\n                with open(os.path.join(kwargs['output_dir'], filename),\n                          'w', encoding='utf-8') as x:\n                    x.write(srt)\n                print('Done.')", "code_tokens": ["def", "download", "(", "self", ",", "*", "*", "kwargs", ")", ":", "if", "'json_output'", "in", "kwargs", "and", "kwargs", "[", "'json_output'", "]", ":", "json_output", ".", "output", "(", "self", ")", "elif", "'info_only'", "in", "kwargs", "and", "kwargs", "[", "'info_only'", "]", ":", "if", "'stream_id'", "in", "kwargs", "and", "kwargs", "[", "'stream_id'", "]", ":", "# Display the stream", "stream_id", "=", "kwargs", "[", "'stream_id'", "]", "if", "'index'", "not", "in", "kwargs", ":", "self", ".", "p", "(", "stream_id", ")", "else", ":", "self", ".", "p_i", "(", "stream_id", ")", "else", ":", "# Display all available streams", "if", "'index'", "not", "in", "kwargs", ":", "self", ".", "p", "(", "[", "]", ")", "else", ":", "stream_id", "=", "self", ".", "streams_sorted", "[", "0", "]", "[", "'id'", "]", "if", "'id'", "in", "self", ".", "streams_sorted", "[", "0", "]", "else", "self", ".", "streams_sorted", "[", "0", "]", "[", "'itag'", "]", "self", ".", "p_i", "(", "stream_id", ")", "else", ":", "if", "'stream_id'", "in", "kwargs", "and", "kwargs", "[", "'stream_id'", "]", ":", "# Download the stream", "stream_id", "=", "kwargs", "[", "'stream_id'", "]", "else", ":", "# Download stream with the best quality", "stream_id", "=", "self", ".", "streams_sorted", "[", "0", "]", "[", "'id'", "]", "if", "'id'", "in", "self", ".", "streams_sorted", "[", "0", "]", "else", "self", ".", "streams_sorted", "[", "0", "]", "[", "'itag'", "]", "if", "'index'", "not", "in", "kwargs", ":", "self", ".", "p", "(", "stream_id", ")", "else", ":", "self", ".", "p_i", "(", "stream_id", ")", "if", "stream_id", "in", "self", ".", "streams", ":", "urls", "=", "self", ".", "streams", "[", "stream_id", "]", "[", "'src'", "]", "ext", "=", "self", ".", "streams", "[", "stream_id", "]", "[", "'container'", "]", "total_size", "=", "self", ".", "streams", "[", "stream_id", "]", "[", "'size'", "]", "else", ":", "urls", "=", "self", ".", "dash_streams", "[", "stream_id", "]", "[", "'src'", "]", "ext", "=", "self", ".", "dash_streams", "[", "stream_id", "]", "[", "'container'", "]", "total_size", "=", "self", ".", "dash_streams", "[", "stream_id", "]", "[", "'size'", "]", "if", "not", "urls", ":", "log", ".", "wtf", "(", "'[Failed] Cannot extract video source.'", ")", "# For legacy main()", "#Here's the change!!", "download_url_ffmpeg", "(", "urls", "[", "0", "]", ",", "self", ".", "title", ",", "'mp4'", ",", "output_dir", "=", "kwargs", "[", "'output_dir'", "]", ",", "merge", "=", "kwargs", "[", "'merge'", "]", ",", "stream", "=", "False", ")", "if", "not", "kwargs", "[", "'caption'", "]", ":", "print", "(", "'Skipping captions.'", ")", "return", "for", "lang", "in", "self", ".", "caption_tracks", ":", "filename", "=", "'%s.%s.srt'", "%", "(", "get_filename", "(", "self", ".", "title", ")", ",", "lang", ")", "print", "(", "'Saving %s ... '", "%", "filename", ",", "end", "=", "\"\"", ",", "flush", "=", "True", ")", "srt", "=", "self", ".", "caption_tracks", "[", "lang", "]", "with", "open", "(", "os", ".", "path", ".", "join", "(", "kwargs", "[", "'output_dir'", "]", ",", "filename", ")", ",", "'w'", ",", "encoding", "=", "'utf-8'", ")", "as", "x", ":", "x", ".", "write", "(", "srt", ")", "print", "(", "'Done.'", ")"], "docstring": "Override the original one\n        Ugly ugly dirty hack", "docstring_tokens": ["Override", "the", "original", "one", "Ugly", "ugly", "dirty", "hack"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/iqiyi.py#L158-L218", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/acfun.py", "func_name": "acfun_download_by_vid", "original_string": "def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"str, str, str, bool, bool ->None\n\n    Download Acfun video by vid.\n\n    Call Acfun API, decide which site to use, and pass the job to its\n    extractor.\n    \"\"\"\n\n    #first call the main parasing API\n    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid))\n\n    sourceType = info['sourceType']\n\n    #decide sourceId to know which extractor to use\n    if 'sourceId' in info: sourceId = info['sourceId']\n    # danmakuId = info['danmakuId']\n\n    #call extractor decided by sourceId\n    if sourceType == 'sina':\n        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'youku':\n        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)\n    elif sourceType == 'tudou':\n        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'qq':\n        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'letv':\n        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'zhuzhan':\n        #As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this\n#In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player\n#old code removed\n        url = 'http://www.acfun.cn/v/ac' + vid\n        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url)\n        seq = ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd']\n        for t in seq:\n            if yk_streams.get(t):\n                preferred = yk_streams[t]\n                break\n#total_size in the json could be incorrect(F.I. 0)\n        size = 0\n        for url in preferred[0]:\n            _, _, seg_size = url_info(url)\n            size += seg_size\n#fallback to flvhd is not quite possible\n        if re.search(r'fid=[0-9A-Z\\-]*.flv', preferred[0][0]):\n            ext = 'flv'\n        else:\n            ext = 'mp4'\n        print_info(site_info, title, ext, size)\n        if not info_only:\n            download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)\n    else:\n        raise NotImplementedError(sourceType)\n\n    if not info_only and not dry_run:\n        if not kwargs['caption']:\n            print('Skipping danmaku.')\n            return\n        try:\n            title = get_filename(title)\n            print('Downloading %s ...\\n' % (title + '.cmt.json'))\n            cmt = get_srt_json(vid)\n            with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:\n                x.write(cmt)\n        except:\n            pass", "language": "python", "code": "def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"str, str, str, bool, bool ->None\n\n    Download Acfun video by vid.\n\n    Call Acfun API, decide which site to use, and pass the job to its\n    extractor.\n    \"\"\"\n\n    #first call the main parasing API\n    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid))\n\n    sourceType = info['sourceType']\n\n    #decide sourceId to know which extractor to use\n    if 'sourceId' in info: sourceId = info['sourceId']\n    # danmakuId = info['danmakuId']\n\n    #call extractor decided by sourceId\n    if sourceType == 'sina':\n        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'youku':\n        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)\n    elif sourceType == 'tudou':\n        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'qq':\n        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'letv':\n        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)\n    elif sourceType == 'zhuzhan':\n        #As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this\n#In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player\n#old code removed\n        url = 'http://www.acfun.cn/v/ac' + vid\n        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url)\n        seq = ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd']\n        for t in seq:\n            if yk_streams.get(t):\n                preferred = yk_streams[t]\n                break\n#total_size in the json could be incorrect(F.I. 0)\n        size = 0\n        for url in preferred[0]:\n            _, _, seg_size = url_info(url)\n            size += seg_size\n#fallback to flvhd is not quite possible\n        if re.search(r'fid=[0-9A-Z\\-]*.flv', preferred[0][0]):\n            ext = 'flv'\n        else:\n            ext = 'mp4'\n        print_info(site_info, title, ext, size)\n        if not info_only:\n            download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)\n    else:\n        raise NotImplementedError(sourceType)\n\n    if not info_only and not dry_run:\n        if not kwargs['caption']:\n            print('Skipping danmaku.')\n            return\n        try:\n            title = get_filename(title)\n            print('Downloading %s ...\\n' % (title + '.cmt.json'))\n            cmt = get_srt_json(vid)\n            with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:\n                x.write(cmt)\n        except:\n            pass", "code_tokens": ["def", "acfun_download_by_vid", "(", "vid", ",", "title", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "#first call the main parasing API", "info", "=", "json", ".", "loads", "(", "get_content", "(", "'http://www.acfun.cn/video/getVideo.aspx?id='", "+", "vid", ")", ")", "sourceType", "=", "info", "[", "'sourceType'", "]", "#decide sourceId to know which extractor to use", "if", "'sourceId'", "in", "info", ":", "sourceId", "=", "info", "[", "'sourceId'", "]", "# danmakuId = info['danmakuId']", "#call extractor decided by sourceId", "if", "sourceType", "==", "'sina'", ":", "sina_download_by_vid", "(", "sourceId", ",", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "elif", "sourceType", "==", "'youku'", ":", "youku_download_by_vid", "(", "sourceId", ",", "title", "=", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ",", "*", "*", "kwargs", ")", "elif", "sourceType", "==", "'tudou'", ":", "tudou_download_by_iid", "(", "sourceId", ",", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "elif", "sourceType", "==", "'qq'", ":", "qq_download_by_vid", "(", "sourceId", ",", "title", ",", "True", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "elif", "sourceType", "==", "'letv'", ":", "letvcloud_download_by_vu", "(", "sourceId", ",", "'2d8c027396'", ",", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ")", "elif", "sourceType", "==", "'zhuzhan'", ":", "#As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this", "#In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player", "#old code removed", "url", "=", "'http://www.acfun.cn/v/ac'", "+", "vid", "yk_streams", "=", "youku_acfun_proxy", "(", "info", "[", "'sourceId'", "]", ",", "info", "[", "'encode'", "]", ",", "url", ")", "seq", "=", "[", "'mp4hd3'", ",", "'mp4hd2'", ",", "'mp4hd'", ",", "'flvhd'", "]", "for", "t", "in", "seq", ":", "if", "yk_streams", ".", "get", "(", "t", ")", ":", "preferred", "=", "yk_streams", "[", "t", "]", "break", "#total_size in the json could be incorrect(F.I. 0)", "size", "=", "0", "for", "url", "in", "preferred", "[", "0", "]", ":", "_", ",", "_", ",", "seg_size", "=", "url_info", "(", "url", ")", "size", "+=", "seg_size", "#fallback to flvhd is not quite possible", "if", "re", ".", "search", "(", "r'fid=[0-9A-Z\\-]*.flv'", ",", "preferred", "[", "0", "]", "[", "0", "]", ")", ":", "ext", "=", "'flv'", "else", ":", "ext", "=", "'mp4'", "print_info", "(", "site_info", ",", "title", ",", "ext", ",", "size", ")", "if", "not", "info_only", ":", "download_urls", "(", "preferred", "[", "0", "]", ",", "title", ",", "ext", ",", "size", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ")", "else", ":", "raise", "NotImplementedError", "(", "sourceType", ")", "if", "not", "info_only", "and", "not", "dry_run", ":", "if", "not", "kwargs", "[", "'caption'", "]", ":", "print", "(", "'Skipping danmaku.'", ")", "return", "try", ":", "title", "=", "get_filename", "(", "title", ")", "print", "(", "'Downloading %s ...\\n'", "%", "(", "title", "+", "'.cmt.json'", ")", ")", "cmt", "=", "get_srt_json", "(", "vid", ")", "with", "open", "(", "os", ".", "path", ".", "join", "(", "output_dir", ",", "title", "+", "'.cmt.json'", ")", ",", "'w'", ",", "encoding", "=", "'utf-8'", ")", "as", "x", ":", "x", ".", "write", "(", "cmt", ")", "except", ":", "pass"], "docstring": "str, str, str, bool, bool ->None\n\n    Download Acfun video by vid.\n\n    Call Acfun API, decide which site to use, and pass the job to its\n    extractor.", "docstring_tokens": ["str", "str", "str", "bool", "bool", "-", ">", "None"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/acfun.py#L42-L109", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/common.py", "func_name": "matchall", "original_string": "def matchall(text, patterns):\n    \"\"\"Scans through a string for substrings matched some patterns.\n\n    Args:\n        text: A string to be scanned.\n        patterns: a list of regex pattern.\n\n    Returns:\n        a list if matched. empty if not.\n    \"\"\"\n\n    ret = []\n    for pattern in patterns:\n        match = re.findall(pattern, text)\n        ret += match\n\n    return ret", "language": "python", "code": "def matchall(text, patterns):\n    \"\"\"Scans through a string for substrings matched some patterns.\n\n    Args:\n        text: A string to be scanned.\n        patterns: a list of regex pattern.\n\n    Returns:\n        a list if matched. empty if not.\n    \"\"\"\n\n    ret = []\n    for pattern in patterns:\n        match = re.findall(pattern, text)\n        ret += match\n\n    return ret", "code_tokens": ["def", "matchall", "(", "text", ",", "patterns", ")", ":", "ret", "=", "[", "]", "for", "pattern", "in", "patterns", ":", "match", "=", "re", ".", "findall", "(", "pattern", ",", "text", ")", "ret", "+=", "match", "return", "ret"], "docstring": "Scans through a string for substrings matched some patterns.\n\n    Args:\n        text: A string to be scanned.\n        patterns: a list of regex pattern.\n\n    Returns:\n        a list if matched. empty if not.", "docstring_tokens": ["Scans", "through", "a", "string", "for", "substrings", "matched", "some", "patterns", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/common.py#L252-L268", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/common.py", "func_name": "parse_query_param", "original_string": "def parse_query_param(url, param):\n    \"\"\"Parses the query string of a URL and returns the value of a parameter.\n\n    Args:\n        url: A URL.\n        param: A string representing the name of the parameter.\n\n    Returns:\n        The value of the parameter.\n    \"\"\"\n\n    try:\n        return parse.parse_qs(parse.urlparse(url).query)[param][0]\n    except:\n        return None", "language": "python", "code": "def parse_query_param(url, param):\n    \"\"\"Parses the query string of a URL and returns the value of a parameter.\n\n    Args:\n        url: A URL.\n        param: A string representing the name of the parameter.\n\n    Returns:\n        The value of the parameter.\n    \"\"\"\n\n    try:\n        return parse.parse_qs(parse.urlparse(url).query)[param][0]\n    except:\n        return None", "code_tokens": ["def", "parse_query_param", "(", "url", ",", "param", ")", ":", "try", ":", "return", "parse", ".", "parse_qs", "(", "parse", ".", "urlparse", "(", "url", ")", ".", "query", ")", "[", "param", "]", "[", "0", "]", "except", ":", "return", "None"], "docstring": "Parses the query string of a URL and returns the value of a parameter.\n\n    Args:\n        url: A URL.\n        param: A string representing the name of the parameter.\n\n    Returns:\n        The value of the parameter.", "docstring_tokens": ["Parses", "the", "query", "string", "of", "a", "URL", "and", "returns", "the", "value", "of", "a", "parameter", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/common.py#L285-L299", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/common.py", "func_name": "get_content", "original_string": "def get_content(url, headers={}, decoded=True):\n    \"\"\"Gets the content of a URL via sending a HTTP GET request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.\n    \"\"\"\n\n    logging.debug('get_content: %s' % url)\n\n    req = request.Request(url, headers=headers)\n    if cookies:\n        cookies.add_cookie_header(req)\n        req.headers.update(req.unredirected_hdrs)\n\n    response = urlopen_with_retry(req)\n    data = response.read()\n\n    # Handle HTTP compression for gzip and deflate (zlib)\n    content_encoding = response.getheader('Content-Encoding')\n    if content_encoding == 'gzip':\n        data = ungzip(data)\n    elif content_encoding == 'deflate':\n        data = undeflate(data)\n\n    # Decode the response body\n    if decoded:\n        charset = match1(\n            response.getheader('Content-Type', ''), r'charset=([\\w-]+)'\n        )\n        if charset is not None:\n            data = data.decode(charset, 'ignore')\n        else:\n            data = data.decode('utf-8', 'ignore')\n\n    return data", "language": "python", "code": "def get_content(url, headers={}, decoded=True):\n    \"\"\"Gets the content of a URL via sending a HTTP GET request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.\n    \"\"\"\n\n    logging.debug('get_content: %s' % url)\n\n    req = request.Request(url, headers=headers)\n    if cookies:\n        cookies.add_cookie_header(req)\n        req.headers.update(req.unredirected_hdrs)\n\n    response = urlopen_with_retry(req)\n    data = response.read()\n\n    # Handle HTTP compression for gzip and deflate (zlib)\n    content_encoding = response.getheader('Content-Encoding')\n    if content_encoding == 'gzip':\n        data = ungzip(data)\n    elif content_encoding == 'deflate':\n        data = undeflate(data)\n\n    # Decode the response body\n    if decoded:\n        charset = match1(\n            response.getheader('Content-Type', ''), r'charset=([\\w-]+)'\n        )\n        if charset is not None:\n            data = data.decode(charset, 'ignore')\n        else:\n            data = data.decode('utf-8', 'ignore')\n\n    return data", "code_tokens": ["def", "get_content", "(", "url", ",", "headers", "=", "{", "}", ",", "decoded", "=", "True", ")", ":", "logging", ".", "debug", "(", "'get_content: %s'", "%", "url", ")", "req", "=", "request", ".", "Request", "(", "url", ",", "headers", "=", "headers", ")", "if", "cookies", ":", "cookies", ".", "add_cookie_header", "(", "req", ")", "req", ".", "headers", ".", "update", "(", "req", ".", "unredirected_hdrs", ")", "response", "=", "urlopen_with_retry", "(", "req", ")", "data", "=", "response", ".", "read", "(", ")", "# Handle HTTP compression for gzip and deflate (zlib)", "content_encoding", "=", "response", ".", "getheader", "(", "'Content-Encoding'", ")", "if", "content_encoding", "==", "'gzip'", ":", "data", "=", "ungzip", "(", "data", ")", "elif", "content_encoding", "==", "'deflate'", ":", "data", "=", "undeflate", "(", "data", ")", "# Decode the response body", "if", "decoded", ":", "charset", "=", "match1", "(", "response", ".", "getheader", "(", "'Content-Type'", ",", "''", ")", ",", "r'charset=([\\w-]+)'", ")", "if", "charset", "is", "not", "None", ":", "data", "=", "data", ".", "decode", "(", "charset", ",", "'ignore'", ")", "else", ":", "data", "=", "data", ".", "decode", "(", "'utf-8'", ",", "'ignore'", ")", "return", "data"], "docstring": "Gets the content of a URL via sending a HTTP GET request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.", "docstring_tokens": ["Gets", "the", "content", "of", "a", "URL", "via", "sending", "a", "HTTP", "GET", "request", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/common.py#L415-L454", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/common.py", "func_name": "post_content", "original_string": "def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):\n    \"\"\"Post the content of a URL via sending a HTTP POST request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.\n    \"\"\"\n    if kwargs.get('post_data_raw'):\n        logging.debug('post_content: %s\\npost_data_raw: %s' % (url, kwargs['post_data_raw']))\n    else:\n        logging.debug('post_content: %s\\npost_data: %s' % (url, post_data))\n\n    req = request.Request(url, headers=headers)\n    if cookies:\n        cookies.add_cookie_header(req)\n        req.headers.update(req.unredirected_hdrs)\n    if kwargs.get('post_data_raw'):\n        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')\n    else:\n        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')\n    response = urlopen_with_retry(req, data=post_data_enc)\n    data = response.read()\n\n    # Handle HTTP compression for gzip and deflate (zlib)\n    content_encoding = response.getheader('Content-Encoding')\n    if content_encoding == 'gzip':\n        data = ungzip(data)\n    elif content_encoding == 'deflate':\n        data = undeflate(data)\n\n    # Decode the response body\n    if decoded:\n        charset = match1(\n            response.getheader('Content-Type'), r'charset=([\\w-]+)'\n        )\n        if charset is not None:\n            data = data.decode(charset)\n        else:\n            data = data.decode('utf-8')\n\n    return data", "language": "python", "code": "def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):\n    \"\"\"Post the content of a URL via sending a HTTP POST request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.\n    \"\"\"\n    if kwargs.get('post_data_raw'):\n        logging.debug('post_content: %s\\npost_data_raw: %s' % (url, kwargs['post_data_raw']))\n    else:\n        logging.debug('post_content: %s\\npost_data: %s' % (url, post_data))\n\n    req = request.Request(url, headers=headers)\n    if cookies:\n        cookies.add_cookie_header(req)\n        req.headers.update(req.unredirected_hdrs)\n    if kwargs.get('post_data_raw'):\n        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')\n    else:\n        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')\n    response = urlopen_with_retry(req, data=post_data_enc)\n    data = response.read()\n\n    # Handle HTTP compression for gzip and deflate (zlib)\n    content_encoding = response.getheader('Content-Encoding')\n    if content_encoding == 'gzip':\n        data = ungzip(data)\n    elif content_encoding == 'deflate':\n        data = undeflate(data)\n\n    # Decode the response body\n    if decoded:\n        charset = match1(\n            response.getheader('Content-Type'), r'charset=([\\w-]+)'\n        )\n        if charset is not None:\n            data = data.decode(charset)\n        else:\n            data = data.decode('utf-8')\n\n    return data", "code_tokens": ["def", "post_content", "(", "url", ",", "headers", "=", "{", "}", ",", "post_data", "=", "{", "}", ",", "decoded", "=", "True", ",", "*", "*", "kwargs", ")", ":", "if", "kwargs", ".", "get", "(", "'post_data_raw'", ")", ":", "logging", ".", "debug", "(", "'post_content: %s\\npost_data_raw: %s'", "%", "(", "url", ",", "kwargs", "[", "'post_data_raw'", "]", ")", ")", "else", ":", "logging", ".", "debug", "(", "'post_content: %s\\npost_data: %s'", "%", "(", "url", ",", "post_data", ")", ")", "req", "=", "request", ".", "Request", "(", "url", ",", "headers", "=", "headers", ")", "if", "cookies", ":", "cookies", ".", "add_cookie_header", "(", "req", ")", "req", ".", "headers", ".", "update", "(", "req", ".", "unredirected_hdrs", ")", "if", "kwargs", ".", "get", "(", "'post_data_raw'", ")", ":", "post_data_enc", "=", "bytes", "(", "kwargs", "[", "'post_data_raw'", "]", ",", "'utf-8'", ")", "else", ":", "post_data_enc", "=", "bytes", "(", "parse", ".", "urlencode", "(", "post_data", ")", ",", "'utf-8'", ")", "response", "=", "urlopen_with_retry", "(", "req", ",", "data", "=", "post_data_enc", ")", "data", "=", "response", ".", "read", "(", ")", "# Handle HTTP compression for gzip and deflate (zlib)", "content_encoding", "=", "response", ".", "getheader", "(", "'Content-Encoding'", ")", "if", "content_encoding", "==", "'gzip'", ":", "data", "=", "ungzip", "(", "data", ")", "elif", "content_encoding", "==", "'deflate'", ":", "data", "=", "undeflate", "(", "data", ")", "# Decode the response body", "if", "decoded", ":", "charset", "=", "match1", "(", "response", ".", "getheader", "(", "'Content-Type'", ")", ",", "r'charset=([\\w-]+)'", ")", "if", "charset", "is", "not", "None", ":", "data", "=", "data", ".", "decode", "(", "charset", ")", "else", ":", "data", "=", "data", ".", "decode", "(", "'utf-8'", ")", "return", "data"], "docstring": "Post the content of a URL via sending a HTTP POST request.\n\n    Args:\n        url: A URL.\n        headers: Request headers used by the client.\n        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.\n\n    Returns:\n        The content as a string.", "docstring_tokens": ["Post", "the", "content", "of", "a", "URL", "via", "sending", "a", "HTTP", "POST", "request", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/common.py#L457-L501", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/common.py", "func_name": "parse_host", "original_string": "def parse_host(host):\n    \"\"\"Parses host name and port number from a string.\n    \"\"\"\n    if re.match(r'^(\\d+)$', host) is not None:\n        return (\"0.0.0.0\", int(host))\n    if re.match(r'^(\\w+)://', host) is None:\n        host = \"//\" + host\n    o = parse.urlparse(host)\n    hostname = o.hostname or \"0.0.0.0\"\n    port = o.port or 0\n    return (hostname, port)", "language": "python", "code": "def parse_host(host):\n    \"\"\"Parses host name and port number from a string.\n    \"\"\"\n    if re.match(r'^(\\d+)$', host) is not None:\n        return (\"0.0.0.0\", int(host))\n    if re.match(r'^(\\w+)://', host) is None:\n        host = \"//\" + host\n    o = parse.urlparse(host)\n    hostname = o.hostname or \"0.0.0.0\"\n    port = o.port or 0\n    return (hostname, port)", "code_tokens": ["def", "parse_host", "(", "host", ")", ":", "if", "re", ".", "match", "(", "r'^(\\d+)$'", ",", "host", ")", "is", "not", "None", ":", "return", "(", "\"0.0.0.0\"", ",", "int", "(", "host", ")", ")", "if", "re", ".", "match", "(", "r'^(\\w+)://'", ",", "host", ")", "is", "None", ":", "host", "=", "\"//\"", "+", "host", "o", "=", "parse", ".", "urlparse", "(", "host", ")", "hostname", "=", "o", ".", "hostname", "or", "\"0.0.0.0\"", "port", "=", "o", ".", "port", "or", "0", "return", "(", "hostname", ",", "port", ")"], "docstring": "Parses host name and port number from a string.", "docstring_tokens": ["Parses", "host", "name", "and", "port", "number", "from", "a", "string", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/common.py#L1216-L1226", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/showroom.py", "func_name": "showroom_get_roomid_by_room_url_key", "original_string": "def showroom_get_roomid_by_room_url_key(room_url_key):\n    \"\"\"str->str\"\"\"\n    fake_headers_mobile = {\n        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',\n        'Accept-Charset': 'UTF-8,*;q=0.5',\n        'Accept-Encoding': 'gzip,deflate,sdch',\n        'Accept-Language': 'en-US,en;q=0.8',\n        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'\n    }\n    webpage_url = 'https://www.showroom-live.com/' + room_url_key\n    html = get_content(webpage_url, headers = fake_headers_mobile)\n    roomid = match1(html, r'room\\?room_id\\=(\\d+)')\n    assert roomid\n    return roomid", "language": "python", "code": "def showroom_get_roomid_by_room_url_key(room_url_key):\n    \"\"\"str->str\"\"\"\n    fake_headers_mobile = {\n        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',\n        'Accept-Charset': 'UTF-8,*;q=0.5',\n        'Accept-Encoding': 'gzip,deflate,sdch',\n        'Accept-Language': 'en-US,en;q=0.8',\n        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'\n    }\n    webpage_url = 'https://www.showroom-live.com/' + room_url_key\n    html = get_content(webpage_url, headers = fake_headers_mobile)\n    roomid = match1(html, r'room\\?room_id\\=(\\d+)')\n    assert roomid\n    return roomid", "code_tokens": ["def", "showroom_get_roomid_by_room_url_key", "(", "room_url_key", ")", ":", "fake_headers_mobile", "=", "{", "'Accept'", ":", "'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'", ",", "'Accept-Charset'", ":", "'UTF-8,*;q=0.5'", ",", "'Accept-Encoding'", ":", "'gzip,deflate,sdch'", ",", "'Accept-Language'", ":", "'en-US,en;q=0.8'", ",", "'User-Agent'", ":", "'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'", "}", "webpage_url", "=", "'https://www.showroom-live.com/'", "+", "room_url_key", "html", "=", "get_content", "(", "webpage_url", ",", "headers", "=", "fake_headers_mobile", ")", "roomid", "=", "match1", "(", "html", ",", "r'room\\?room_id\\=(\\d+)'", ")", "assert", "roomid", "return", "roomid"], "docstring": "str->str", "docstring_tokens": ["str", "-", ">", "str"], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/showroom.py#L11-L24", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/wanmen.py", "func_name": "_wanmen_get_title_by_json_topic_part", "original_string": "def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):\n    \"\"\"JSON, int, int, int->str\n    \n    Get a proper title with courseid+topicID+partID.\"\"\"\n\n    return '_'.join([json_content[0]['name'],\n                    json_content[0]['Topics'][tIndex]['name'],\n                    json_content[0]['Topics'][tIndex]['Parts'][pIndex]['name']])", "language": "python", "code": "def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):\n    \"\"\"JSON, int, int, int->str\n    \n    Get a proper title with courseid+topicID+partID.\"\"\"\n\n    return '_'.join([json_content[0]['name'],\n                    json_content[0]['Topics'][tIndex]['name'],\n                    json_content[0]['Topics'][tIndex]['Parts'][pIndex]['name']])", "code_tokens": ["def", "_wanmen_get_title_by_json_topic_part", "(", "json_content", ",", "tIndex", ",", "pIndex", ")", ":", "return", "'_'", ".", "join", "(", "[", "json_content", "[", "0", "]", "[", "'name'", "]", ",", "json_content", "[", "0", "]", "[", "'Topics'", "]", "[", "tIndex", "]", "[", "'name'", "]", ",", "json_content", "[", "0", "]", "[", "'Topics'", "]", "[", "tIndex", "]", "[", "'Parts'", "]", "[", "pIndex", "]", "[", "'name'", "]", "]", ")"], "docstring": "JSON, int, int, int->str\n    \n    Get a proper title with courseid+topicID+partID.", "docstring_tokens": ["JSON", "int", "int", "int", "-", ">", "str", "Get", "a", "proper", "title", "with", "courseid", "+", "topicID", "+", "partID", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/wanmen.py#L18-L25", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/wanmen.py", "func_name": "wanmen_download_by_course", "original_string": "def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"int->None\n    \n    Download a WHOLE course.\n    Reuse the API call to save time.\"\"\"\n\n    for tIndex in range(len(json_api_content[0]['Topics'])):\n        for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):\n            wanmen_download_by_course_topic_part(json_api_content,\n                                                 tIndex,\n                                                 pIndex,\n                                                 output_dir=output_dir,\n                                                 merge=merge,\n                                                 info_only=info_only,\n                                                 **kwargs)", "language": "python", "code": "def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"int->None\n    \n    Download a WHOLE course.\n    Reuse the API call to save time.\"\"\"\n\n    for tIndex in range(len(json_api_content[0]['Topics'])):\n        for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):\n            wanmen_download_by_course_topic_part(json_api_content,\n                                                 tIndex,\n                                                 pIndex,\n                                                 output_dir=output_dir,\n                                                 merge=merge,\n                                                 info_only=info_only,\n                                                 **kwargs)", "code_tokens": ["def", "wanmen_download_by_course", "(", "json_api_content", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "for", "tIndex", "in", "range", "(", "len", "(", "json_api_content", "[", "0", "]", "[", "'Topics'", "]", ")", ")", ":", "for", "pIndex", "in", "range", "(", "len", "(", "json_api_content", "[", "0", "]", "[", "'Topics'", "]", "[", "tIndex", "]", "[", "'Parts'", "]", ")", ")", ":", "wanmen_download_by_course_topic_part", "(", "json_api_content", ",", "tIndex", ",", "pIndex", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ",", "*", "*", "kwargs", ")"], "docstring": "int->None\n    \n    Download a WHOLE course.\n    Reuse the API call to save time.", "docstring_tokens": ["int", "-", ">", "None", "Download", "a", "WHOLE", "course", ".", "Reuse", "the", "API", "call", "to", "save", "time", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/wanmen.py#L37-L51", "partition": "test"}
{"repo": "soimort/you-get", "path": "src/you_get/extractors/wanmen.py", "func_name": "wanmen_download_by_course_topic_part", "original_string": "def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"int, int, int->None\n    \n    Download ONE PART of the course.\"\"\"\n\n    html = json_api_content\n\n    title = _wanmen_get_title_by_json_topic_part(html, \n                                                  tIndex, \n                                                  pIndex)\n\n    bokeccID = _wanmen_get_boke_id_by_json_topic_part(html,\n                                                      tIndex, \n                                                     pIndex)\n\n    bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)", "language": "python", "code": "def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):\n    \"\"\"int, int, int->None\n    \n    Download ONE PART of the course.\"\"\"\n\n    html = json_api_content\n\n    title = _wanmen_get_title_by_json_topic_part(html, \n                                                  tIndex, \n                                                  pIndex)\n\n    bokeccID = _wanmen_get_boke_id_by_json_topic_part(html,\n                                                      tIndex, \n                                                     pIndex)\n\n    bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)", "code_tokens": ["def", "wanmen_download_by_course_topic_part", "(", "json_api_content", ",", "tIndex", ",", "pIndex", ",", "output_dir", "=", "'.'", ",", "merge", "=", "True", ",", "info_only", "=", "False", ",", "*", "*", "kwargs", ")", ":", "html", "=", "json_api_content", "title", "=", "_wanmen_get_title_by_json_topic_part", "(", "html", ",", "tIndex", ",", "pIndex", ")", "bokeccID", "=", "_wanmen_get_boke_id_by_json_topic_part", "(", "html", ",", "tIndex", ",", "pIndex", ")", "bokecc_download_by_id", "(", "vid", "=", "bokeccID", ",", "title", "=", "title", ",", "output_dir", "=", "output_dir", ",", "merge", "=", "merge", ",", "info_only", "=", "info_only", ",", "*", "*", "kwargs", ")"], "docstring": "int, int, int->None\n    \n    Download ONE PART of the course.", "docstring_tokens": ["int", "int", "int", "-", ">", "None", "Download", "ONE", "PART", "of", "the", "course", "."], "sha": "b746ac01c9f39de94cac2d56f665285b0523b974", "url": "https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/wanmen.py#L69-L84", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/executors/base_executor.py", "func_name": "BaseExecutor.has_task", "original_string": "def has_task(self, task_instance):\n        \"\"\"\n        Checks if a task is either queued or running in this executor\n\n        :param task_instance: TaskInstance\n        :return: True if the task is known to this executor\n        \"\"\"\n        if task_instance.key in self.queued_tasks or task_instance.key in self.running:\n            return True", "language": "python", "code": "def has_task(self, task_instance):\n        \"\"\"\n        Checks if a task is either queued or running in this executor\n\n        :param task_instance: TaskInstance\n        :return: True if the task is known to this executor\n        \"\"\"\n        if task_instance.key in self.queued_tasks or task_instance.key in self.running:\n            return True", "code_tokens": ["def", "has_task", "(", "self", ",", "task_instance", ")", ":", "if", "task_instance", ".", "key", "in", "self", ".", "queued_tasks", "or", "task_instance", ".", "key", "in", "self", ".", "running", ":", "return", "True"], "docstring": "Checks if a task is either queued or running in this executor\n\n        :param task_instance: TaskInstance\n        :return: True if the task is known to this executor", "docstring_tokens": ["Checks", "if", "a", "task", "is", "either", "queued", "or", "running", "in", "this", "executor"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/executors/base_executor.py#L97-L105", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/executors/base_executor.py", "func_name": "BaseExecutor.get_event_buffer", "original_string": "def get_event_buffer(self, dag_ids=None):\n        \"\"\"\n        Returns and flush the event buffer. In case dag_ids is specified\n        it will only return and flush events for the given dag_ids. Otherwise\n        it returns and flushes all\n\n        :param dag_ids: to dag_ids to return events for, if None returns all\n        :return: a dict of events\n        \"\"\"\n        cleared_events = dict()\n        if dag_ids is None:\n            cleared_events = self.event_buffer\n            self.event_buffer = dict()\n        else:\n            for key in list(self.event_buffer.keys()):\n                dag_id, _, _, _ = key\n                if dag_id in dag_ids:\n                    cleared_events[key] = self.event_buffer.pop(key)\n\n        return cleared_events", "language": "python", "code": "def get_event_buffer(self, dag_ids=None):\n        \"\"\"\n        Returns and flush the event buffer. In case dag_ids is specified\n        it will only return and flush events for the given dag_ids. Otherwise\n        it returns and flushes all\n\n        :param dag_ids: to dag_ids to return events for, if None returns all\n        :return: a dict of events\n        \"\"\"\n        cleared_events = dict()\n        if dag_ids is None:\n            cleared_events = self.event_buffer\n            self.event_buffer = dict()\n        else:\n            for key in list(self.event_buffer.keys()):\n                dag_id, _, _, _ = key\n                if dag_id in dag_ids:\n                    cleared_events[key] = self.event_buffer.pop(key)\n\n        return cleared_events", "code_tokens": ["def", "get_event_buffer", "(", "self", ",", "dag_ids", "=", "None", ")", ":", "cleared_events", "=", "dict", "(", ")", "if", "dag_ids", "is", "None", ":", "cleared_events", "=", "self", ".", "event_buffer", "self", ".", "event_buffer", "=", "dict", "(", ")", "else", ":", "for", "key", "in", "list", "(", "self", ".", "event_buffer", ".", "keys", "(", ")", ")", ":", "dag_id", ",", "_", ",", "_", ",", "_", "=", "key", "if", "dag_id", "in", "dag_ids", ":", "cleared_events", "[", "key", "]", "=", "self", ".", "event_buffer", ".", "pop", "(", "key", ")", "return", "cleared_events"], "docstring": "Returns and flush the event buffer. In case dag_ids is specified\n        it will only return and flush events for the given dag_ids. Otherwise\n        it returns and flushes all\n\n        :param dag_ids: to dag_ids to return events for, if None returns all\n        :return: a dict of events", "docstring_tokens": ["Returns", "and", "flush", "the", "event", "buffer", ".", "In", "case", "dag_ids", "is", "specified", "it", "will", "only", "return", "and", "flush", "events", "for", "the", "given", "dag_ids", ".", "Otherwise", "it", "returns", "and", "flushes", "all"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/executors/base_executor.py#L160-L179", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/snowflake_hook.py", "func_name": "SnowflakeHook.get_conn", "original_string": "def get_conn(self):\n        \"\"\"\n        Returns a snowflake.connection object\n        \"\"\"\n        conn_config = self._get_conn_params()\n        conn = snowflake.connector.connect(**conn_config)\n        return conn", "language": "python", "code": "def get_conn(self):\n        \"\"\"\n        Returns a snowflake.connection object\n        \"\"\"\n        conn_config = self._get_conn_params()\n        conn = snowflake.connector.connect(**conn_config)\n        return conn", "code_tokens": ["def", "get_conn", "(", "self", ")", ":", "conn_config", "=", "self", ".", "_get_conn_params", "(", ")", "conn", "=", "snowflake", ".", "connector", ".", "connect", "(", "*", "*", "conn_config", ")", "return", "conn"], "docstring": "Returns a snowflake.connection object", "docstring_tokens": ["Returns", "a", "snowflake", ".", "connection", "object"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/snowflake_hook.py#L107-L113", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/snowflake_hook.py", "func_name": "SnowflakeHook._get_aws_credentials", "original_string": "def _get_aws_credentials(self):\n        \"\"\"\n        returns aws_access_key_id, aws_secret_access_key\n        from extra\n\n        intended to be used by external import and export statements\n        \"\"\"\n        if self.snowflake_conn_id:\n            connection_object = self.get_connection(self.snowflake_conn_id)\n            if 'aws_secret_access_key' in connection_object.extra_dejson:\n                aws_access_key_id = connection_object.extra_dejson.get(\n                    'aws_access_key_id')\n                aws_secret_access_key = connection_object.extra_dejson.get(\n                    'aws_secret_access_key')\n        return aws_access_key_id, aws_secret_access_key", "language": "python", "code": "def _get_aws_credentials(self):\n        \"\"\"\n        returns aws_access_key_id, aws_secret_access_key\n        from extra\n\n        intended to be used by external import and export statements\n        \"\"\"\n        if self.snowflake_conn_id:\n            connection_object = self.get_connection(self.snowflake_conn_id)\n            if 'aws_secret_access_key' in connection_object.extra_dejson:\n                aws_access_key_id = connection_object.extra_dejson.get(\n                    'aws_access_key_id')\n                aws_secret_access_key = connection_object.extra_dejson.get(\n                    'aws_secret_access_key')\n        return aws_access_key_id, aws_secret_access_key", "code_tokens": ["def", "_get_aws_credentials", "(", "self", ")", ":", "if", "self", ".", "snowflake_conn_id", ":", "connection_object", "=", "self", ".", "get_connection", "(", "self", ".", "snowflake_conn_id", ")", "if", "'aws_secret_access_key'", "in", "connection_object", ".", "extra_dejson", ":", "aws_access_key_id", "=", "connection_object", ".", "extra_dejson", ".", "get", "(", "'aws_access_key_id'", ")", "aws_secret_access_key", "=", "connection_object", ".", "extra_dejson", ".", "get", "(", "'aws_secret_access_key'", ")", "return", "aws_access_key_id", ",", "aws_secret_access_key"], "docstring": "returns aws_access_key_id, aws_secret_access_key\n        from extra\n\n        intended to be used by external import and export statements", "docstring_tokens": ["returns", "aws_access_key_id", "aws_secret_access_key", "from", "extra"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/snowflake_hook.py#L115-L129", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/grpc_hook.py", "func_name": "GrpcHook._get_field", "original_string": "def _get_field(self, field_name, default=None):\n        \"\"\"\n        Fetches a field from extras, and returns it. This is some Airflow\n        magic. The grpc hook type adds custom UI elements\n        to the hook page, which allow admins to specify scopes, credential pem files, etc.\n        They get formatted as shown below.\n        \"\"\"\n        full_field_name = 'extra__grpc__{}'.format(field_name)\n        if full_field_name in self.extras:\n            return self.extras[full_field_name]\n        else:\n            return default", "language": "python", "code": "def _get_field(self, field_name, default=None):\n        \"\"\"\n        Fetches a field from extras, and returns it. This is some Airflow\n        magic. The grpc hook type adds custom UI elements\n        to the hook page, which allow admins to specify scopes, credential pem files, etc.\n        They get formatted as shown below.\n        \"\"\"\n        full_field_name = 'extra__grpc__{}'.format(field_name)\n        if full_field_name in self.extras:\n            return self.extras[full_field_name]\n        else:\n            return default", "code_tokens": ["def", "_get_field", "(", "self", ",", "field_name", ",", "default", "=", "None", ")", ":", "full_field_name", "=", "'extra__grpc__{}'", ".", "format", "(", "field_name", ")", "if", "full_field_name", "in", "self", ".", "extras", ":", "return", "self", ".", "extras", "[", "full_field_name", "]", "else", ":", "return", "default"], "docstring": "Fetches a field from extras, and returns it. This is some Airflow\n        magic. The grpc hook type adds custom UI elements\n        to the hook page, which allow admins to specify scopes, credential pem files, etc.\n        They get formatted as shown below.", "docstring_tokens": ["Fetches", "a", "field", "from", "extras", "and", "returns", "it", ".", "This", "is", "some", "Airflow", "magic", ".", "The", "grpc", "hook", "type", "adds", "custom", "UI", "elements", "to", "the", "hook", "page", "which", "allow", "admins", "to", "specify", "scopes", "credential", "pem", "files", "etc", ".", "They", "get", "formatted", "as", "shown", "below", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/grpc_hook.py#L112-L123", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/postgres_hook.py", "func_name": "PostgresHook.copy_expert", "original_string": "def copy_expert(self, sql, filename, open=open):\n        \"\"\"\n        Executes SQL using psycopg2 copy_expert method.\n        Necessary to execute COPY command without access to a superuser.\n\n        Note: if this method is called with a \"COPY FROM\" statement and\n        the specified input file does not exist, it creates an empty\n        file and no data is loaded, but the operation succeeds.\n        So if users want to be aware when the input file does not exist,\n        they have to check its existence by themselves.\n        \"\"\"\n        if not os.path.isfile(filename):\n            with open(filename, 'w'):\n                pass\n\n        with open(filename, 'r+') as f:\n            with closing(self.get_conn()) as conn:\n                with closing(conn.cursor()) as cur:\n                    cur.copy_expert(sql, f)\n                    f.truncate(f.tell())\n                    conn.commit()", "language": "python", "code": "def copy_expert(self, sql, filename, open=open):\n        \"\"\"\n        Executes SQL using psycopg2 copy_expert method.\n        Necessary to execute COPY command without access to a superuser.\n\n        Note: if this method is called with a \"COPY FROM\" statement and\n        the specified input file does not exist, it creates an empty\n        file and no data is loaded, but the operation succeeds.\n        So if users want to be aware when the input file does not exist,\n        they have to check its existence by themselves.\n        \"\"\"\n        if not os.path.isfile(filename):\n            with open(filename, 'w'):\n                pass\n\n        with open(filename, 'r+') as f:\n            with closing(self.get_conn()) as conn:\n                with closing(conn.cursor()) as cur:\n                    cur.copy_expert(sql, f)\n                    f.truncate(f.tell())\n                    conn.commit()", "code_tokens": ["def", "copy_expert", "(", "self", ",", "sql", ",", "filename", ",", "open", "=", "open", ")", ":", "if", "not", "os", ".", "path", ".", "isfile", "(", "filename", ")", ":", "with", "open", "(", "filename", ",", "'w'", ")", ":", "pass", "with", "open", "(", "filename", ",", "'r+'", ")", "as", "f", ":", "with", "closing", "(", "self", ".", "get_conn", "(", ")", ")", "as", "conn", ":", "with", "closing", "(", "conn", ".", "cursor", "(", ")", ")", "as", "cur", ":", "cur", ".", "copy_expert", "(", "sql", ",", "f", ")", "f", ".", "truncate", "(", "f", ".", "tell", "(", ")", ")", "conn", ".", "commit", "(", ")"], "docstring": "Executes SQL using psycopg2 copy_expert method.\n        Necessary to execute COPY command without access to a superuser.\n\n        Note: if this method is called with a \"COPY FROM\" statement and\n        the specified input file does not exist, it creates an empty\n        file and no data is loaded, but the operation succeeds.\n        So if users want to be aware when the input file does not exist,\n        they have to check its existence by themselves.", "docstring_tokens": ["Executes", "SQL", "using", "psycopg2", "copy_expert", "method", ".", "Necessary", "to", "execute", "COPY", "command", "without", "access", "to", "a", "superuser", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/postgres_hook.py#L63-L83", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/postgres_hook.py", "func_name": "PostgresHook.bulk_dump", "original_string": "def bulk_dump(self, table, tmp_file):\n        \"\"\"\n        Dumps a database table into a tab-delimited file\n        \"\"\"\n        self.copy_expert(\"COPY {table} TO STDOUT\".format(table=table), tmp_file)", "language": "python", "code": "def bulk_dump(self, table, tmp_file):\n        \"\"\"\n        Dumps a database table into a tab-delimited file\n        \"\"\"\n        self.copy_expert(\"COPY {table} TO STDOUT\".format(table=table), tmp_file)", "code_tokens": ["def", "bulk_dump", "(", "self", ",", "table", ",", "tmp_file", ")", ":", "self", ".", "copy_expert", "(", "\"COPY {table} TO STDOUT\"", ".", "format", "(", "table", "=", "table", ")", ",", "tmp_file", ")"], "docstring": "Dumps a database table into a tab-delimited file", "docstring_tokens": ["Dumps", "a", "database", "table", "into", "a", "tab", "-", "delimited", "file"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/postgres_hook.py#L91-L95", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/operators/file_to_gcs.py", "func_name": "FileToGoogleCloudStorageOperator.execute", "original_string": "def execute(self, context):\n        \"\"\"\n        Uploads the file to Google cloud storage\n        \"\"\"\n        hook = GoogleCloudStorageHook(\n            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,\n            delegate_to=self.delegate_to)\n\n        hook.upload(\n            bucket_name=self.bucket,\n            object_name=self.dst,\n            mime_type=self.mime_type,\n            filename=self.src,\n            gzip=self.gzip,\n        )", "language": "python", "code": "def execute(self, context):\n        \"\"\"\n        Uploads the file to Google cloud storage\n        \"\"\"\n        hook = GoogleCloudStorageHook(\n            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,\n            delegate_to=self.delegate_to)\n\n        hook.upload(\n            bucket_name=self.bucket,\n            object_name=self.dst,\n            mime_type=self.mime_type,\n            filename=self.src,\n            gzip=self.gzip,\n        )", "code_tokens": ["def", "execute", "(", "self", ",", "context", ")", ":", "hook", "=", "GoogleCloudStorageHook", "(", "google_cloud_storage_conn_id", "=", "self", ".", "google_cloud_storage_conn_id", ",", "delegate_to", "=", "self", ".", "delegate_to", ")", "hook", ".", "upload", "(", "bucket_name", "=", "self", ".", "bucket", ",", "object_name", "=", "self", ".", "dst", ",", "mime_type", "=", "self", ".", "mime_type", ",", "filename", "=", "self", ".", "src", ",", "gzip", "=", "self", ".", "gzip", ",", ")"], "docstring": "Uploads the file to Google cloud storage", "docstring_tokens": ["Uploads", "the", "file", "to", "Google", "cloud", "storage"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/operators/file_to_gcs.py#L68-L82", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/macros/hive.py", "func_name": "max_partition", "original_string": "def max_partition(\n        table, schema=\"default\", field=None, filter_map=None,\n        metastore_conn_id='metastore_default'):\n    \"\"\"\n    Gets the max partition for a table.\n\n    :param schema: The hive schema the table lives in\n    :type schema: str\n    :param table: The hive table you are interested in, supports the dot\n        notation as in \"my_database.my_table\", if a dot is found,\n        the schema param is disregarded\n    :type table: str\n    :param metastore_conn_id: The hive connection you are interested in.\n        If your default is set you don't need to use this parameter.\n    :type metastore_conn_id: str\n    :param filter_map: partition_key:partition_value map used for partition filtering,\n                       e.g. {'key1': 'value1', 'key2': 'value2'}.\n                       Only partitions matching all partition_key:partition_value\n                       pairs will be considered as candidates of max partition.\n    :type filter_map: map\n    :param field: the field to get the max value from. If there's only\n        one partition field, this will be inferred\n    :type field: str\n\n    >>> max_partition('airflow.static_babynames_partitioned')\n    '2015-01-01'\n    \"\"\"\n    from airflow.hooks.hive_hooks import HiveMetastoreHook\n    if '.' in table:\n        schema, table = table.split('.')\n    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)\n    return hh.max_partition(\n        schema=schema, table_name=table, field=field, filter_map=filter_map)", "language": "python", "code": "def max_partition(\n        table, schema=\"default\", field=None, filter_map=None,\n        metastore_conn_id='metastore_default'):\n    \"\"\"\n    Gets the max partition for a table.\n\n    :param schema: The hive schema the table lives in\n    :type schema: str\n    :param table: The hive table you are interested in, supports the dot\n        notation as in \"my_database.my_table\", if a dot is found,\n        the schema param is disregarded\n    :type table: str\n    :param metastore_conn_id: The hive connection you are interested in.\n        If your default is set you don't need to use this parameter.\n    :type metastore_conn_id: str\n    :param filter_map: partition_key:partition_value map used for partition filtering,\n                       e.g. {'key1': 'value1', 'key2': 'value2'}.\n                       Only partitions matching all partition_key:partition_value\n                       pairs will be considered as candidates of max partition.\n    :type filter_map: map\n    :param field: the field to get the max value from. If there's only\n        one partition field, this will be inferred\n    :type field: str\n\n    >>> max_partition('airflow.static_babynames_partitioned')\n    '2015-01-01'\n    \"\"\"\n    from airflow.hooks.hive_hooks import HiveMetastoreHook\n    if '.' in table:\n        schema, table = table.split('.')\n    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)\n    return hh.max_partition(\n        schema=schema, table_name=table, field=field, filter_map=filter_map)", "code_tokens": ["def", "max_partition", "(", "table", ",", "schema", "=", "\"default\"", ",", "field", "=", "None", ",", "filter_map", "=", "None", ",", "metastore_conn_id", "=", "'metastore_default'", ")", ":", "from", "airflow", ".", "hooks", ".", "hive_hooks", "import", "HiveMetastoreHook", "if", "'.'", "in", "table", ":", "schema", ",", "table", "=", "table", ".", "split", "(", "'.'", ")", "hh", "=", "HiveMetastoreHook", "(", "metastore_conn_id", "=", "metastore_conn_id", ")", "return", "hh", ".", "max_partition", "(", "schema", "=", "schema", ",", "table_name", "=", "table", ",", "field", "=", "field", ",", "filter_map", "=", "filter_map", ")"], "docstring": "Gets the max partition for a table.\n\n    :param schema: The hive schema the table lives in\n    :type schema: str\n    :param table: The hive table you are interested in, supports the dot\n        notation as in \"my_database.my_table\", if a dot is found,\n        the schema param is disregarded\n    :type table: str\n    :param metastore_conn_id: The hive connection you are interested in.\n        If your default is set you don't need to use this parameter.\n    :type metastore_conn_id: str\n    :param filter_map: partition_key:partition_value map used for partition filtering,\n                       e.g. {'key1': 'value1', 'key2': 'value2'}.\n                       Only partitions matching all partition_key:partition_value\n                       pairs will be considered as candidates of max partition.\n    :type filter_map: map\n    :param field: the field to get the max value from. If there's only\n        one partition field, this will be inferred\n    :type field: str\n\n    >>> max_partition('airflow.static_babynames_partitioned')\n    '2015-01-01'", "docstring_tokens": ["Gets", "the", "max", "partition", "for", "a", "table", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/macros/hive.py#L23-L55", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/mysql_hook.py", "func_name": "MySqlHook.get_conn", "original_string": "def get_conn(self):\n        \"\"\"\n        Returns a mysql connection object\n        \"\"\"\n        conn = self.get_connection(self.mysql_conn_id)\n        conn_config = {\n            \"user\": conn.login,\n            \"passwd\": conn.password or '',\n            \"host\": conn.host or 'localhost',\n            \"db\": self.schema or conn.schema or ''\n        }\n\n        if not conn.port:\n            conn_config[\"port\"] = 3306\n        else:\n            conn_config[\"port\"] = int(conn.port)\n\n        if conn.extra_dejson.get('charset', False):\n            conn_config[\"charset\"] = conn.extra_dejson[\"charset\"]\n            if (conn_config[\"charset\"]).lower() == 'utf8' or\\\n                    (conn_config[\"charset\"]).lower() == 'utf-8':\n                conn_config[\"use_unicode\"] = True\n        if conn.extra_dejson.get('cursor', False):\n            if (conn.extra_dejson[\"cursor\"]).lower() == 'sscursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.SSCursor\n            elif (conn.extra_dejson[\"cursor\"]).lower() == 'dictcursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.DictCursor\n            elif (conn.extra_dejson[\"cursor\"]).lower() == 'ssdictcursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.SSDictCursor\n        local_infile = conn.extra_dejson.get('local_infile', False)\n        if conn.extra_dejson.get('ssl', False):\n            # SSL parameter for MySQL has to be a dictionary and in case\n            # of extra/dejson we can get string if extra is passed via\n            # URL parameters\n            dejson_ssl = conn.extra_dejson['ssl']\n            if isinstance(dejson_ssl, six.string_types):\n                dejson_ssl = json.loads(dejson_ssl)\n            conn_config['ssl'] = dejson_ssl\n        if conn.extra_dejson.get('unix_socket'):\n            conn_config['unix_socket'] = conn.extra_dejson['unix_socket']\n        if local_infile:\n            conn_config[\"local_infile\"] = 1\n        conn = MySQLdb.connect(**conn_config)\n        return conn", "language": "python", "code": "def get_conn(self):\n        \"\"\"\n        Returns a mysql connection object\n        \"\"\"\n        conn = self.get_connection(self.mysql_conn_id)\n        conn_config = {\n            \"user\": conn.login,\n            \"passwd\": conn.password or '',\n            \"host\": conn.host or 'localhost',\n            \"db\": self.schema or conn.schema or ''\n        }\n\n        if not conn.port:\n            conn_config[\"port\"] = 3306\n        else:\n            conn_config[\"port\"] = int(conn.port)\n\n        if conn.extra_dejson.get('charset', False):\n            conn_config[\"charset\"] = conn.extra_dejson[\"charset\"]\n            if (conn_config[\"charset\"]).lower() == 'utf8' or\\\n                    (conn_config[\"charset\"]).lower() == 'utf-8':\n                conn_config[\"use_unicode\"] = True\n        if conn.extra_dejson.get('cursor', False):\n            if (conn.extra_dejson[\"cursor\"]).lower() == 'sscursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.SSCursor\n            elif (conn.extra_dejson[\"cursor\"]).lower() == 'dictcursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.DictCursor\n            elif (conn.extra_dejson[\"cursor\"]).lower() == 'ssdictcursor':\n                conn_config[\"cursorclass\"] = MySQLdb.cursors.SSDictCursor\n        local_infile = conn.extra_dejson.get('local_infile', False)\n        if conn.extra_dejson.get('ssl', False):\n            # SSL parameter for MySQL has to be a dictionary and in case\n            # of extra/dejson we can get string if extra is passed via\n            # URL parameters\n            dejson_ssl = conn.extra_dejson['ssl']\n            if isinstance(dejson_ssl, six.string_types):\n                dejson_ssl = json.loads(dejson_ssl)\n            conn_config['ssl'] = dejson_ssl\n        if conn.extra_dejson.get('unix_socket'):\n            conn_config['unix_socket'] = conn.extra_dejson['unix_socket']\n        if local_infile:\n            conn_config[\"local_infile\"] = 1\n        conn = MySQLdb.connect(**conn_config)\n        return conn", "code_tokens": ["def", "get_conn", "(", "self", ")", ":", "conn", "=", "self", ".", "get_connection", "(", "self", ".", "mysql_conn_id", ")", "conn_config", "=", "{", "\"user\"", ":", "conn", ".", "login", ",", "\"passwd\"", ":", "conn", ".", "password", "or", "''", ",", "\"host\"", ":", "conn", ".", "host", "or", "'localhost'", ",", "\"db\"", ":", "self", ".", "schema", "or", "conn", ".", "schema", "or", "''", "}", "if", "not", "conn", ".", "port", ":", "conn_config", "[", "\"port\"", "]", "=", "3306", "else", ":", "conn_config", "[", "\"port\"", "]", "=", "int", "(", "conn", ".", "port", ")", "if", "conn", ".", "extra_dejson", ".", "get", "(", "'charset'", ",", "False", ")", ":", "conn_config", "[", "\"charset\"", "]", "=", "conn", ".", "extra_dejson", "[", "\"charset\"", "]", "if", "(", "conn_config", "[", "\"charset\"", "]", ")", ".", "lower", "(", ")", "==", "'utf8'", "or", "(", "conn_config", "[", "\"charset\"", "]", ")", ".", "lower", "(", ")", "==", "'utf-8'", ":", "conn_config", "[", "\"use_unicode\"", "]", "=", "True", "if", "conn", ".", "extra_dejson", ".", "get", "(", "'cursor'", ",", "False", ")", ":", "if", "(", "conn", ".", "extra_dejson", "[", "\"cursor\"", "]", ")", ".", "lower", "(", ")", "==", "'sscursor'", ":", "conn_config", "[", "\"cursorclass\"", "]", "=", "MySQLdb", ".", "cursors", ".", "SSCursor", "elif", "(", "conn", ".", "extra_dejson", "[", "\"cursor\"", "]", ")", ".", "lower", "(", ")", "==", "'dictcursor'", ":", "conn_config", "[", "\"cursorclass\"", "]", "=", "MySQLdb", ".", "cursors", ".", "DictCursor", "elif", "(", "conn", ".", "extra_dejson", "[", "\"cursor\"", "]", ")", ".", "lower", "(", ")", "==", "'ssdictcursor'", ":", "conn_config", "[", "\"cursorclass\"", "]", "=", "MySQLdb", ".", "cursors", ".", "SSDictCursor", "local_infile", "=", "conn", ".", "extra_dejson", ".", "get", "(", "'local_infile'", ",", "False", ")", "if", "conn", ".", "extra_dejson", ".", "get", "(", "'ssl'", ",", "False", ")", ":", "# SSL parameter for MySQL has to be a dictionary and in case", "# of extra/dejson we can get string if extra is passed via", "# URL parameters", "dejson_ssl", "=", "conn", ".", "extra_dejson", "[", "'ssl'", "]", "if", "isinstance", "(", "dejson_ssl", ",", "six", ".", "string_types", ")", ":", "dejson_ssl", "=", "json", ".", "loads", "(", "dejson_ssl", ")", "conn_config", "[", "'ssl'", "]", "=", "dejson_ssl", "if", "conn", ".", "extra_dejson", ".", "get", "(", "'unix_socket'", ")", ":", "conn_config", "[", "'unix_socket'", "]", "=", "conn", ".", "extra_dejson", "[", "'unix_socket'", "]", "if", "local_infile", ":", "conn_config", "[", "\"local_infile\"", "]", "=", "1", "conn", "=", "MySQLdb", ".", "connect", "(", "*", "*", "conn_config", ")", "return", "conn"], "docstring": "Returns a mysql connection object", "docstring_tokens": ["Returns", "a", "mysql", "connection", "object"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/mysql_hook.py#L62-L105", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/bin/cli.py", "func_name": "task_state", "original_string": "def task_state(args):\n    \"\"\"\n    Returns the state of a TaskInstance at the command line.\n    >>> airflow task_state tutorial sleep 2015-01-01\n    success\n    \"\"\"\n    dag = get_dag(args)\n    task = dag.get_task(task_id=args.task_id)\n    ti = TaskInstance(task, args.execution_date)\n    print(ti.current_state())", "language": "python", "code": "def task_state(args):\n    \"\"\"\n    Returns the state of a TaskInstance at the command line.\n    >>> airflow task_state tutorial sleep 2015-01-01\n    success\n    \"\"\"\n    dag = get_dag(args)\n    task = dag.get_task(task_id=args.task_id)\n    ti = TaskInstance(task, args.execution_date)\n    print(ti.current_state())", "code_tokens": ["def", "task_state", "(", "args", ")", ":", "dag", "=", "get_dag", "(", "args", ")", "task", "=", "dag", ".", "get_task", "(", "task_id", "=", "args", ".", "task_id", ")", "ti", "=", "TaskInstance", "(", "task", ",", "args", ".", "execution_date", ")", "print", "(", "ti", ".", "current_state", "(", ")", ")"], "docstring": "Returns the state of a TaskInstance at the command line.\n    >>> airflow task_state tutorial sleep 2015-01-01\n    success", "docstring_tokens": ["Returns", "the", "state", "of", "a", "TaskInstance", "at", "the", "command", "line", ".", ">>>", "airflow", "task_state", "tutorial", "sleep", "2015", "-", "01", "-", "01", "success"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/bin/cli.py#L554-L563", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/bin/cli.py", "func_name": "restart_workers", "original_string": "def restart_workers(gunicorn_master_proc, num_workers_expected, master_timeout):\n    \"\"\"\n    Runs forever, monitoring the child processes of @gunicorn_master_proc and\n    restarting workers occasionally.\n    Each iteration of the loop traverses one edge of this state transition\n    diagram, where each state (node) represents\n    [ num_ready_workers_running / num_workers_running ]. We expect most time to\n    be spent in [n / n]. `bs` is the setting webserver.worker_refresh_batch_size.\n    The horizontal transition at ? happens after the new worker parses all the\n    dags (so it could take a while!)\n       V \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n    [n / n] \u2500\u2500TTIN\u2500\u2500> [ [n, n+bs) / n + bs ]  \u2500\u2500\u2500\u2500?\u2500\u2500\u2500> [n + bs / n + bs] \u2500\u2500TTOU\u2500\u2518\n       ^                          ^\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       \u2502\n       \u2502      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500v\n       \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500 [ [0, n) / n ] <\u2500\u2500\u2500 start\n    We change the number of workers by sending TTIN and TTOU to the gunicorn\n    master process, which increases and decreases the number of child workers\n    respectively. Gunicorn guarantees that on TTOU workers are terminated\n    gracefully and that the oldest worker is terminated.\n    \"\"\"\n\n    def wait_until_true(fn, timeout=0):\n        \"\"\"\n        Sleeps until fn is true\n        \"\"\"\n        t = time.time()\n        while not fn():\n            if 0 < timeout <= time.time() - t:\n                raise AirflowWebServerTimeout(\n                    \"No response from gunicorn master within {0} seconds\"\n                    .format(timeout))\n            time.sleep(0.1)\n\n    def start_refresh(gunicorn_master_proc):\n        batch_size = conf.getint('webserver', 'worker_refresh_batch_size')\n        log.debug('%s doing a refresh of %s workers', state, batch_size)\n        sys.stdout.flush()\n        sys.stderr.flush()\n\n        excess = 0\n        for _ in range(batch_size):\n            gunicorn_master_proc.send_signal(signal.SIGTTIN)\n            excess += 1\n            wait_until_true(lambda: num_workers_expected + excess ==\n                            get_num_workers_running(gunicorn_master_proc),\n                            master_timeout)\n\n    try:\n        wait_until_true(lambda: num_workers_expected ==\n                        get_num_workers_running(gunicorn_master_proc),\n                        master_timeout)\n        while True:\n            num_workers_running = get_num_workers_running(gunicorn_master_proc)\n            num_ready_workers_running = \\\n                get_num_ready_workers_running(gunicorn_master_proc)\n\n            state = '[{0} / {1}]'.format(num_ready_workers_running, num_workers_running)\n\n            # Whenever some workers are not ready, wait until all workers are ready\n            if num_ready_workers_running < num_workers_running:\n                log.debug('%s some workers are starting up, waiting...', state)\n                sys.stdout.flush()\n                time.sleep(1)\n\n            # Kill a worker gracefully by asking gunicorn to reduce number of workers\n            elif num_workers_running > num_workers_expected:\n                excess = num_workers_running - num_workers_expected\n                log.debug('%s killing %s workers', state, excess)\n\n                for _ in range(excess):\n                    gunicorn_master_proc.send_signal(signal.SIGTTOU)\n                    excess -= 1\n                    wait_until_true(lambda: num_workers_expected + excess ==\n                                    get_num_workers_running(gunicorn_master_proc),\n                                    master_timeout)\n\n            # Start a new worker by asking gunicorn to increase number of workers\n            elif num_workers_running == num_workers_expected:\n                refresh_interval = conf.getint('webserver', 'worker_refresh_interval')\n                log.debug(\n                    '%s sleeping for %ss starting doing a refresh...',\n                    state, refresh_interval\n                )\n                time.sleep(refresh_interval)\n                start_refresh(gunicorn_master_proc)\n\n            else:\n                # num_ready_workers_running == num_workers_running < num_workers_expected\n                log.error((\n                    \"%s some workers seem to have died and gunicorn\"\n                    \"did not restart them as expected\"\n                ), state)\n                time.sleep(10)\n                if len(\n                    psutil.Process(gunicorn_master_proc.pid).children()\n                ) < num_workers_expected:\n                    start_refresh(gunicorn_master_proc)\n    except (AirflowWebServerTimeout, OSError) as err:\n        log.error(err)\n        log.error(\"Shutting down webserver\")\n        try:\n            gunicorn_master_proc.terminate()\n            gunicorn_master_proc.wait()\n        finally:\n            sys.exit(1)", "language": "python", "code": "def restart_workers(gunicorn_master_proc, num_workers_expected, master_timeout):\n    \"\"\"\n    Runs forever, monitoring the child processes of @gunicorn_master_proc and\n    restarting workers occasionally.\n    Each iteration of the loop traverses one edge of this state transition\n    diagram, where each state (node) represents\n    [ num_ready_workers_running / num_workers_running ]. We expect most time to\n    be spent in [n / n]. `bs` is the setting webserver.worker_refresh_batch_size.\n    The horizontal transition at ? happens after the new worker parses all the\n    dags (so it could take a while!)\n       V \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n    [n / n] \u2500\u2500TTIN\u2500\u2500> [ [n, n+bs) / n + bs ]  \u2500\u2500\u2500\u2500?\u2500\u2500\u2500> [n + bs / n + bs] \u2500\u2500TTOU\u2500\u2518\n       ^                          ^\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       \u2502\n       \u2502      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500v\n       \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500 [ [0, n) / n ] <\u2500\u2500\u2500 start\n    We change the number of workers by sending TTIN and TTOU to the gunicorn\n    master process, which increases and decreases the number of child workers\n    respectively. Gunicorn guarantees that on TTOU workers are terminated\n    gracefully and that the oldest worker is terminated.\n    \"\"\"\n\n    def wait_until_true(fn, timeout=0):\n        \"\"\"\n        Sleeps until fn is true\n        \"\"\"\n        t = time.time()\n        while not fn():\n            if 0 < timeout <= time.time() - t:\n                raise AirflowWebServerTimeout(\n                    \"No response from gunicorn master within {0} seconds\"\n                    .format(timeout))\n            time.sleep(0.1)\n\n    def start_refresh(gunicorn_master_proc):\n        batch_size = conf.getint('webserver', 'worker_refresh_batch_size')\n        log.debug('%s doing a refresh of %s workers', state, batch_size)\n        sys.stdout.flush()\n        sys.stderr.flush()\n\n        excess = 0\n        for _ in range(batch_size):\n            gunicorn_master_proc.send_signal(signal.SIGTTIN)\n            excess += 1\n            wait_until_true(lambda: num_workers_expected + excess ==\n                            get_num_workers_running(gunicorn_master_proc),\n                            master_timeout)\n\n    try:\n        wait_until_true(lambda: num_workers_expected ==\n                        get_num_workers_running(gunicorn_master_proc),\n                        master_timeout)\n        while True:\n            num_workers_running = get_num_workers_running(gunicorn_master_proc)\n            num_ready_workers_running = \\\n                get_num_ready_workers_running(gunicorn_master_proc)\n\n            state = '[{0} / {1}]'.format(num_ready_workers_running, num_workers_running)\n\n            # Whenever some workers are not ready, wait until all workers are ready\n            if num_ready_workers_running < num_workers_running:\n                log.debug('%s some workers are starting up, waiting...', state)\n                sys.stdout.flush()\n                time.sleep(1)\n\n            # Kill a worker gracefully by asking gunicorn to reduce number of workers\n            elif num_workers_running > num_workers_expected:\n                excess = num_workers_running - num_workers_expected\n                log.debug('%s killing %s workers', state, excess)\n\n                for _ in range(excess):\n                    gunicorn_master_proc.send_signal(signal.SIGTTOU)\n                    excess -= 1\n                    wait_until_true(lambda: num_workers_expected + excess ==\n                                    get_num_workers_running(gunicorn_master_proc),\n                                    master_timeout)\n\n            # Start a new worker by asking gunicorn to increase number of workers\n            elif num_workers_running == num_workers_expected:\n                refresh_interval = conf.getint('webserver', 'worker_refresh_interval')\n                log.debug(\n                    '%s sleeping for %ss starting doing a refresh...',\n                    state, refresh_interval\n                )\n                time.sleep(refresh_interval)\n                start_refresh(gunicorn_master_proc)\n\n            else:\n                # num_ready_workers_running == num_workers_running < num_workers_expected\n                log.error((\n                    \"%s some workers seem to have died and gunicorn\"\n                    \"did not restart them as expected\"\n                ), state)\n                time.sleep(10)\n                if len(\n                    psutil.Process(gunicorn_master_proc.pid).children()\n                ) < num_workers_expected:\n                    start_refresh(gunicorn_master_proc)\n    except (AirflowWebServerTimeout, OSError) as err:\n        log.error(err)\n        log.error(\"Shutting down webserver\")\n        try:\n            gunicorn_master_proc.terminate()\n            gunicorn_master_proc.wait()\n        finally:\n            sys.exit(1)", "code_tokens": ["def", "restart_workers", "(", "gunicorn_master_proc", ",", "num_workers_expected", ",", "master_timeout", ")", ":", "def", "wait_until_true", "(", "fn", ",", "timeout", "=", "0", ")", ":", "\"\"\"\n        Sleeps until fn is true\n        \"\"\"", "t", "=", "time", ".", "time", "(", ")", "while", "not", "fn", "(", ")", ":", "if", "0", "<", "timeout", "<=", "time", ".", "time", "(", ")", "-", "t", ":", "raise", "AirflowWebServerTimeout", "(", "\"No response from gunicorn master within {0} seconds\"", ".", "format", "(", "timeout", ")", ")", "time", ".", "sleep", "(", "0.1", ")", "def", "start_refresh", "(", "gunicorn_master_proc", ")", ":", "batch_size", "=", "conf", ".", "getint", "(", "'webserver'", ",", "'worker_refresh_batch_size'", ")", "log", ".", "debug", "(", "'%s doing a refresh of %s workers'", ",", "state", ",", "batch_size", ")", "sys", ".", "stdout", ".", "flush", "(", ")", "sys", ".", "stderr", ".", "flush", "(", ")", "excess", "=", "0", "for", "_", "in", "range", "(", "batch_size", ")", ":", "gunicorn_master_proc", ".", "send_signal", "(", "signal", ".", "SIGTTIN", ")", "excess", "+=", "1", "wait_until_true", "(", "lambda", ":", "num_workers_expected", "+", "excess", "==", "get_num_workers_running", "(", "gunicorn_master_proc", ")", ",", "master_timeout", ")", "try", ":", "wait_until_true", "(", "lambda", ":", "num_workers_expected", "==", "get_num_workers_running", "(", "gunicorn_master_proc", ")", ",", "master_timeout", ")", "while", "True", ":", "num_workers_running", "=", "get_num_workers_running", "(", "gunicorn_master_proc", ")", "num_ready_workers_running", "=", "get_num_ready_workers_running", "(", "gunicorn_master_proc", ")", "state", "=", "'[{0} / {1}]'", ".", "format", "(", "num_ready_workers_running", ",", "num_workers_running", ")", "# Whenever some workers are not ready, wait until all workers are ready", "if", "num_ready_workers_running", "<", "num_workers_running", ":", "log", ".", "debug", "(", "'%s some workers are starting up, waiting...'", ",", "state", ")", "sys", ".", "stdout", ".", "flush", "(", ")", "time", ".", "sleep", "(", "1", ")", "# Kill a worker gracefully by asking gunicorn to reduce number of workers", "elif", "num_workers_running", ">", "num_workers_expected", ":", "excess", "=", "num_workers_running", "-", "num_workers_expected", "log", ".", "debug", "(", "'%s killing %s workers'", ",", "state", ",", "excess", ")", "for", "_", "in", "range", "(", "excess", ")", ":", "gunicorn_master_proc", ".", "send_signal", "(", "signal", ".", "SIGTTOU", ")", "excess", "-=", "1", "wait_until_true", "(", "lambda", ":", "num_workers_expected", "+", "excess", "==", "get_num_workers_running", "(", "gunicorn_master_proc", ")", ",", "master_timeout", ")", "# Start a new worker by asking gunicorn to increase number of workers", "elif", "num_workers_running", "==", "num_workers_expected", ":", "refresh_interval", "=", "conf", ".", "getint", "(", "'webserver'", ",", "'worker_refresh_interval'", ")", "log", ".", "debug", "(", "'%s sleeping for %ss starting doing a refresh...'", ",", "state", ",", "refresh_interval", ")", "time", ".", "sleep", "(", "refresh_interval", ")", "start_refresh", "(", "gunicorn_master_proc", ")", "else", ":", "# num_ready_workers_running == num_workers_running < num_workers_expected", "log", ".", "error", "(", "(", "\"%s some workers seem to have died and gunicorn\"", "\"did not restart them as expected\"", ")", ",", "state", ")", "time", ".", "sleep", "(", "10", ")", "if", "len", "(", "psutil", ".", "Process", "(", "gunicorn_master_proc", ".", "pid", ")", ".", "children", "(", ")", ")", "<", "num_workers_expected", ":", "start_refresh", "(", "gunicorn_master_proc", ")", "except", "(", "AirflowWebServerTimeout", ",", "OSError", ")", "as", "err", ":", "log", ".", "error", "(", "err", ")", "log", ".", "error", "(", "\"Shutting down webserver\"", ")", "try", ":", "gunicorn_master_proc", ".", "terminate", "(", ")", "gunicorn_master_proc", ".", "wait", "(", ")", "finally", ":", "sys", ".", "exit", "(", "1", ")"], "docstring": "Runs forever, monitoring the child processes of @gunicorn_master_proc and\n    restarting workers occasionally.\n    Each iteration of the loop traverses one edge of this state transition\n    diagram, where each state (node) represents\n    [ num_ready_workers_running / num_workers_running ]. We expect most time to\n    be spent in [n / n]. `bs` is the setting webserver.worker_refresh_batch_size.\n    The horizontal transition at ? happens after the new worker parses all the\n    dags (so it could take a while!)\n       V \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n    [n / n] \u2500\u2500TTIN\u2500\u2500> [ [n, n+bs) / n + bs ]  \u2500\u2500\u2500\u2500?\u2500\u2500\u2500> [n + bs / n + bs] \u2500\u2500TTOU\u2500\u2518\n       ^                          ^\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n       \u2502\n       \u2502      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500v\n       \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2500 [ [0, n) / n ] <\u2500\u2500\u2500 start\n    We change the number of workers by sending TTIN and TTOU to the gunicorn\n    master process, which increases and decreases the number of child workers\n    respectively. Gunicorn guarantees that on TTOU workers are terminated\n    gracefully and that the oldest worker is terminated.", "docstring_tokens": ["Runs", "forever", "monitoring", "the", "child", "processes", "of"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/bin/cli.py#L763-L868", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_translate_hook.py", "func_name": "CloudTranslateHook.get_conn", "original_string": "def get_conn(self):\n        \"\"\"\n        Retrieves connection to Cloud Translate\n\n        :return: Google Cloud Translate client object.\n        :rtype: Client\n        \"\"\"\n        if not self._client:\n            self._client = Client(credentials=self._get_credentials())\n        return self._client", "language": "python", "code": "def get_conn(self):\n        \"\"\"\n        Retrieves connection to Cloud Translate\n\n        :return: Google Cloud Translate client object.\n        :rtype: Client\n        \"\"\"\n        if not self._client:\n            self._client = Client(credentials=self._get_credentials())\n        return self._client", "code_tokens": ["def", "get_conn", "(", "self", ")", ":", "if", "not", "self", ".", "_client", ":", "self", ".", "_client", "=", "Client", "(", "credentials", "=", "self", ".", "_get_credentials", "(", ")", ")", "return", "self", ".", "_client"], "docstring": "Retrieves connection to Cloud Translate\n\n        :return: Google Cloud Translate client object.\n        :rtype: Client", "docstring_tokens": ["Retrieves", "connection", "to", "Cloud", "Translate"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_translate_hook.py#L34-L43", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_translate_hook.py", "func_name": "CloudTranslateHook.translate", "original_string": "def translate(\n        self, values, target_language, format_=None, source_language=None, model=None\n    ):\n        \"\"\"Translate a string or list of strings.\n\n        See https://cloud.google.com/translate/docs/translating-text\n\n        :type values: str or list\n        :param values: String or list of strings to translate.\n\n        :type target_language: str\n        :param target_language: The language to translate results into. This\n                                is required by the API and defaults to\n                                the target language of the current instance.\n\n        :type format_: str\n        :param format_: (Optional) One of ``text`` or ``html``, to specify\n                        if the input text is plain text or HTML.\n\n        :type source_language: str or None\n        :param source_language: (Optional) The language of the text to\n                                be translated.\n\n        :type model: str or None\n        :param model: (Optional) The model used to translate the text, such\n                      as ``'base'`` or ``'nmt'``.\n\n        :rtype: str or list\n        :returns: A list of dictionaries for each queried value. Each\n                  dictionary typically contains three keys (though not\n                  all will be present in all cases)\n\n                  * ``detectedSourceLanguage``: The detected language (as an\n                    ISO 639-1 language code) of the text.\n                  * ``translatedText``: The translation of the text into the\n                    target language.\n                  * ``input``: The corresponding input value.\n                  * ``model``: The model used to translate the text.\n\n                  If only a single value is passed, then only a single\n                  dictionary will be returned.\n        :raises: :class:`~exceptions.ValueError` if the number of\n                 values and translations differ.\n        \"\"\"\n        client = self.get_conn()\n\n        return client.translate(\n            values=values,\n            target_language=target_language,\n            format_=format_,\n            source_language=source_language,\n            model=model,\n        )", "language": "python", "code": "def translate(\n        self, values, target_language, format_=None, source_language=None, model=None\n    ):\n        \"\"\"Translate a string or list of strings.\n\n        See https://cloud.google.com/translate/docs/translating-text\n\n        :type values: str or list\n        :param values: String or list of strings to translate.\n\n        :type target_language: str\n        :param target_language: The language to translate results into. This\n                                is required by the API and defaults to\n                                the target language of the current instance.\n\n        :type format_: str\n        :param format_: (Optional) One of ``text`` or ``html``, to specify\n                        if the input text is plain text or HTML.\n\n        :type source_language: str or None\n        :param source_language: (Optional) The language of the text to\n                                be translated.\n\n        :type model: str or None\n        :param model: (Optional) The model used to translate the text, such\n                      as ``'base'`` or ``'nmt'``.\n\n        :rtype: str or list\n        :returns: A list of dictionaries for each queried value. Each\n                  dictionary typically contains three keys (though not\n                  all will be present in all cases)\n\n                  * ``detectedSourceLanguage``: The detected language (as an\n                    ISO 639-1 language code) of the text.\n                  * ``translatedText``: The translation of the text into the\n                    target language.\n                  * ``input``: The corresponding input value.\n                  * ``model``: The model used to translate the text.\n\n                  If only a single value is passed, then only a single\n                  dictionary will be returned.\n        :raises: :class:`~exceptions.ValueError` if the number of\n                 values and translations differ.\n        \"\"\"\n        client = self.get_conn()\n\n        return client.translate(\n            values=values,\n            target_language=target_language,\n            format_=format_,\n            source_language=source_language,\n            model=model,\n        )", "code_tokens": ["def", "translate", "(", "self", ",", "values", ",", "target_language", ",", "format_", "=", "None", ",", "source_language", "=", "None", ",", "model", "=", "None", ")", ":", "client", "=", "self", ".", "get_conn", "(", ")", "return", "client", ".", "translate", "(", "values", "=", "values", ",", "target_language", "=", "target_language", ",", "format_", "=", "format_", ",", "source_language", "=", "source_language", ",", "model", "=", "model", ",", ")"], "docstring": "Translate a string or list of strings.\n\n        See https://cloud.google.com/translate/docs/translating-text\n\n        :type values: str or list\n        :param values: String or list of strings to translate.\n\n        :type target_language: str\n        :param target_language: The language to translate results into. This\n                                is required by the API and defaults to\n                                the target language of the current instance.\n\n        :type format_: str\n        :param format_: (Optional) One of ``text`` or ``html``, to specify\n                        if the input text is plain text or HTML.\n\n        :type source_language: str or None\n        :param source_language: (Optional) The language of the text to\n                                be translated.\n\n        :type model: str or None\n        :param model: (Optional) The model used to translate the text, such\n                      as ``'base'`` or ``'nmt'``.\n\n        :rtype: str or list\n        :returns: A list of dictionaries for each queried value. Each\n                  dictionary typically contains three keys (though not\n                  all will be present in all cases)\n\n                  * ``detectedSourceLanguage``: The detected language (as an\n                    ISO 639-1 language code) of the text.\n                  * ``translatedText``: The translation of the text into the\n                    target language.\n                  * ``input``: The corresponding input value.\n                  * ``model``: The model used to translate the text.\n\n                  If only a single value is passed, then only a single\n                  dictionary will be returned.\n        :raises: :class:`~exceptions.ValueError` if the number of\n                 values and translations differ.", "docstring_tokens": ["Translate", "a", "string", "or", "list", "of", "strings", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_translate_hook.py#L45-L97", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.get_instance", "original_string": "def get_instance(self, instance, project_id=None):\n        \"\"\"\n        Retrieves a resource containing information about a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL instance resource.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().instances().get(\n            project=project_id,\n            instance=instance\n        ).execute(num_retries=self.num_retries)", "language": "python", "code": "def get_instance(self, instance, project_id=None):\n        \"\"\"\n        Retrieves a resource containing information about a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL instance resource.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().instances().get(\n            project=project_id,\n            instance=instance\n        ).execute(num_retries=self.num_retries)", "code_tokens": ["def", "get_instance", "(", "self", ",", "instance", ",", "project_id", "=", "None", ")", ":", "return", "self", ".", "get_conn", "(", ")", ".", "instances", "(", ")", ".", "get", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")"], "docstring": "Retrieves a resource containing information about a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL instance resource.\n        :rtype: dict", "docstring_tokens": ["Retrieves", "a", "resource", "containing", "information", "about", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L97-L112", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.create_instance", "original_string": "def create_instance(self, body, project_id=None):\n        \"\"\"\n        Creates a new Cloud SQL instance.\n\n        :param body: Body required by the Cloud SQL insert API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().insert(\n            project=project_id,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def create_instance(self, body, project_id=None):\n        \"\"\"\n        Creates a new Cloud SQL instance.\n\n        :param body: Body required by the Cloud SQL insert API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().insert(\n            project=project_id,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "create_instance", "(", "self", ",", "body", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "instances", "(", ")", ".", "insert", "(", "project", "=", "project_id", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Creates a new Cloud SQL instance.\n\n        :param body: Body required by the Cloud SQL insert API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Creates", "a", "new", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L115-L133", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.patch_instance", "original_string": "def patch_instance(self, body, instance, project_id=None):\n        \"\"\"\n        Updates settings of a Cloud SQL instance.\n\n        Caution: This is not a partial update, so you must include values for\n        all the settings that you want to retain.\n\n        :param body: Body required by the Cloud SQL patch API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body.\n        :type body: dict\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().patch(\n            project=project_id,\n            instance=instance,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def patch_instance(self, body, instance, project_id=None):\n        \"\"\"\n        Updates settings of a Cloud SQL instance.\n\n        Caution: This is not a partial update, so you must include values for\n        all the settings that you want to retain.\n\n        :param body: Body required by the Cloud SQL patch API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body.\n        :type body: dict\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().patch(\n            project=project_id,\n            instance=instance,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "patch_instance", "(", "self", ",", "body", ",", "instance", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "instances", "(", ")", ".", "patch", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Updates settings of a Cloud SQL instance.\n\n        Caution: This is not a partial update, so you must include values for\n        all the settings that you want to retain.\n\n        :param body: Body required by the Cloud SQL patch API, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body.\n        :type body: dict\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Updates", "settings", "of", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L136-L160", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.delete_instance", "original_string": "def delete_instance(self, instance, project_id=None):\n        \"\"\"\n        Deletes a Cloud SQL instance.\n\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().delete(\n            project=project_id,\n            instance=instance,\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def delete_instance(self, instance, project_id=None):\n        \"\"\"\n        Deletes a Cloud SQL instance.\n\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().instances().delete(\n            project=project_id,\n            instance=instance,\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "delete_instance", "(", "self", ",", "instance", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "instances", "(", ")", ".", "delete", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Deletes a Cloud SQL instance.\n\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :param instance: Cloud SQL instance ID. This does not include the project ID.\n        :type instance: str\n        :return: None", "docstring_tokens": ["Deletes", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L163-L180", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.get_database", "original_string": "def get_database(self, instance, database, project_id=None):\n        \"\"\"\n        Retrieves a database resource from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL database resource, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().databases().get(\n            project=project_id,\n            instance=instance,\n            database=database\n        ).execute(num_retries=self.num_retries)", "language": "python", "code": "def get_database(self, instance, database, project_id=None):\n        \"\"\"\n        Retrieves a database resource from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL database resource, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().databases().get(\n            project=project_id,\n            instance=instance,\n            database=database\n        ).execute(num_retries=self.num_retries)", "code_tokens": ["def", "get_database", "(", "self", ",", "instance", ",", "database", ",", "project_id", "=", "None", ")", ":", "return", "self", ".", "get_conn", "(", ")", ".", "databases", "(", ")", ".", "get", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "database", "=", "database", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")"], "docstring": "Retrieves a database resource from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: A Cloud SQL database resource, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource.\n        :rtype: dict", "docstring_tokens": ["Retrieves", "a", "database", "resource", "from", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L183-L202", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.create_database", "original_string": "def create_database(self, instance, body, project_id=None):\n        \"\"\"\n        Creates a new database inside a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().insert(\n            project=project_id,\n            instance=instance,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def create_database(self, instance, body, project_id=None):\n        \"\"\"\n        Creates a new database inside a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().insert(\n            project=project_id,\n            instance=instance,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "create_database", "(", "self", ",", "instance", ",", "body", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "databases", "(", ")", ".", "insert", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Creates a new database inside a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Creates", "a", "new", "database", "inside", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L205-L226", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.patch_database", "original_string": "def patch_database(self, instance, database, body, project_id=None):\n        \"\"\"\n        Updates a database resource inside a Cloud SQL instance.\n\n        This method supports patch semantics.\n        See https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be updated in the instance.\n        :type database: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().patch(\n            project=project_id,\n            instance=instance,\n            database=database,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def patch_database(self, instance, database, body, project_id=None):\n        \"\"\"\n        Updates a database resource inside a Cloud SQL instance.\n\n        This method supports patch semantics.\n        See https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be updated in the instance.\n        :type database: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().patch(\n            project=project_id,\n            instance=instance,\n            database=database,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "patch_database", "(", "self", ",", "instance", ",", "database", ",", "body", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "databases", "(", ")", ".", "patch", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "database", "=", "database", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Updates a database resource inside a Cloud SQL instance.\n\n        This method supports patch semantics.\n        See https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be updated in the instance.\n        :type database: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body.\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Updates", "a", "database", "resource", "inside", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L229-L256", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.delete_database", "original_string": "def delete_database(self, instance, database, project_id=None):\n        \"\"\"\n        Deletes a database from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be deleted in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().delete(\n            project=project_id,\n            instance=instance,\n            database=database\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "language": "python", "code": "def delete_database(self, instance, database, project_id=None):\n        \"\"\"\n        Deletes a database from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be deleted in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().databases().delete(\n            project=project_id,\n            instance=instance,\n            database=database\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(project_id=project_id,\n                                             operation_name=operation_name)", "code_tokens": ["def", "delete_database", "(", "self", ",", "instance", ",", "database", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "databases", "(", ")", ".", "delete", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "database", "=", "database", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")"], "docstring": "Deletes a database from a Cloud SQL instance.\n\n        :param instance: Database instance ID. This does not include the project ID.\n        :type instance: str\n        :param database: Name of the database to be deleted in the instance.\n        :type database: str\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Deletes", "a", "database", "from", "a", "Cloud", "SQL", "instance", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L259-L279", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlHook.export_instance", "original_string": "def export_instance(self, instance, body, project_id=None):\n        \"\"\"\n        Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump\n        or CSV file.\n\n        :param instance: Database instance ID of the Cloud SQL instance. This does not include the\n            project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        try:\n            response = self.get_conn().instances().export(\n                project=project_id,\n                instance=instance,\n                body=body\n            ).execute(num_retries=self.num_retries)\n            operation_name = response[\"name\"]\n            self._wait_for_operation_to_complete(project_id=project_id,\n                                                 operation_name=operation_name)\n        except HttpError as ex:\n            raise AirflowException(\n                'Exporting instance {} failed: {}'.format(instance, ex.content)\n            )", "language": "python", "code": "def export_instance(self, instance, body, project_id=None):\n        \"\"\"\n        Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump\n        or CSV file.\n\n        :param instance: Database instance ID of the Cloud SQL instance. This does not include the\n            project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        try:\n            response = self.get_conn().instances().export(\n                project=project_id,\n                instance=instance,\n                body=body\n            ).execute(num_retries=self.num_retries)\n            operation_name = response[\"name\"]\n            self._wait_for_operation_to_complete(project_id=project_id,\n                                                 operation_name=operation_name)\n        except HttpError as ex:\n            raise AirflowException(\n                'Exporting instance {} failed: {}'.format(instance, ex.content)\n            )", "code_tokens": ["def", "export_instance", "(", "self", ",", "instance", ",", "body", ",", "project_id", "=", "None", ")", ":", "try", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "instances", "(", ")", ".", "export", "(", "project", "=", "project_id", ",", "instance", "=", "instance", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "project_id", "=", "project_id", ",", "operation_name", "=", "operation_name", ")", "except", "HttpError", "as", "ex", ":", "raise", "AirflowException", "(", "'Exporting instance {} failed: {}'", ".", "format", "(", "instance", ",", "ex", ".", "content", ")", ")"], "docstring": "Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump\n        or CSV file.\n\n        :param instance: Database instance ID of the Cloud SQL instance. This does not include the\n            project ID.\n        :type instance: str\n        :param body: The request body, as described in\n            https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body\n        :type body: dict\n        :param project_id: Project ID of the project that contains the instance. If set\n            to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Exports", "data", "from", "a", "Cloud", "SQL", "instance", "to", "a", "Cloud", "Storage", "bucket", "as", "a", "SQL", "dump", "or", "CSV", "file", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L282-L310", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlProxyRunner.start_proxy", "original_string": "def start_proxy(self):\n        \"\"\"\n        Starts Cloud SQL Proxy.\n\n        You have to remember to stop the proxy if you started it!\n        \"\"\"\n        self._download_sql_proxy_if_needed()\n        if self.sql_proxy_process:\n            raise AirflowException(\"The sql proxy is already running: {}\".format(\n                self.sql_proxy_process))\n        else:\n            command_to_run = [self.sql_proxy_path]\n            command_to_run.extend(self.command_line_parameters)\n            try:\n                self.log.info(\"Creating directory %s\",\n                              self.cloud_sql_proxy_socket_directory)\n                os.makedirs(self.cloud_sql_proxy_socket_directory)\n            except OSError:\n                # Needed for python 2 compatibility (exists_ok missing)\n                pass\n            command_to_run.extend(self._get_credential_parameters())\n            self.log.info(\"Running the command: `%s`\", \" \".join(command_to_run))\n            self.sql_proxy_process = Popen(command_to_run,\n                                           stdin=PIPE, stdout=PIPE, stderr=PIPE)\n            self.log.info(\"The pid of cloud_sql_proxy: %s\", self.sql_proxy_process.pid)\n            while True:\n                line = self.sql_proxy_process.stderr.readline().decode('utf-8')\n                return_code = self.sql_proxy_process.poll()\n                if line == '' and return_code is not None:\n                    self.sql_proxy_process = None\n                    raise AirflowException(\n                        \"The cloud_sql_proxy finished early with return code {}!\".format(\n                            return_code))\n                if line != '':\n                    self.log.info(line)\n                if \"googleapi: Error\" in line or \"invalid instance name:\" in line:\n                    self.stop_proxy()\n                    raise AirflowException(\n                        \"Error when starting the cloud_sql_proxy {}!\".format(\n                            line))\n                if \"Ready for new connections\" in line:\n                    return", "language": "python", "code": "def start_proxy(self):\n        \"\"\"\n        Starts Cloud SQL Proxy.\n\n        You have to remember to stop the proxy if you started it!\n        \"\"\"\n        self._download_sql_proxy_if_needed()\n        if self.sql_proxy_process:\n            raise AirflowException(\"The sql proxy is already running: {}\".format(\n                self.sql_proxy_process))\n        else:\n            command_to_run = [self.sql_proxy_path]\n            command_to_run.extend(self.command_line_parameters)\n            try:\n                self.log.info(\"Creating directory %s\",\n                              self.cloud_sql_proxy_socket_directory)\n                os.makedirs(self.cloud_sql_proxy_socket_directory)\n            except OSError:\n                # Needed for python 2 compatibility (exists_ok missing)\n                pass\n            command_to_run.extend(self._get_credential_parameters())\n            self.log.info(\"Running the command: `%s`\", \" \".join(command_to_run))\n            self.sql_proxy_process = Popen(command_to_run,\n                                           stdin=PIPE, stdout=PIPE, stderr=PIPE)\n            self.log.info(\"The pid of cloud_sql_proxy: %s\", self.sql_proxy_process.pid)\n            while True:\n                line = self.sql_proxy_process.stderr.readline().decode('utf-8')\n                return_code = self.sql_proxy_process.poll()\n                if line == '' and return_code is not None:\n                    self.sql_proxy_process = None\n                    raise AirflowException(\n                        \"The cloud_sql_proxy finished early with return code {}!\".format(\n                            return_code))\n                if line != '':\n                    self.log.info(line)\n                if \"googleapi: Error\" in line or \"invalid instance name:\" in line:\n                    self.stop_proxy()\n                    raise AirflowException(\n                        \"Error when starting the cloud_sql_proxy {}!\".format(\n                            line))\n                if \"Ready for new connections\" in line:\n                    return", "code_tokens": ["def", "start_proxy", "(", "self", ")", ":", "self", ".", "_download_sql_proxy_if_needed", "(", ")", "if", "self", ".", "sql_proxy_process", ":", "raise", "AirflowException", "(", "\"The sql proxy is already running: {}\"", ".", "format", "(", "self", ".", "sql_proxy_process", ")", ")", "else", ":", "command_to_run", "=", "[", "self", ".", "sql_proxy_path", "]", "command_to_run", ".", "extend", "(", "self", ".", "command_line_parameters", ")", "try", ":", "self", ".", "log", ".", "info", "(", "\"Creating directory %s\"", ",", "self", ".", "cloud_sql_proxy_socket_directory", ")", "os", ".", "makedirs", "(", "self", ".", "cloud_sql_proxy_socket_directory", ")", "except", "OSError", ":", "# Needed for python 2 compatibility (exists_ok missing)", "pass", "command_to_run", ".", "extend", "(", "self", ".", "_get_credential_parameters", "(", ")", ")", "self", ".", "log", ".", "info", "(", "\"Running the command: `%s`\"", ",", "\" \"", ".", "join", "(", "command_to_run", ")", ")", "self", ".", "sql_proxy_process", "=", "Popen", "(", "command_to_run", ",", "stdin", "=", "PIPE", ",", "stdout", "=", "PIPE", ",", "stderr", "=", "PIPE", ")", "self", ".", "log", ".", "info", "(", "\"The pid of cloud_sql_proxy: %s\"", ",", "self", ".", "sql_proxy_process", ".", "pid", ")", "while", "True", ":", "line", "=", "self", ".", "sql_proxy_process", ".", "stderr", ".", "readline", "(", ")", ".", "decode", "(", "'utf-8'", ")", "return_code", "=", "self", ".", "sql_proxy_process", ".", "poll", "(", ")", "if", "line", "==", "''", "and", "return_code", "is", "not", "None", ":", "self", ".", "sql_proxy_process", "=", "None", "raise", "AirflowException", "(", "\"The cloud_sql_proxy finished early with return code {}!\"", ".", "format", "(", "return_code", ")", ")", "if", "line", "!=", "''", ":", "self", ".", "log", ".", "info", "(", "line", ")", "if", "\"googleapi: Error\"", "in", "line", "or", "\"invalid instance name:\"", "in", "line", ":", "self", ".", "stop_proxy", "(", ")", "raise", "AirflowException", "(", "\"Error when starting the cloud_sql_proxy {}!\"", ".", "format", "(", "line", ")", ")", "if", "\"Ready for new connections\"", "in", "line", ":", "return"], "docstring": "Starts Cloud SQL Proxy.\n\n        You have to remember to stop the proxy if you started it!", "docstring_tokens": ["Starts", "Cloud", "SQL", "Proxy", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L524-L565", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlProxyRunner.stop_proxy", "original_string": "def stop_proxy(self):\n        \"\"\"\n        Stops running proxy.\n\n        You should stop the proxy after you stop using it.\n        \"\"\"\n        if not self.sql_proxy_process:\n            raise AirflowException(\"The sql proxy is not started yet\")\n        else:\n            self.log.info(\"Stopping the cloud_sql_proxy pid: %s\",\n                          self.sql_proxy_process.pid)\n            self.sql_proxy_process.kill()\n            self.sql_proxy_process = None\n        # Cleanup!\n        self.log.info(\"Removing the socket directory: %s\",\n                      self.cloud_sql_proxy_socket_directory)\n        shutil.rmtree(self.cloud_sql_proxy_socket_directory, ignore_errors=True)\n        if self.sql_proxy_was_downloaded:\n            self.log.info(\"Removing downloaded proxy: %s\", self.sql_proxy_path)\n            # Silently ignore if the file has already been removed (concurrency)\n            try:\n                os.remove(self.sql_proxy_path)\n            except OSError as e:\n                if not e.errno == errno.ENOENT:\n                    raise\n        else:\n            self.log.info(\"Skipped removing proxy - it was not downloaded: %s\",\n                          self.sql_proxy_path)\n        if os.path.isfile(self.credentials_path):\n            self.log.info(\"Removing generated credentials file %s\",\n                          self.credentials_path)\n            # Here file cannot be delete by concurrent task (each task has its own copy)\n            os.remove(self.credentials_path)", "language": "python", "code": "def stop_proxy(self):\n        \"\"\"\n        Stops running proxy.\n\n        You should stop the proxy after you stop using it.\n        \"\"\"\n        if not self.sql_proxy_process:\n            raise AirflowException(\"The sql proxy is not started yet\")\n        else:\n            self.log.info(\"Stopping the cloud_sql_proxy pid: %s\",\n                          self.sql_proxy_process.pid)\n            self.sql_proxy_process.kill()\n            self.sql_proxy_process = None\n        # Cleanup!\n        self.log.info(\"Removing the socket directory: %s\",\n                      self.cloud_sql_proxy_socket_directory)\n        shutil.rmtree(self.cloud_sql_proxy_socket_directory, ignore_errors=True)\n        if self.sql_proxy_was_downloaded:\n            self.log.info(\"Removing downloaded proxy: %s\", self.sql_proxy_path)\n            # Silently ignore if the file has already been removed (concurrency)\n            try:\n                os.remove(self.sql_proxy_path)\n            except OSError as e:\n                if not e.errno == errno.ENOENT:\n                    raise\n        else:\n            self.log.info(\"Skipped removing proxy - it was not downloaded: %s\",\n                          self.sql_proxy_path)\n        if os.path.isfile(self.credentials_path):\n            self.log.info(\"Removing generated credentials file %s\",\n                          self.credentials_path)\n            # Here file cannot be delete by concurrent task (each task has its own copy)\n            os.remove(self.credentials_path)", "code_tokens": ["def", "stop_proxy", "(", "self", ")", ":", "if", "not", "self", ".", "sql_proxy_process", ":", "raise", "AirflowException", "(", "\"The sql proxy is not started yet\"", ")", "else", ":", "self", ".", "log", ".", "info", "(", "\"Stopping the cloud_sql_proxy pid: %s\"", ",", "self", ".", "sql_proxy_process", ".", "pid", ")", "self", ".", "sql_proxy_process", ".", "kill", "(", ")", "self", ".", "sql_proxy_process", "=", "None", "# Cleanup!", "self", ".", "log", ".", "info", "(", "\"Removing the socket directory: %s\"", ",", "self", ".", "cloud_sql_proxy_socket_directory", ")", "shutil", ".", "rmtree", "(", "self", ".", "cloud_sql_proxy_socket_directory", ",", "ignore_errors", "=", "True", ")", "if", "self", ".", "sql_proxy_was_downloaded", ":", "self", ".", "log", ".", "info", "(", "\"Removing downloaded proxy: %s\"", ",", "self", ".", "sql_proxy_path", ")", "# Silently ignore if the file has already been removed (concurrency)", "try", ":", "os", ".", "remove", "(", "self", ".", "sql_proxy_path", ")", "except", "OSError", "as", "e", ":", "if", "not", "e", ".", "errno", "==", "errno", ".", "ENOENT", ":", "raise", "else", ":", "self", ".", "log", ".", "info", "(", "\"Skipped removing proxy - it was not downloaded: %s\"", ",", "self", ".", "sql_proxy_path", ")", "if", "os", ".", "path", ".", "isfile", "(", "self", ".", "credentials_path", ")", ":", "self", ".", "log", ".", "info", "(", "\"Removing generated credentials file %s\"", ",", "self", ".", "credentials_path", ")", "# Here file cannot be delete by concurrent task (each task has its own copy)", "os", ".", "remove", "(", "self", ".", "credentials_path", ")"], "docstring": "Stops running proxy.\n\n        You should stop the proxy after you stop using it.", "docstring_tokens": ["Stops", "running", "proxy", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L567-L599", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlProxyRunner.get_proxy_version", "original_string": "def get_proxy_version(self):\n        \"\"\"\n        Returns version of the Cloud SQL Proxy.\n        \"\"\"\n        self._download_sql_proxy_if_needed()\n        command_to_run = [self.sql_proxy_path]\n        command_to_run.extend(['--version'])\n        command_to_run.extend(self._get_credential_parameters())\n        result = subprocess.check_output(command_to_run).decode('utf-8')\n        pattern = re.compile(\"^.*[V|v]ersion ([^;]*);.*$\")\n        m = pattern.match(result)\n        if m:\n            return m.group(1)\n        else:\n            return None", "language": "python", "code": "def get_proxy_version(self):\n        \"\"\"\n        Returns version of the Cloud SQL Proxy.\n        \"\"\"\n        self._download_sql_proxy_if_needed()\n        command_to_run = [self.sql_proxy_path]\n        command_to_run.extend(['--version'])\n        command_to_run.extend(self._get_credential_parameters())\n        result = subprocess.check_output(command_to_run).decode('utf-8')\n        pattern = re.compile(\"^.*[V|v]ersion ([^;]*);.*$\")\n        m = pattern.match(result)\n        if m:\n            return m.group(1)\n        else:\n            return None", "code_tokens": ["def", "get_proxy_version", "(", "self", ")", ":", "self", ".", "_download_sql_proxy_if_needed", "(", ")", "command_to_run", "=", "[", "self", ".", "sql_proxy_path", "]", "command_to_run", ".", "extend", "(", "[", "'--version'", "]", ")", "command_to_run", ".", "extend", "(", "self", ".", "_get_credential_parameters", "(", ")", ")", "result", "=", "subprocess", ".", "check_output", "(", "command_to_run", ")", ".", "decode", "(", "'utf-8'", ")", "pattern", "=", "re", ".", "compile", "(", "\"^.*[V|v]ersion ([^;]*);.*$\"", ")", "m", "=", "pattern", ".", "match", "(", "result", ")", "if", "m", ":", "return", "m", ".", "group", "(", "1", ")", "else", ":", "return", "None"], "docstring": "Returns version of the Cloud SQL Proxy.", "docstring_tokens": ["Returns", "version", "of", "the", "Cloud", "SQL", "Proxy", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L601-L615", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.create_connection", "original_string": "def create_connection(self, session=None):\n        \"\"\"\n        Create connection in the Connection table, according to whether it uses\n        proxy, TCP, UNIX sockets, SSL. Connection ID will be randomly generated.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        connection = Connection(conn_id=self.db_conn_id)\n        uri = self._generate_connection_uri()\n        self.log.info(\"Creating connection %s\", self.db_conn_id)\n        connection.parse_from_uri(uri)\n        session.add(connection)\n        session.commit()", "language": "python", "code": "def create_connection(self, session=None):\n        \"\"\"\n        Create connection in the Connection table, according to whether it uses\n        proxy, TCP, UNIX sockets, SSL. Connection ID will be randomly generated.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        connection = Connection(conn_id=self.db_conn_id)\n        uri = self._generate_connection_uri()\n        self.log.info(\"Creating connection %s\", self.db_conn_id)\n        connection.parse_from_uri(uri)\n        session.add(connection)\n        session.commit()", "code_tokens": ["def", "create_connection", "(", "self", ",", "session", "=", "None", ")", ":", "connection", "=", "Connection", "(", "conn_id", "=", "self", ".", "db_conn_id", ")", "uri", "=", "self", ".", "_generate_connection_uri", "(", ")", "self", ".", "log", ".", "info", "(", "\"Creating connection %s\"", ",", "self", ".", "db_conn_id", ")", "connection", ".", "parse_from_uri", "(", "uri", ")", "session", ".", "add", "(", "connection", ")", "session", ".", "commit", "(", ")"], "docstring": "Create connection in the Connection table, according to whether it uses\n        proxy, TCP, UNIX sockets, SSL. Connection ID will be randomly generated.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).", "docstring_tokens": ["Create", "connection", "in", "the", "Connection", "table", "according", "to", "whether", "it", "uses", "proxy", "TCP", "UNIX", "sockets", "SSL", ".", "Connection", "ID", "will", "be", "randomly", "generated", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L895-L908", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.retrieve_connection", "original_string": "def retrieve_connection(self, session=None):\n        \"\"\"\n        Retrieves the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        self.log.info(\"Retrieving connection %s\", self.db_conn_id)\n        connections = session.query(Connection).filter(\n            Connection.conn_id == self.db_conn_id)\n        if connections.count():\n            return connections[0]\n        return None", "language": "python", "code": "def retrieve_connection(self, session=None):\n        \"\"\"\n        Retrieves the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        self.log.info(\"Retrieving connection %s\", self.db_conn_id)\n        connections = session.query(Connection).filter(\n            Connection.conn_id == self.db_conn_id)\n        if connections.count():\n            return connections[0]\n        return None", "code_tokens": ["def", "retrieve_connection", "(", "self", ",", "session", "=", "None", ")", ":", "self", ".", "log", ".", "info", "(", "\"Retrieving connection %s\"", ",", "self", ".", "db_conn_id", ")", "connections", "=", "session", ".", "query", "(", "Connection", ")", ".", "filter", "(", "Connection", ".", "conn_id", "==", "self", ".", "db_conn_id", ")", "if", "connections", ".", "count", "(", ")", ":", "return", "connections", "[", "0", "]", "return", "None"], "docstring": "Retrieves the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).", "docstring_tokens": ["Retrieves", "the", "dynamically", "created", "connection", "from", "the", "Connection", "table", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L911-L923", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.delete_connection", "original_string": "def delete_connection(self, session=None):\n        \"\"\"\n        Delete the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        self.log.info(\"Deleting connection %s\", self.db_conn_id)\n        connections = session.query(Connection).filter(\n            Connection.conn_id == self.db_conn_id)\n        if connections.count():\n            connection = connections[0]\n            session.delete(connection)\n            session.commit()\n        else:\n            self.log.info(\"Connection was already deleted!\")", "language": "python", "code": "def delete_connection(self, session=None):\n        \"\"\"\n        Delete the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).\n        \"\"\"\n        self.log.info(\"Deleting connection %s\", self.db_conn_id)\n        connections = session.query(Connection).filter(\n            Connection.conn_id == self.db_conn_id)\n        if connections.count():\n            connection = connections[0]\n            session.delete(connection)\n            session.commit()\n        else:\n            self.log.info(\"Connection was already deleted!\")", "code_tokens": ["def", "delete_connection", "(", "self", ",", "session", "=", "None", ")", ":", "self", ".", "log", ".", "info", "(", "\"Deleting connection %s\"", ",", "self", ".", "db_conn_id", ")", "connections", "=", "session", ".", "query", "(", "Connection", ")", ".", "filter", "(", "Connection", ".", "conn_id", "==", "self", ".", "db_conn_id", ")", "if", "connections", ".", "count", "(", ")", ":", "connection", "=", "connections", "[", "0", "]", "session", ".", "delete", "(", "connection", ")", "session", ".", "commit", "(", ")", "else", ":", "self", ".", "log", ".", "info", "(", "\"Connection was already deleted!\"", ")"], "docstring": "Delete the dynamically created connection from the Connection table.\n\n        :param session: Session of the SQL Alchemy ORM (automatically generated with\n                        decorator).", "docstring_tokens": ["Delete", "the", "dynamically", "created", "connection", "from", "the", "Connection", "table", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L926-L941", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.get_sqlproxy_runner", "original_string": "def get_sqlproxy_runner(self):\n        \"\"\"\n        Retrieve Cloud SQL Proxy runner. It is used to manage the proxy\n        lifecycle per task.\n\n        :return: The Cloud SQL Proxy runner.\n        :rtype: CloudSqlProxyRunner\n        \"\"\"\n        if not self.use_proxy:\n            raise AirflowException(\"Proxy runner can only be retrieved in case of use_proxy = True\")\n        return CloudSqlProxyRunner(\n            path_prefix=self.sql_proxy_unique_path,\n            instance_specification=self._get_sqlproxy_instance_specification(),\n            project_id=self.project_id,\n            sql_proxy_version=self.sql_proxy_version,\n            sql_proxy_binary_path=self.sql_proxy_binary_path\n        )", "language": "python", "code": "def get_sqlproxy_runner(self):\n        \"\"\"\n        Retrieve Cloud SQL Proxy runner. It is used to manage the proxy\n        lifecycle per task.\n\n        :return: The Cloud SQL Proxy runner.\n        :rtype: CloudSqlProxyRunner\n        \"\"\"\n        if not self.use_proxy:\n            raise AirflowException(\"Proxy runner can only be retrieved in case of use_proxy = True\")\n        return CloudSqlProxyRunner(\n            path_prefix=self.sql_proxy_unique_path,\n            instance_specification=self._get_sqlproxy_instance_specification(),\n            project_id=self.project_id,\n            sql_proxy_version=self.sql_proxy_version,\n            sql_proxy_binary_path=self.sql_proxy_binary_path\n        )", "code_tokens": ["def", "get_sqlproxy_runner", "(", "self", ")", ":", "if", "not", "self", ".", "use_proxy", ":", "raise", "AirflowException", "(", "\"Proxy runner can only be retrieved in case of use_proxy = True\"", ")", "return", "CloudSqlProxyRunner", "(", "path_prefix", "=", "self", ".", "sql_proxy_unique_path", ",", "instance_specification", "=", "self", ".", "_get_sqlproxy_instance_specification", "(", ")", ",", "project_id", "=", "self", ".", "project_id", ",", "sql_proxy_version", "=", "self", ".", "sql_proxy_version", ",", "sql_proxy_binary_path", "=", "self", ".", "sql_proxy_binary_path", ")"], "docstring": "Retrieve Cloud SQL Proxy runner. It is used to manage the proxy\n        lifecycle per task.\n\n        :return: The Cloud SQL Proxy runner.\n        :rtype: CloudSqlProxyRunner", "docstring_tokens": ["Retrieve", "Cloud", "SQL", "Proxy", "runner", ".", "It", "is", "used", "to", "manage", "the", "proxy", "lifecycle", "per", "task", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L943-L959", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.get_database_hook", "original_string": "def get_database_hook(self):\n        \"\"\"\n        Retrieve database hook. This is the actual Postgres or MySQL database hook\n        that uses proxy or connects directly to the Google Cloud SQL database.\n        \"\"\"\n        if self.database_type == 'postgres':\n            self.db_hook = PostgresHook(postgres_conn_id=self.db_conn_id,\n                                        schema=self.database)\n        else:\n            self.db_hook = MySqlHook(mysql_conn_id=self.db_conn_id,\n                                     schema=self.database)\n        return self.db_hook", "language": "python", "code": "def get_database_hook(self):\n        \"\"\"\n        Retrieve database hook. This is the actual Postgres or MySQL database hook\n        that uses proxy or connects directly to the Google Cloud SQL database.\n        \"\"\"\n        if self.database_type == 'postgres':\n            self.db_hook = PostgresHook(postgres_conn_id=self.db_conn_id,\n                                        schema=self.database)\n        else:\n            self.db_hook = MySqlHook(mysql_conn_id=self.db_conn_id,\n                                     schema=self.database)\n        return self.db_hook", "code_tokens": ["def", "get_database_hook", "(", "self", ")", ":", "if", "self", ".", "database_type", "==", "'postgres'", ":", "self", ".", "db_hook", "=", "PostgresHook", "(", "postgres_conn_id", "=", "self", ".", "db_conn_id", ",", "schema", "=", "self", ".", "database", ")", "else", ":", "self", ".", "db_hook", "=", "MySqlHook", "(", "mysql_conn_id", "=", "self", ".", "db_conn_id", ",", "schema", "=", "self", ".", "database", ")", "return", "self", ".", "db_hook"], "docstring": "Retrieve database hook. This is the actual Postgres or MySQL database hook\n        that uses proxy or connects directly to the Google Cloud SQL database.", "docstring_tokens": ["Retrieve", "database", "hook", ".", "This", "is", "the", "actual", "Postgres", "or", "MySQL", "database", "hook", "that", "uses", "proxy", "or", "connects", "directly", "to", "the", "Google", "Cloud", "SQL", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L961-L972", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.cleanup_database_hook", "original_string": "def cleanup_database_hook(self):\n        \"\"\"\n        Clean up database hook after it was used.\n        \"\"\"\n        if self.database_type == 'postgres':\n            if hasattr(self.db_hook,\n                       'conn') and self.db_hook.conn and self.db_hook.conn.notices:\n                for output in self.db_hook.conn.notices:\n                    self.log.info(output)", "language": "python", "code": "def cleanup_database_hook(self):\n        \"\"\"\n        Clean up database hook after it was used.\n        \"\"\"\n        if self.database_type == 'postgres':\n            if hasattr(self.db_hook,\n                       'conn') and self.db_hook.conn and self.db_hook.conn.notices:\n                for output in self.db_hook.conn.notices:\n                    self.log.info(output)", "code_tokens": ["def", "cleanup_database_hook", "(", "self", ")", ":", "if", "self", ".", "database_type", "==", "'postgres'", ":", "if", "hasattr", "(", "self", ".", "db_hook", ",", "'conn'", ")", "and", "self", ".", "db_hook", ".", "conn", "and", "self", ".", "db_hook", ".", "conn", ".", "notices", ":", "for", "output", "in", "self", ".", "db_hook", ".", "conn", ".", "notices", ":", "self", ".", "log", ".", "info", "(", "output", ")"], "docstring": "Clean up database hook after it was used.", "docstring_tokens": ["Clean", "up", "database", "hook", "after", "it", "was", "used", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L974-L982", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_sql_hook.py", "func_name": "CloudSqlDatabaseHook.reserve_free_tcp_port", "original_string": "def reserve_free_tcp_port(self):\n        \"\"\"\n        Reserve free TCP port to be used by Cloud SQL Proxy\n        \"\"\"\n        self.reserved_tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        self.reserved_tcp_socket.bind(('127.0.0.1', 0))\n        self.sql_proxy_tcp_port = self.reserved_tcp_socket.getsockname()[1]", "language": "python", "code": "def reserve_free_tcp_port(self):\n        \"\"\"\n        Reserve free TCP port to be used by Cloud SQL Proxy\n        \"\"\"\n        self.reserved_tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        self.reserved_tcp_socket.bind(('127.0.0.1', 0))\n        self.sql_proxy_tcp_port = self.reserved_tcp_socket.getsockname()[1]", "code_tokens": ["def", "reserve_free_tcp_port", "(", "self", ")", ":", "self", ".", "reserved_tcp_socket", "=", "socket", ".", "socket", "(", "socket", ".", "AF_INET", ",", "socket", ".", "SOCK_STREAM", ")", "self", ".", "reserved_tcp_socket", ".", "bind", "(", "(", "'127.0.0.1'", ",", "0", ")", ")", "self", ".", "sql_proxy_tcp_port", "=", "self", ".", "reserved_tcp_socket", ".", "getsockname", "(", ")", "[", "1", "]"], "docstring": "Reserve free TCP port to be used by Cloud SQL Proxy", "docstring_tokens": ["Reserve", "free", "TCP", "port", "to", "be", "used", "by", "Cloud", "SQL", "Proxy"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_sql_hook.py#L984-L990", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/operators/mlengine_operator.py", "func_name": "_normalize_mlengine_job_id", "original_string": "def _normalize_mlengine_job_id(job_id):\n    \"\"\"\n    Replaces invalid MLEngine job_id characters with '_'.\n\n    This also adds a leading 'z' in case job_id starts with an invalid\n    character.\n\n    Args:\n        job_id: A job_id str that may have invalid characters.\n\n    Returns:\n        A valid job_id representation.\n    \"\"\"\n\n    # Add a prefix when a job_id starts with a digit or a template\n    match = re.search(r'\\d|\\{{2}', job_id)\n    if match and match.start() == 0:\n        job = 'z_{}'.format(job_id)\n    else:\n        job = job_id\n\n    # Clean up 'bad' characters except templates\n    tracker = 0\n    cleansed_job_id = ''\n    for m in re.finditer(r'\\{{2}.+?\\}{2}', job):\n        cleansed_job_id += re.sub(r'[^0-9a-zA-Z]+', '_',\n                                  job[tracker:m.start()])\n        cleansed_job_id += job[m.start():m.end()]\n        tracker = m.end()\n\n    # Clean up last substring or the full string if no templates\n    cleansed_job_id += re.sub(r'[^0-9a-zA-Z]+', '_', job[tracker:])\n\n    return cleansed_job_id", "language": "python", "code": "def _normalize_mlengine_job_id(job_id):\n    \"\"\"\n    Replaces invalid MLEngine job_id characters with '_'.\n\n    This also adds a leading 'z' in case job_id starts with an invalid\n    character.\n\n    Args:\n        job_id: A job_id str that may have invalid characters.\n\n    Returns:\n        A valid job_id representation.\n    \"\"\"\n\n    # Add a prefix when a job_id starts with a digit or a template\n    match = re.search(r'\\d|\\{{2}', job_id)\n    if match and match.start() == 0:\n        job = 'z_{}'.format(job_id)\n    else:\n        job = job_id\n\n    # Clean up 'bad' characters except templates\n    tracker = 0\n    cleansed_job_id = ''\n    for m in re.finditer(r'\\{{2}.+?\\}{2}', job):\n        cleansed_job_id += re.sub(r'[^0-9a-zA-Z]+', '_',\n                                  job[tracker:m.start()])\n        cleansed_job_id += job[m.start():m.end()]\n        tracker = m.end()\n\n    # Clean up last substring or the full string if no templates\n    cleansed_job_id += re.sub(r'[^0-9a-zA-Z]+', '_', job[tracker:])\n\n    return cleansed_job_id", "code_tokens": ["def", "_normalize_mlengine_job_id", "(", "job_id", ")", ":", "# Add a prefix when a job_id starts with a digit or a template", "match", "=", "re", ".", "search", "(", "r'\\d|\\{{2}'", ",", "job_id", ")", "if", "match", "and", "match", ".", "start", "(", ")", "==", "0", ":", "job", "=", "'z_{}'", ".", "format", "(", "job_id", ")", "else", ":", "job", "=", "job_id", "# Clean up 'bad' characters except templates", "tracker", "=", "0", "cleansed_job_id", "=", "''", "for", "m", "in", "re", ".", "finditer", "(", "r'\\{{2}.+?\\}{2}'", ",", "job", ")", ":", "cleansed_job_id", "+=", "re", ".", "sub", "(", "r'[^0-9a-zA-Z]+'", ",", "'_'", ",", "job", "[", "tracker", ":", "m", ".", "start", "(", ")", "]", ")", "cleansed_job_id", "+=", "job", "[", "m", ".", "start", "(", ")", ":", "m", ".", "end", "(", ")", "]", "tracker", "=", "m", ".", "end", "(", ")", "# Clean up last substring or the full string if no templates", "cleansed_job_id", "+=", "re", ".", "sub", "(", "r'[^0-9a-zA-Z]+'", ",", "'_'", ",", "job", "[", "tracker", ":", "]", ")", "return", "cleansed_job_id"], "docstring": "Replaces invalid MLEngine job_id characters with '_'.\n\n    This also adds a leading 'z' in case job_id starts with an invalid\n    character.\n\n    Args:\n        job_id: A job_id str that may have invalid characters.\n\n    Returns:\n        A valid job_id representation.", "docstring_tokens": ["Replaces", "invalid", "MLEngine", "job_id", "characters", "with", "_", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/operators/mlengine_operator.py#L29-L62", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/sensors/ftp_sensor.py", "func_name": "FTPSensor._get_error_code", "original_string": "def _get_error_code(self, e):\n        \"\"\"Extract error code from ftp exception\"\"\"\n        try:\n            matches = self.error_code_pattern.match(str(e))\n            code = int(matches.group(0))\n            return code\n        except ValueError:\n            return e", "language": "python", "code": "def _get_error_code(self, e):\n        \"\"\"Extract error code from ftp exception\"\"\"\n        try:\n            matches = self.error_code_pattern.match(str(e))\n            code = int(matches.group(0))\n            return code\n        except ValueError:\n            return e", "code_tokens": ["def", "_get_error_code", "(", "self", ",", "e", ")", ":", "try", ":", "matches", "=", "self", ".", "error_code_pattern", ".", "match", "(", "str", "(", "e", ")", ")", "code", "=", "int", "(", "matches", ".", "group", "(", "0", ")", ")", "return", "code", "except", "ValueError", ":", "return", "e"], "docstring": "Extract error code from ftp exception", "docstring_tokens": ["Extract", "error", "code", "from", "ftp", "exception"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/sensors/ftp_sensor.py#L69-L76", "partition": "test"}
{"repo": "apache/airflow", "path": "scripts/perf/scheduler_ops_metrics.py", "func_name": "clear_dag_runs", "original_string": "def clear_dag_runs():\n    \"\"\"\n    Remove any existing DAG runs for the perf test DAGs.\n    \"\"\"\n    session = settings.Session()\n    drs = session.query(DagRun).filter(\n        DagRun.dag_id.in_(DAG_IDS),\n    ).all()\n    for dr in drs:\n        logging.info('Deleting DagRun :: {}'.format(dr))\n        session.delete(dr)", "language": "python", "code": "def clear_dag_runs():\n    \"\"\"\n    Remove any existing DAG runs for the perf test DAGs.\n    \"\"\"\n    session = settings.Session()\n    drs = session.query(DagRun).filter(\n        DagRun.dag_id.in_(DAG_IDS),\n    ).all()\n    for dr in drs:\n        logging.info('Deleting DagRun :: {}'.format(dr))\n        session.delete(dr)", "code_tokens": ["def", "clear_dag_runs", "(", ")", ":", "session", "=", "settings", ".", "Session", "(", ")", "drs", "=", "session", ".", "query", "(", "DagRun", ")", ".", "filter", "(", "DagRun", ".", "dag_id", ".", "in_", "(", "DAG_IDS", ")", ",", ")", ".", "all", "(", ")", "for", "dr", "in", "drs", ":", "logging", ".", "info", "(", "'Deleting DagRun :: {}'", ".", "format", "(", "dr", ")", ")", "session", ".", "delete", "(", "dr", ")"], "docstring": "Remove any existing DAG runs for the perf test DAGs.", "docstring_tokens": ["Remove", "any", "existing", "DAG", "runs", "for", "the", "perf", "test", "DAGs", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/scripts/perf/scheduler_ops_metrics.py#L138-L148", "partition": "test"}
{"repo": "apache/airflow", "path": "scripts/perf/scheduler_ops_metrics.py", "func_name": "clear_dag_task_instances", "original_string": "def clear_dag_task_instances():\n    \"\"\"\n    Remove any existing task instances for the perf test DAGs.\n    \"\"\"\n    session = settings.Session()\n    TI = TaskInstance\n    tis = (\n        session\n        .query(TI)\n        .filter(TI.dag_id.in_(DAG_IDS))\n        .all()\n    )\n    for ti in tis:\n        logging.info('Deleting TaskInstance :: {}'.format(ti))\n        session.delete(ti)\n    session.commit()", "language": "python", "code": "def clear_dag_task_instances():\n    \"\"\"\n    Remove any existing task instances for the perf test DAGs.\n    \"\"\"\n    session = settings.Session()\n    TI = TaskInstance\n    tis = (\n        session\n        .query(TI)\n        .filter(TI.dag_id.in_(DAG_IDS))\n        .all()\n    )\n    for ti in tis:\n        logging.info('Deleting TaskInstance :: {}'.format(ti))\n        session.delete(ti)\n    session.commit()", "code_tokens": ["def", "clear_dag_task_instances", "(", ")", ":", "session", "=", "settings", ".", "Session", "(", ")", "TI", "=", "TaskInstance", "tis", "=", "(", "session", ".", "query", "(", "TI", ")", ".", "filter", "(", "TI", ".", "dag_id", ".", "in_", "(", "DAG_IDS", ")", ")", ".", "all", "(", ")", ")", "for", "ti", "in", "tis", ":", "logging", ".", "info", "(", "'Deleting TaskInstance :: {}'", ".", "format", "(", "ti", ")", ")", "session", ".", "delete", "(", "ti", ")", "session", ".", "commit", "(", ")"], "docstring": "Remove any existing task instances for the perf test DAGs.", "docstring_tokens": ["Remove", "any", "existing", "task", "instances", "for", "the", "perf", "test", "DAGs", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/scripts/perf/scheduler_ops_metrics.py#L151-L166", "partition": "test"}
{"repo": "apache/airflow", "path": "scripts/perf/scheduler_ops_metrics.py", "func_name": "set_dags_paused_state", "original_string": "def set_dags_paused_state(is_paused):\n    \"\"\"\n    Toggle the pause state of the DAGs in the test.\n    \"\"\"\n    session = settings.Session()\n    dms = session.query(DagModel).filter(\n        DagModel.dag_id.in_(DAG_IDS))\n    for dm in dms:\n        logging.info('Setting DAG :: {} is_paused={}'.format(dm, is_paused))\n        dm.is_paused = is_paused\n    session.commit()", "language": "python", "code": "def set_dags_paused_state(is_paused):\n    \"\"\"\n    Toggle the pause state of the DAGs in the test.\n    \"\"\"\n    session = settings.Session()\n    dms = session.query(DagModel).filter(\n        DagModel.dag_id.in_(DAG_IDS))\n    for dm in dms:\n        logging.info('Setting DAG :: {} is_paused={}'.format(dm, is_paused))\n        dm.is_paused = is_paused\n    session.commit()", "code_tokens": ["def", "set_dags_paused_state", "(", "is_paused", ")", ":", "session", "=", "settings", ".", "Session", "(", ")", "dms", "=", "session", ".", "query", "(", "DagModel", ")", ".", "filter", "(", "DagModel", ".", "dag_id", ".", "in_", "(", "DAG_IDS", ")", ")", "for", "dm", "in", "dms", ":", "logging", ".", "info", "(", "'Setting DAG :: {} is_paused={}'", ".", "format", "(", "dm", ",", "is_paused", ")", ")", "dm", ".", "is_paused", "=", "is_paused", "session", ".", "commit", "(", ")"], "docstring": "Toggle the pause state of the DAGs in the test.", "docstring_tokens": ["Toggle", "the", "pause", "state", "of", "the", "DAGs", "in", "the", "test", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/scripts/perf/scheduler_ops_metrics.py#L169-L179", "partition": "test"}
{"repo": "apache/airflow", "path": "scripts/perf/scheduler_ops_metrics.py", "func_name": "SchedulerMetricsJob.print_stats", "original_string": "def print_stats(self):\n        \"\"\"\n        Print operational metrics for the scheduler test.\n        \"\"\"\n        session = settings.Session()\n        TI = TaskInstance\n        tis = (\n            session\n            .query(TI)\n            .filter(TI.dag_id.in_(DAG_IDS))\n            .all()\n        )\n        successful_tis = [x for x in tis if x.state == State.SUCCESS]\n        ti_perf = [(ti.dag_id, ti.task_id, ti.execution_date,\n                    (ti.queued_dttm - self.start_date).total_seconds(),\n                    (ti.start_date - self.start_date).total_seconds(),\n                    (ti.end_date - self.start_date).total_seconds(),\n                    ti.duration) for ti in successful_tis]\n        ti_perf_df = pd.DataFrame(ti_perf, columns=['dag_id', 'task_id',\n                                                    'execution_date',\n                                                    'queue_delay',\n                                                    'start_delay', 'land_time',\n                                                    'duration'])\n\n        print('Performance Results')\n        print('###################')\n        for dag_id in DAG_IDS:\n            print('DAG {}'.format(dag_id))\n            print(ti_perf_df[ti_perf_df['dag_id'] == dag_id])\n        print('###################')\n        if len(tis) > len(successful_tis):\n            print(\"WARNING!! The following task instances haven't completed\")\n            print(pd.DataFrame([(ti.dag_id, ti.task_id, ti.execution_date, ti.state)\n                  for ti in filter(lambda x: x.state != State.SUCCESS, tis)],\n                  columns=['dag_id', 'task_id', 'execution_date', 'state']))\n\n        session.commit()", "language": "python", "code": "def print_stats(self):\n        \"\"\"\n        Print operational metrics for the scheduler test.\n        \"\"\"\n        session = settings.Session()\n        TI = TaskInstance\n        tis = (\n            session\n            .query(TI)\n            .filter(TI.dag_id.in_(DAG_IDS))\n            .all()\n        )\n        successful_tis = [x for x in tis if x.state == State.SUCCESS]\n        ti_perf = [(ti.dag_id, ti.task_id, ti.execution_date,\n                    (ti.queued_dttm - self.start_date).total_seconds(),\n                    (ti.start_date - self.start_date).total_seconds(),\n                    (ti.end_date - self.start_date).total_seconds(),\n                    ti.duration) for ti in successful_tis]\n        ti_perf_df = pd.DataFrame(ti_perf, columns=['dag_id', 'task_id',\n                                                    'execution_date',\n                                                    'queue_delay',\n                                                    'start_delay', 'land_time',\n                                                    'duration'])\n\n        print('Performance Results')\n        print('###################')\n        for dag_id in DAG_IDS:\n            print('DAG {}'.format(dag_id))\n            print(ti_perf_df[ti_perf_df['dag_id'] == dag_id])\n        print('###################')\n        if len(tis) > len(successful_tis):\n            print(\"WARNING!! The following task instances haven't completed\")\n            print(pd.DataFrame([(ti.dag_id, ti.task_id, ti.execution_date, ti.state)\n                  for ti in filter(lambda x: x.state != State.SUCCESS, tis)],\n                  columns=['dag_id', 'task_id', 'execution_date', 'state']))\n\n        session.commit()", "code_tokens": ["def", "print_stats", "(", "self", ")", ":", "session", "=", "settings", ".", "Session", "(", ")", "TI", "=", "TaskInstance", "tis", "=", "(", "session", ".", "query", "(", "TI", ")", ".", "filter", "(", "TI", ".", "dag_id", ".", "in_", "(", "DAG_IDS", ")", ")", ".", "all", "(", ")", ")", "successful_tis", "=", "[", "x", "for", "x", "in", "tis", "if", "x", ".", "state", "==", "State", ".", "SUCCESS", "]", "ti_perf", "=", "[", "(", "ti", ".", "dag_id", ",", "ti", ".", "task_id", ",", "ti", ".", "execution_date", ",", "(", "ti", ".", "queued_dttm", "-", "self", ".", "start_date", ")", ".", "total_seconds", "(", ")", ",", "(", "ti", ".", "start_date", "-", "self", ".", "start_date", ")", ".", "total_seconds", "(", ")", ",", "(", "ti", ".", "end_date", "-", "self", ".", "start_date", ")", ".", "total_seconds", "(", ")", ",", "ti", ".", "duration", ")", "for", "ti", "in", "successful_tis", "]", "ti_perf_df", "=", "pd", ".", "DataFrame", "(", "ti_perf", ",", "columns", "=", "[", "'dag_id'", ",", "'task_id'", ",", "'execution_date'", ",", "'queue_delay'", ",", "'start_delay'", ",", "'land_time'", ",", "'duration'", "]", ")", "print", "(", "'Performance Results'", ")", "print", "(", "'###################'", ")", "for", "dag_id", "in", "DAG_IDS", ":", "print", "(", "'DAG {}'", ".", "format", "(", "dag_id", ")", ")", "print", "(", "ti_perf_df", "[", "ti_perf_df", "[", "'dag_id'", "]", "==", "dag_id", "]", ")", "print", "(", "'###################'", ")", "if", "len", "(", "tis", ")", ">", "len", "(", "successful_tis", ")", ":", "print", "(", "\"WARNING!! The following task instances haven't completed\"", ")", "print", "(", "pd", ".", "DataFrame", "(", "[", "(", "ti", ".", "dag_id", ",", "ti", ".", "task_id", ",", "ti", ".", "execution_date", ",", "ti", ".", "state", ")", "for", "ti", "in", "filter", "(", "lambda", "x", ":", "x", ".", "state", "!=", "State", ".", "SUCCESS", ",", "tis", ")", "]", ",", "columns", "=", "[", "'dag_id'", ",", "'task_id'", ",", "'execution_date'", ",", "'state'", "]", ")", ")", "session", ".", "commit", "(", ")"], "docstring": "Print operational metrics for the scheduler test.", "docstring_tokens": ["Print", "operational", "metrics", "for", "the", "scheduler", "test", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/scripts/perf/scheduler_ops_metrics.py#L65-L101", "partition": "test"}
{"repo": "apache/airflow", "path": "scripts/perf/scheduler_ops_metrics.py", "func_name": "SchedulerMetricsJob.heartbeat", "original_string": "def heartbeat(self):\n        \"\"\"\n        Override the scheduler heartbeat to determine when the test is complete\n        \"\"\"\n        super(SchedulerMetricsJob, self).heartbeat()\n        session = settings.Session()\n        # Get all the relevant task instances\n        TI = TaskInstance\n        successful_tis = (\n            session\n            .query(TI)\n            .filter(TI.dag_id.in_(DAG_IDS))\n            .filter(TI.state.in_([State.SUCCESS]))\n            .all()\n        )\n        session.commit()\n\n        dagbag = DagBag(SUBDIR)\n        dags = [dagbag.dags[dag_id] for dag_id in DAG_IDS]\n        # the tasks in perf_dag_1 and per_dag_2 have a daily schedule interval.\n        num_task_instances = sum([(timezone.utcnow() - task.start_date).days\n                                 for dag in dags for task in dag.tasks])\n\n        if (len(successful_tis) == num_task_instances or\n                (timezone.utcnow() - self.start_date).total_seconds() >\n                MAX_RUNTIME_SECS):\n            if len(successful_tis) == num_task_instances:\n                self.log.info(\"All tasks processed! Printing stats.\")\n            else:\n                self.log.info(\"Test timeout reached. Printing available stats.\")\n            self.print_stats()\n            set_dags_paused_state(True)\n            sys.exit()", "language": "python", "code": "def heartbeat(self):\n        \"\"\"\n        Override the scheduler heartbeat to determine when the test is complete\n        \"\"\"\n        super(SchedulerMetricsJob, self).heartbeat()\n        session = settings.Session()\n        # Get all the relevant task instances\n        TI = TaskInstance\n        successful_tis = (\n            session\n            .query(TI)\n            .filter(TI.dag_id.in_(DAG_IDS))\n            .filter(TI.state.in_([State.SUCCESS]))\n            .all()\n        )\n        session.commit()\n\n        dagbag = DagBag(SUBDIR)\n        dags = [dagbag.dags[dag_id] for dag_id in DAG_IDS]\n        # the tasks in perf_dag_1 and per_dag_2 have a daily schedule interval.\n        num_task_instances = sum([(timezone.utcnow() - task.start_date).days\n                                 for dag in dags for task in dag.tasks])\n\n        if (len(successful_tis) == num_task_instances or\n                (timezone.utcnow() - self.start_date).total_seconds() >\n                MAX_RUNTIME_SECS):\n            if len(successful_tis) == num_task_instances:\n                self.log.info(\"All tasks processed! Printing stats.\")\n            else:\n                self.log.info(\"Test timeout reached. Printing available stats.\")\n            self.print_stats()\n            set_dags_paused_state(True)\n            sys.exit()", "code_tokens": ["def", "heartbeat", "(", "self", ")", ":", "super", "(", "SchedulerMetricsJob", ",", "self", ")", ".", "heartbeat", "(", ")", "session", "=", "settings", ".", "Session", "(", ")", "# Get all the relevant task instances", "TI", "=", "TaskInstance", "successful_tis", "=", "(", "session", ".", "query", "(", "TI", ")", ".", "filter", "(", "TI", ".", "dag_id", ".", "in_", "(", "DAG_IDS", ")", ")", ".", "filter", "(", "TI", ".", "state", ".", "in_", "(", "[", "State", ".", "SUCCESS", "]", ")", ")", ".", "all", "(", ")", ")", "session", ".", "commit", "(", ")", "dagbag", "=", "DagBag", "(", "SUBDIR", ")", "dags", "=", "[", "dagbag", ".", "dags", "[", "dag_id", "]", "for", "dag_id", "in", "DAG_IDS", "]", "# the tasks in perf_dag_1 and per_dag_2 have a daily schedule interval.", "num_task_instances", "=", "sum", "(", "[", "(", "timezone", ".", "utcnow", "(", ")", "-", "task", ".", "start_date", ")", ".", "days", "for", "dag", "in", "dags", "for", "task", "in", "dag", ".", "tasks", "]", ")", "if", "(", "len", "(", "successful_tis", ")", "==", "num_task_instances", "or", "(", "timezone", ".", "utcnow", "(", ")", "-", "self", ".", "start_date", ")", ".", "total_seconds", "(", ")", ">", "MAX_RUNTIME_SECS", ")", ":", "if", "len", "(", "successful_tis", ")", "==", "num_task_instances", ":", "self", ".", "log", ".", "info", "(", "\"All tasks processed! Printing stats.\"", ")", "else", ":", "self", ".", "log", ".", "info", "(", "\"Test timeout reached. Printing available stats.\"", ")", "self", ".", "print_stats", "(", ")", "set_dags_paused_state", "(", "True", ")", "sys", ".", "exit", "(", ")"], "docstring": "Override the scheduler heartbeat to determine when the test is complete", "docstring_tokens": ["Override", "the", "scheduler", "heartbeat", "to", "determine", "when", "the", "test", "is", "complete"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/scripts/perf/scheduler_ops_metrics.py#L103-L135", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/aws_lambda_hook.py", "func_name": "AwsLambdaHook.invoke_lambda", "original_string": "def invoke_lambda(self, payload):\n        \"\"\"\n        Invoke Lambda Function\n        \"\"\"\n\n        awslambda_conn = self.get_conn()\n\n        response = awslambda_conn.invoke(\n            FunctionName=self.function_name,\n            InvocationType=self.invocation_type,\n            LogType=self.log_type,\n            Payload=payload,\n            Qualifier=self.qualifier\n        )\n\n        return response", "language": "python", "code": "def invoke_lambda(self, payload):\n        \"\"\"\n        Invoke Lambda Function\n        \"\"\"\n\n        awslambda_conn = self.get_conn()\n\n        response = awslambda_conn.invoke(\n            FunctionName=self.function_name,\n            InvocationType=self.invocation_type,\n            LogType=self.log_type,\n            Payload=payload,\n            Qualifier=self.qualifier\n        )\n\n        return response", "code_tokens": ["def", "invoke_lambda", "(", "self", ",", "payload", ")", ":", "awslambda_conn", "=", "self", ".", "get_conn", "(", ")", "response", "=", "awslambda_conn", ".", "invoke", "(", "FunctionName", "=", "self", ".", "function_name", ",", "InvocationType", "=", "self", ".", "invocation_type", ",", "LogType", "=", "self", ".", "log_type", ",", "Payload", "=", "payload", ",", "Qualifier", "=", "self", ".", "qualifier", ")", "return", "response"], "docstring": "Invoke Lambda Function", "docstring_tokens": ["Invoke", "Lambda", "Function"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/aws_lambda_hook.py#L53-L68", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/utils/mlengine_operator_utils.py", "func_name": "create_evaluate_ops", "original_string": "def create_evaluate_ops(task_prefix,\n                        data_format,\n                        input_paths,\n                        prediction_path,\n                        metric_fn_and_keys,\n                        validate_fn,\n                        batch_prediction_job_id=None,\n                        project_id=None,\n                        region=None,\n                        dataflow_options=None,\n                        model_uri=None,\n                        model_name=None,\n                        version_name=None,\n                        dag=None):\n    \"\"\"\n    Creates Operators needed for model evaluation and returns.\n\n    It gets prediction over inputs via Cloud ML Engine BatchPrediction API by\n    calling MLEngineBatchPredictionOperator, then summarize and validate\n    the result via Cloud Dataflow using DataFlowPythonOperator.\n\n    For details and pricing about Batch prediction, please refer to the website\n    https://cloud.google.com/ml-engine/docs/how-tos/batch-predict\n    and for Cloud Dataflow, https://cloud.google.com/dataflow/docs/\n\n    It returns three chained operators for prediction, summary, and validation,\n    named as <prefix>-prediction, <prefix>-summary, and <prefix>-validation,\n    respectively.\n    (<prefix> should contain only alphanumeric characters or hyphen.)\n\n    The upstream and downstream can be set accordingly like:\n      pred, _, val = create_evaluate_ops(...)\n      pred.set_upstream(upstream_op)\n      ...\n      downstream_op.set_upstream(val)\n\n    Callers will provide two python callables, metric_fn and validate_fn, in\n    order to customize the evaluation behavior as they wish.\n    - metric_fn receives a dictionary per instance derived from json in the\n      batch prediction result. The keys might vary depending on the model.\n      It should return a tuple of metrics.\n    - validation_fn receives a dictionary of the averaged metrics that metric_fn\n      generated over all instances.\n      The key/value of the dictionary matches to what's given by\n      metric_fn_and_keys arg.\n      The dictionary contains an additional metric, 'count' to represent the\n      total number of instances received for evaluation.\n      The function would raise an exception to mark the task as failed, in a\n      case the validation result is not okay to proceed (i.e. to set the trained\n      version as default).\n\n    Typical examples are like this:\n\n    def get_metric_fn_and_keys():\n        import math  # imports should be outside of the metric_fn below.\n        def error_and_squared_error(inst):\n            label = float(inst['input_label'])\n            classes = float(inst['classes'])  # 0 or 1\n            err = abs(classes-label)\n            squared_err = math.pow(classes-label, 2)\n            return (err, squared_err)  # returns a tuple.\n        return error_and_squared_error, ['err', 'mse']  # key order must match.\n\n    def validate_err_and_count(summary):\n        if summary['err'] > 0.2:\n            raise ValueError('Too high err>0.2; summary=%s' % summary)\n        if summary['mse'] > 0.05:\n            raise ValueError('Too high mse>0.05; summary=%s' % summary)\n        if summary['count'] < 1000:\n            raise ValueError('Too few instances<1000; summary=%s' % summary)\n        return summary\n\n    For the details on the other BatchPrediction-related arguments (project_id,\n    job_id, region, data_format, input_paths, prediction_path, model_uri),\n    please refer to MLEngineBatchPredictionOperator too.\n\n    :param task_prefix: a prefix for the tasks. Only alphanumeric characters and\n        hyphen are allowed (no underscores), since this will be used as dataflow\n        job name, which doesn't allow other characters.\n    :type task_prefix: str\n\n    :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP'\n    :type data_format: str\n\n    :param input_paths: a list of input paths to be sent to BatchPrediction.\n    :type input_paths: list[str]\n\n    :param prediction_path: GCS path to put the prediction results in.\n    :type prediction_path: str\n\n    :param metric_fn_and_keys: a tuple of metric_fn and metric_keys:\n        - metric_fn is a function that accepts a dictionary (for an instance),\n          and returns a tuple of metric(s) that it calculates.\n        - metric_keys is a list of strings to denote the key of each metric.\n    :type metric_fn_and_keys: tuple of a function and a list[str]\n\n    :param validate_fn: a function to validate whether the averaged metric(s) is\n        good enough to push the model.\n    :type validate_fn: function\n\n    :param batch_prediction_job_id: the id to use for the Cloud ML Batch\n        prediction job. Passed directly to the MLEngineBatchPredictionOperator as\n        the job_id argument.\n    :type batch_prediction_job_id: str\n\n    :param project_id: the Google Cloud Platform project id in which to execute\n        Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['project_id']` will be used.\n    :type project_id: str\n\n    :param region: the Google Cloud Platform region in which to execute Cloud ML\n        Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['region']` will be used.\n    :type region: str\n\n    :param dataflow_options: options to run Dataflow jobs. If None, then the\n        `dag`'s `default_args['dataflow_default_options']` will be used.\n    :type dataflow_options: dictionary\n\n    :param model_uri: GCS path of the model exported by Tensorflow using\n        tensorflow.estimator.export_savedmodel(). It cannot be used with\n        model_name or version_name below. See MLEngineBatchPredictionOperator for\n        more detail.\n    :type model_uri: str\n\n    :param model_name: Used to indicate a model to use for prediction. Can be\n        used in combination with version_name, but cannot be used together with\n        model_uri. See MLEngineBatchPredictionOperator for more detail. If None,\n        then the `dag`'s `default_args['model_name']` will be used.\n    :type model_name: str\n\n    :param version_name: Used to indicate a model version to use for prediction,\n        in combination with model_name. Cannot be used together with model_uri.\n        See MLEngineBatchPredictionOperator for more detail. If None, then the\n        `dag`'s `default_args['version_name']` will be used.\n    :type version_name: str\n\n    :param dag: The `DAG` to use for all Operators.\n    :type dag: airflow.models.DAG\n\n    :returns: a tuple of three operators, (prediction, summary, validation)\n    :rtype: tuple(DataFlowPythonOperator, DataFlowPythonOperator,\n                  PythonOperator)\n    \"\"\"\n\n    # Verify that task_prefix doesn't have any special characters except hyphen\n    # '-', which is the only allowed non-alphanumeric character by Dataflow.\n    if not re.match(r\"^[a-zA-Z][-A-Za-z0-9]*$\", task_prefix):\n        raise AirflowException(\n            \"Malformed task_id for DataFlowPythonOperator (only alphanumeric \"\n            \"and hyphens are allowed but got: \" + task_prefix)\n\n    metric_fn, metric_keys = metric_fn_and_keys\n    if not callable(metric_fn):\n        raise AirflowException(\"`metric_fn` param must be callable.\")\n    if not callable(validate_fn):\n        raise AirflowException(\"`validate_fn` param must be callable.\")\n\n    if dag is not None and dag.default_args is not None:\n        default_args = dag.default_args\n        project_id = project_id or default_args.get('project_id')\n        region = region or default_args.get('region')\n        model_name = model_name or default_args.get('model_name')\n        version_name = version_name or default_args.get('version_name')\n        dataflow_options = dataflow_options or \\\n            default_args.get('dataflow_default_options')\n\n    evaluate_prediction = MLEngineBatchPredictionOperator(\n        task_id=(task_prefix + \"-prediction\"),\n        project_id=project_id,\n        job_id=batch_prediction_job_id,\n        region=region,\n        data_format=data_format,\n        input_paths=input_paths,\n        output_path=prediction_path,\n        uri=model_uri,\n        model_name=model_name,\n        version_name=version_name,\n        dag=dag)\n\n    metric_fn_encoded = base64.b64encode(dill.dumps(metric_fn, recurse=True))\n    evaluate_summary = DataFlowPythonOperator(\n        task_id=(task_prefix + \"-summary\"),\n        py_options=[\"-m\"],\n        py_file=\"airflow.contrib.utils.mlengine_prediction_summary\",\n        dataflow_default_options=dataflow_options,\n        options={\n            \"prediction_path\": prediction_path,\n            \"metric_fn_encoded\": metric_fn_encoded,\n            \"metric_keys\": ','.join(metric_keys)\n        },\n        dag=dag)\n    evaluate_summary.set_upstream(evaluate_prediction)\n\n    def apply_validate_fn(*args, **kwargs):\n        prediction_path = kwargs[\"templates_dict\"][\"prediction_path\"]\n        scheme, bucket, obj, _, _ = urlsplit(prediction_path)\n        if scheme != \"gs\" or not bucket or not obj:\n            raise ValueError(\"Wrong format prediction_path: %s\",\n                             prediction_path)\n        summary = os.path.join(obj.strip(\"/\"),\n                               \"prediction.summary.json\")\n        gcs_hook = GoogleCloudStorageHook()\n        summary = json.loads(gcs_hook.download(bucket, summary))\n        return validate_fn(summary)\n\n    evaluate_validation = PythonOperator(\n        task_id=(task_prefix + \"-validation\"),\n        python_callable=apply_validate_fn,\n        provide_context=True,\n        templates_dict={\"prediction_path\": prediction_path},\n        dag=dag)\n    evaluate_validation.set_upstream(evaluate_summary)\n\n    return evaluate_prediction, evaluate_summary, evaluate_validation", "language": "python", "code": "def create_evaluate_ops(task_prefix,\n                        data_format,\n                        input_paths,\n                        prediction_path,\n                        metric_fn_and_keys,\n                        validate_fn,\n                        batch_prediction_job_id=None,\n                        project_id=None,\n                        region=None,\n                        dataflow_options=None,\n                        model_uri=None,\n                        model_name=None,\n                        version_name=None,\n                        dag=None):\n    \"\"\"\n    Creates Operators needed for model evaluation and returns.\n\n    It gets prediction over inputs via Cloud ML Engine BatchPrediction API by\n    calling MLEngineBatchPredictionOperator, then summarize and validate\n    the result via Cloud Dataflow using DataFlowPythonOperator.\n\n    For details and pricing about Batch prediction, please refer to the website\n    https://cloud.google.com/ml-engine/docs/how-tos/batch-predict\n    and for Cloud Dataflow, https://cloud.google.com/dataflow/docs/\n\n    It returns three chained operators for prediction, summary, and validation,\n    named as <prefix>-prediction, <prefix>-summary, and <prefix>-validation,\n    respectively.\n    (<prefix> should contain only alphanumeric characters or hyphen.)\n\n    The upstream and downstream can be set accordingly like:\n      pred, _, val = create_evaluate_ops(...)\n      pred.set_upstream(upstream_op)\n      ...\n      downstream_op.set_upstream(val)\n\n    Callers will provide two python callables, metric_fn and validate_fn, in\n    order to customize the evaluation behavior as they wish.\n    - metric_fn receives a dictionary per instance derived from json in the\n      batch prediction result. The keys might vary depending on the model.\n      It should return a tuple of metrics.\n    - validation_fn receives a dictionary of the averaged metrics that metric_fn\n      generated over all instances.\n      The key/value of the dictionary matches to what's given by\n      metric_fn_and_keys arg.\n      The dictionary contains an additional metric, 'count' to represent the\n      total number of instances received for evaluation.\n      The function would raise an exception to mark the task as failed, in a\n      case the validation result is not okay to proceed (i.e. to set the trained\n      version as default).\n\n    Typical examples are like this:\n\n    def get_metric_fn_and_keys():\n        import math  # imports should be outside of the metric_fn below.\n        def error_and_squared_error(inst):\n            label = float(inst['input_label'])\n            classes = float(inst['classes'])  # 0 or 1\n            err = abs(classes-label)\n            squared_err = math.pow(classes-label, 2)\n            return (err, squared_err)  # returns a tuple.\n        return error_and_squared_error, ['err', 'mse']  # key order must match.\n\n    def validate_err_and_count(summary):\n        if summary['err'] > 0.2:\n            raise ValueError('Too high err>0.2; summary=%s' % summary)\n        if summary['mse'] > 0.05:\n            raise ValueError('Too high mse>0.05; summary=%s' % summary)\n        if summary['count'] < 1000:\n            raise ValueError('Too few instances<1000; summary=%s' % summary)\n        return summary\n\n    For the details on the other BatchPrediction-related arguments (project_id,\n    job_id, region, data_format, input_paths, prediction_path, model_uri),\n    please refer to MLEngineBatchPredictionOperator too.\n\n    :param task_prefix: a prefix for the tasks. Only alphanumeric characters and\n        hyphen are allowed (no underscores), since this will be used as dataflow\n        job name, which doesn't allow other characters.\n    :type task_prefix: str\n\n    :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP'\n    :type data_format: str\n\n    :param input_paths: a list of input paths to be sent to BatchPrediction.\n    :type input_paths: list[str]\n\n    :param prediction_path: GCS path to put the prediction results in.\n    :type prediction_path: str\n\n    :param metric_fn_and_keys: a tuple of metric_fn and metric_keys:\n        - metric_fn is a function that accepts a dictionary (for an instance),\n          and returns a tuple of metric(s) that it calculates.\n        - metric_keys is a list of strings to denote the key of each metric.\n    :type metric_fn_and_keys: tuple of a function and a list[str]\n\n    :param validate_fn: a function to validate whether the averaged metric(s) is\n        good enough to push the model.\n    :type validate_fn: function\n\n    :param batch_prediction_job_id: the id to use for the Cloud ML Batch\n        prediction job. Passed directly to the MLEngineBatchPredictionOperator as\n        the job_id argument.\n    :type batch_prediction_job_id: str\n\n    :param project_id: the Google Cloud Platform project id in which to execute\n        Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['project_id']` will be used.\n    :type project_id: str\n\n    :param region: the Google Cloud Platform region in which to execute Cloud ML\n        Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['region']` will be used.\n    :type region: str\n\n    :param dataflow_options: options to run Dataflow jobs. If None, then the\n        `dag`'s `default_args['dataflow_default_options']` will be used.\n    :type dataflow_options: dictionary\n\n    :param model_uri: GCS path of the model exported by Tensorflow using\n        tensorflow.estimator.export_savedmodel(). It cannot be used with\n        model_name or version_name below. See MLEngineBatchPredictionOperator for\n        more detail.\n    :type model_uri: str\n\n    :param model_name: Used to indicate a model to use for prediction. Can be\n        used in combination with version_name, but cannot be used together with\n        model_uri. See MLEngineBatchPredictionOperator for more detail. If None,\n        then the `dag`'s `default_args['model_name']` will be used.\n    :type model_name: str\n\n    :param version_name: Used to indicate a model version to use for prediction,\n        in combination with model_name. Cannot be used together with model_uri.\n        See MLEngineBatchPredictionOperator for more detail. If None, then the\n        `dag`'s `default_args['version_name']` will be used.\n    :type version_name: str\n\n    :param dag: The `DAG` to use for all Operators.\n    :type dag: airflow.models.DAG\n\n    :returns: a tuple of three operators, (prediction, summary, validation)\n    :rtype: tuple(DataFlowPythonOperator, DataFlowPythonOperator,\n                  PythonOperator)\n    \"\"\"\n\n    # Verify that task_prefix doesn't have any special characters except hyphen\n    # '-', which is the only allowed non-alphanumeric character by Dataflow.\n    if not re.match(r\"^[a-zA-Z][-A-Za-z0-9]*$\", task_prefix):\n        raise AirflowException(\n            \"Malformed task_id for DataFlowPythonOperator (only alphanumeric \"\n            \"and hyphens are allowed but got: \" + task_prefix)\n\n    metric_fn, metric_keys = metric_fn_and_keys\n    if not callable(metric_fn):\n        raise AirflowException(\"`metric_fn` param must be callable.\")\n    if not callable(validate_fn):\n        raise AirflowException(\"`validate_fn` param must be callable.\")\n\n    if dag is not None and dag.default_args is not None:\n        default_args = dag.default_args\n        project_id = project_id or default_args.get('project_id')\n        region = region or default_args.get('region')\n        model_name = model_name or default_args.get('model_name')\n        version_name = version_name or default_args.get('version_name')\n        dataflow_options = dataflow_options or \\\n            default_args.get('dataflow_default_options')\n\n    evaluate_prediction = MLEngineBatchPredictionOperator(\n        task_id=(task_prefix + \"-prediction\"),\n        project_id=project_id,\n        job_id=batch_prediction_job_id,\n        region=region,\n        data_format=data_format,\n        input_paths=input_paths,\n        output_path=prediction_path,\n        uri=model_uri,\n        model_name=model_name,\n        version_name=version_name,\n        dag=dag)\n\n    metric_fn_encoded = base64.b64encode(dill.dumps(metric_fn, recurse=True))\n    evaluate_summary = DataFlowPythonOperator(\n        task_id=(task_prefix + \"-summary\"),\n        py_options=[\"-m\"],\n        py_file=\"airflow.contrib.utils.mlengine_prediction_summary\",\n        dataflow_default_options=dataflow_options,\n        options={\n            \"prediction_path\": prediction_path,\n            \"metric_fn_encoded\": metric_fn_encoded,\n            \"metric_keys\": ','.join(metric_keys)\n        },\n        dag=dag)\n    evaluate_summary.set_upstream(evaluate_prediction)\n\n    def apply_validate_fn(*args, **kwargs):\n        prediction_path = kwargs[\"templates_dict\"][\"prediction_path\"]\n        scheme, bucket, obj, _, _ = urlsplit(prediction_path)\n        if scheme != \"gs\" or not bucket or not obj:\n            raise ValueError(\"Wrong format prediction_path: %s\",\n                             prediction_path)\n        summary = os.path.join(obj.strip(\"/\"),\n                               \"prediction.summary.json\")\n        gcs_hook = GoogleCloudStorageHook()\n        summary = json.loads(gcs_hook.download(bucket, summary))\n        return validate_fn(summary)\n\n    evaluate_validation = PythonOperator(\n        task_id=(task_prefix + \"-validation\"),\n        python_callable=apply_validate_fn,\n        provide_context=True,\n        templates_dict={\"prediction_path\": prediction_path},\n        dag=dag)\n    evaluate_validation.set_upstream(evaluate_summary)\n\n    return evaluate_prediction, evaluate_summary, evaluate_validation", "code_tokens": ["def", "create_evaluate_ops", "(", "task_prefix", ",", "data_format", ",", "input_paths", ",", "prediction_path", ",", "metric_fn_and_keys", ",", "validate_fn", ",", "batch_prediction_job_id", "=", "None", ",", "project_id", "=", "None", ",", "region", "=", "None", ",", "dataflow_options", "=", "None", ",", "model_uri", "=", "None", ",", "model_name", "=", "None", ",", "version_name", "=", "None", ",", "dag", "=", "None", ")", ":", "# Verify that task_prefix doesn't have any special characters except hyphen", "# '-', which is the only allowed non-alphanumeric character by Dataflow.", "if", "not", "re", ".", "match", "(", "r\"^[a-zA-Z][-A-Za-z0-9]*$\"", ",", "task_prefix", ")", ":", "raise", "AirflowException", "(", "\"Malformed task_id for DataFlowPythonOperator (only alphanumeric \"", "\"and hyphens are allowed but got: \"", "+", "task_prefix", ")", "metric_fn", ",", "metric_keys", "=", "metric_fn_and_keys", "if", "not", "callable", "(", "metric_fn", ")", ":", "raise", "AirflowException", "(", "\"`metric_fn` param must be callable.\"", ")", "if", "not", "callable", "(", "validate_fn", ")", ":", "raise", "AirflowException", "(", "\"`validate_fn` param must be callable.\"", ")", "if", "dag", "is", "not", "None", "and", "dag", ".", "default_args", "is", "not", "None", ":", "default_args", "=", "dag", ".", "default_args", "project_id", "=", "project_id", "or", "default_args", ".", "get", "(", "'project_id'", ")", "region", "=", "region", "or", "default_args", ".", "get", "(", "'region'", ")", "model_name", "=", "model_name", "or", "default_args", ".", "get", "(", "'model_name'", ")", "version_name", "=", "version_name", "or", "default_args", ".", "get", "(", "'version_name'", ")", "dataflow_options", "=", "dataflow_options", "or", "default_args", ".", "get", "(", "'dataflow_default_options'", ")", "evaluate_prediction", "=", "MLEngineBatchPredictionOperator", "(", "task_id", "=", "(", "task_prefix", "+", "\"-prediction\"", ")", ",", "project_id", "=", "project_id", ",", "job_id", "=", "batch_prediction_job_id", ",", "region", "=", "region", ",", "data_format", "=", "data_format", ",", "input_paths", "=", "input_paths", ",", "output_path", "=", "prediction_path", ",", "uri", "=", "model_uri", ",", "model_name", "=", "model_name", ",", "version_name", "=", "version_name", ",", "dag", "=", "dag", ")", "metric_fn_encoded", "=", "base64", ".", "b64encode", "(", "dill", ".", "dumps", "(", "metric_fn", ",", "recurse", "=", "True", ")", ")", "evaluate_summary", "=", "DataFlowPythonOperator", "(", "task_id", "=", "(", "task_prefix", "+", "\"-summary\"", ")", ",", "py_options", "=", "[", "\"-m\"", "]", ",", "py_file", "=", "\"airflow.contrib.utils.mlengine_prediction_summary\"", ",", "dataflow_default_options", "=", "dataflow_options", ",", "options", "=", "{", "\"prediction_path\"", ":", "prediction_path", ",", "\"metric_fn_encoded\"", ":", "metric_fn_encoded", ",", "\"metric_keys\"", ":", "','", ".", "join", "(", "metric_keys", ")", "}", ",", "dag", "=", "dag", ")", "evaluate_summary", ".", "set_upstream", "(", "evaluate_prediction", ")", "def", "apply_validate_fn", "(", "*", "args", ",", "*", "*", "kwargs", ")", ":", "prediction_path", "=", "kwargs", "[", "\"templates_dict\"", "]", "[", "\"prediction_path\"", "]", "scheme", ",", "bucket", ",", "obj", ",", "_", ",", "_", "=", "urlsplit", "(", "prediction_path", ")", "if", "scheme", "!=", "\"gs\"", "or", "not", "bucket", "or", "not", "obj", ":", "raise", "ValueError", "(", "\"Wrong format prediction_path: %s\"", ",", "prediction_path", ")", "summary", "=", "os", ".", "path", ".", "join", "(", "obj", ".", "strip", "(", "\"/\"", ")", ",", "\"prediction.summary.json\"", ")", "gcs_hook", "=", "GoogleCloudStorageHook", "(", ")", "summary", "=", "json", ".", "loads", "(", "gcs_hook", ".", "download", "(", "bucket", ",", "summary", ")", ")", "return", "validate_fn", "(", "summary", ")", "evaluate_validation", "=", "PythonOperator", "(", "task_id", "=", "(", "task_prefix", "+", "\"-validation\"", ")", ",", "python_callable", "=", "apply_validate_fn", ",", "provide_context", "=", "True", ",", "templates_dict", "=", "{", "\"prediction_path\"", ":", "prediction_path", "}", ",", "dag", "=", "dag", ")", "evaluate_validation", ".", "set_upstream", "(", "evaluate_summary", ")", "return", "evaluate_prediction", ",", "evaluate_summary", ",", "evaluate_validation"], "docstring": "Creates Operators needed for model evaluation and returns.\n\n    It gets prediction over inputs via Cloud ML Engine BatchPrediction API by\n    calling MLEngineBatchPredictionOperator, then summarize and validate\n    the result via Cloud Dataflow using DataFlowPythonOperator.\n\n    For details and pricing about Batch prediction, please refer to the website\n    https://cloud.google.com/ml-engine/docs/how-tos/batch-predict\n    and for Cloud Dataflow, https://cloud.google.com/dataflow/docs/\n\n    It returns three chained operators for prediction, summary, and validation,\n    named as <prefix>-prediction, <prefix>-summary, and <prefix>-validation,\n    respectively.\n    (<prefix> should contain only alphanumeric characters or hyphen.)\n\n    The upstream and downstream can be set accordingly like:\n      pred, _, val = create_evaluate_ops(...)\n      pred.set_upstream(upstream_op)\n      ...\n      downstream_op.set_upstream(val)\n\n    Callers will provide two python callables, metric_fn and validate_fn, in\n    order to customize the evaluation behavior as they wish.\n    - metric_fn receives a dictionary per instance derived from json in the\n      batch prediction result. The keys might vary depending on the model.\n      It should return a tuple of metrics.\n    - validation_fn receives a dictionary of the averaged metrics that metric_fn\n      generated over all instances.\n      The key/value of the dictionary matches to what's given by\n      metric_fn_and_keys arg.\n      The dictionary contains an additional metric, 'count' to represent the\n      total number of instances received for evaluation.\n      The function would raise an exception to mark the task as failed, in a\n      case the validation result is not okay to proceed (i.e. to set the trained\n      version as default).\n\n    Typical examples are like this:\n\n    def get_metric_fn_and_keys():\n        import math  # imports should be outside of the metric_fn below.\n        def error_and_squared_error(inst):\n            label = float(inst['input_label'])\n            classes = float(inst['classes'])  # 0 or 1\n            err = abs(classes-label)\n            squared_err = math.pow(classes-label, 2)\n            return (err, squared_err)  # returns a tuple.\n        return error_and_squared_error, ['err', 'mse']  # key order must match.\n\n    def validate_err_and_count(summary):\n        if summary['err'] > 0.2:\n            raise ValueError('Too high err>0.2; summary=%s' % summary)\n        if summary['mse'] > 0.05:\n            raise ValueError('Too high mse>0.05; summary=%s' % summary)\n        if summary['count'] < 1000:\n            raise ValueError('Too few instances<1000; summary=%s' % summary)\n        return summary\n\n    For the details on the other BatchPrediction-related arguments (project_id,\n    job_id, region, data_format, input_paths, prediction_path, model_uri),\n    please refer to MLEngineBatchPredictionOperator too.\n\n    :param task_prefix: a prefix for the tasks. Only alphanumeric characters and\n        hyphen are allowed (no underscores), since this will be used as dataflow\n        job name, which doesn't allow other characters.\n    :type task_prefix: str\n\n    :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP'\n    :type data_format: str\n\n    :param input_paths: a list of input paths to be sent to BatchPrediction.\n    :type input_paths: list[str]\n\n    :param prediction_path: GCS path to put the prediction results in.\n    :type prediction_path: str\n\n    :param metric_fn_and_keys: a tuple of metric_fn and metric_keys:\n        - metric_fn is a function that accepts a dictionary (for an instance),\n          and returns a tuple of metric(s) that it calculates.\n        - metric_keys is a list of strings to denote the key of each metric.\n    :type metric_fn_and_keys: tuple of a function and a list[str]\n\n    :param validate_fn: a function to validate whether the averaged metric(s) is\n        good enough to push the model.\n    :type validate_fn: function\n\n    :param batch_prediction_job_id: the id to use for the Cloud ML Batch\n        prediction job. Passed directly to the MLEngineBatchPredictionOperator as\n        the job_id argument.\n    :type batch_prediction_job_id: str\n\n    :param project_id: the Google Cloud Platform project id in which to execute\n        Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['project_id']` will be used.\n    :type project_id: str\n\n    :param region: the Google Cloud Platform region in which to execute Cloud ML\n        Batch Prediction and Dataflow jobs. If None, then the `dag`'s\n        `default_args['region']` will be used.\n    :type region: str\n\n    :param dataflow_options: options to run Dataflow jobs. If None, then the\n        `dag`'s `default_args['dataflow_default_options']` will be used.\n    :type dataflow_options: dictionary\n\n    :param model_uri: GCS path of the model exported by Tensorflow using\n        tensorflow.estimator.export_savedmodel(). It cannot be used with\n        model_name or version_name below. See MLEngineBatchPredictionOperator for\n        more detail.\n    :type model_uri: str\n\n    :param model_name: Used to indicate a model to use for prediction. Can be\n        used in combination with version_name, but cannot be used together with\n        model_uri. See MLEngineBatchPredictionOperator for more detail. If None,\n        then the `dag`'s `default_args['model_name']` will be used.\n    :type model_name: str\n\n    :param version_name: Used to indicate a model version to use for prediction,\n        in combination with model_name. Cannot be used together with model_uri.\n        See MLEngineBatchPredictionOperator for more detail. If None, then the\n        `dag`'s `default_args['version_name']` will be used.\n    :type version_name: str\n\n    :param dag: The `DAG` to use for all Operators.\n    :type dag: airflow.models.DAG\n\n    :returns: a tuple of three operators, (prediction, summary, validation)\n    :rtype: tuple(DataFlowPythonOperator, DataFlowPythonOperator,\n                  PythonOperator)", "docstring_tokens": ["Creates", "Operators", "needed", "for", "model", "evaluation", "and", "returns", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/utils/mlengine_operator_utils.py#L32-L246", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/file.py", "func_name": "mkdirs", "original_string": "def mkdirs(path, mode):\n    \"\"\"\n    Creates the directory specified by path, creating intermediate directories\n    as necessary. If directory already exists, this is a no-op.\n\n    :param path: The directory to create\n    :type path: str\n    :param mode: The mode to give to the directory e.g. 0o755, ignores umask\n    :type mode: int\n    \"\"\"\n    try:\n        o_umask = os.umask(0)\n        os.makedirs(path, mode)\n    except OSError:\n        if not os.path.isdir(path):\n            raise\n    finally:\n        os.umask(o_umask)", "language": "python", "code": "def mkdirs(path, mode):\n    \"\"\"\n    Creates the directory specified by path, creating intermediate directories\n    as necessary. If directory already exists, this is a no-op.\n\n    :param path: The directory to create\n    :type path: str\n    :param mode: The mode to give to the directory e.g. 0o755, ignores umask\n    :type mode: int\n    \"\"\"\n    try:\n        o_umask = os.umask(0)\n        os.makedirs(path, mode)\n    except OSError:\n        if not os.path.isdir(path):\n            raise\n    finally:\n        os.umask(o_umask)", "code_tokens": ["def", "mkdirs", "(", "path", ",", "mode", ")", ":", "try", ":", "o_umask", "=", "os", ".", "umask", "(", "0", ")", "os", ".", "makedirs", "(", "path", ",", "mode", ")", "except", "OSError", ":", "if", "not", "os", ".", "path", ".", "isdir", "(", "path", ")", ":", "raise", "finally", ":", "os", ".", "umask", "(", "o_umask", ")"], "docstring": "Creates the directory specified by path, creating intermediate directories\n    as necessary. If directory already exists, this is a no-op.\n\n    :param path: The directory to create\n    :type path: str\n    :param mode: The mode to give to the directory e.g. 0o755, ignores umask\n    :type mode: int", "docstring_tokens": ["Creates", "the", "directory", "specified", "by", "path", "creating", "intermediate", "directories", "as", "necessary", ".", "If", "directory", "already", "exists", "this", "is", "a", "no", "-", "op", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/file.py#L42-L59", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/operators/check_operator.py", "func_name": "_convert_to_float_if_possible", "original_string": "def _convert_to_float_if_possible(s):\n    \"\"\"\n    A small helper function to convert a string to a numeric value\n    if appropriate\n\n    :param s: the string to be converted\n    :type s: str\n    \"\"\"\n    try:\n        ret = float(s)\n    except (ValueError, TypeError):\n        ret = s\n    return ret", "language": "python", "code": "def _convert_to_float_if_possible(s):\n    \"\"\"\n    A small helper function to convert a string to a numeric value\n    if appropriate\n\n    :param s: the string to be converted\n    :type s: str\n    \"\"\"\n    try:\n        ret = float(s)\n    except (ValueError, TypeError):\n        ret = s\n    return ret", "code_tokens": ["def", "_convert_to_float_if_possible", "(", "s", ")", ":", "try", ":", "ret", "=", "float", "(", "s", ")", "except", "(", "ValueError", ",", "TypeError", ")", ":", "ret", "=", "s", "return", "ret"], "docstring": "A small helper function to convert a string to a numeric value\n    if appropriate\n\n    :param s: the string to be converted\n    :type s: str", "docstring_tokens": ["A", "small", "helper", "function", "to", "convert", "a", "string", "to", "a", "numeric", "value", "if", "appropriate"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/operators/check_operator.py#L98-L110", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/timezone.py", "func_name": "make_aware", "original_string": "def make_aware(value, timezone=None):\n    \"\"\"\n    Make a naive datetime.datetime in a given time zone aware.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: localized datetime in settings.TIMEZONE or timezone\n\n    \"\"\"\n    if timezone is None:\n        timezone = TIMEZONE\n\n    # Check that we won't overwrite the timezone of an aware datetime.\n    if is_localized(value):\n        raise ValueError(\n            \"make_aware expects a naive datetime, got %s\" % value)\n    if hasattr(value, 'fold'):\n        # In case of python 3.6 we want to do the same that pendulum does for python3.5\n        # i.e in case we move clock back we want to schedule the run at the time of the second\n        # instance of the same clock time rather than the first one.\n        # Fold parameter has no impact in other cases so we can safely set it to 1 here\n        value = value.replace(fold=1)\n    if hasattr(timezone, 'localize'):\n        # This method is available for pytz time zones.\n        return timezone.localize(value)\n    elif hasattr(timezone, 'convert'):\n        # For pendulum\n        return timezone.convert(value)\n    else:\n        # This may be wrong around DST changes!\n        return value.replace(tzinfo=timezone)", "language": "python", "code": "def make_aware(value, timezone=None):\n    \"\"\"\n    Make a naive datetime.datetime in a given time zone aware.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: localized datetime in settings.TIMEZONE or timezone\n\n    \"\"\"\n    if timezone is None:\n        timezone = TIMEZONE\n\n    # Check that we won't overwrite the timezone of an aware datetime.\n    if is_localized(value):\n        raise ValueError(\n            \"make_aware expects a naive datetime, got %s\" % value)\n    if hasattr(value, 'fold'):\n        # In case of python 3.6 we want to do the same that pendulum does for python3.5\n        # i.e in case we move clock back we want to schedule the run at the time of the second\n        # instance of the same clock time rather than the first one.\n        # Fold parameter has no impact in other cases so we can safely set it to 1 here\n        value = value.replace(fold=1)\n    if hasattr(timezone, 'localize'):\n        # This method is available for pytz time zones.\n        return timezone.localize(value)\n    elif hasattr(timezone, 'convert'):\n        # For pendulum\n        return timezone.convert(value)\n    else:\n        # This may be wrong around DST changes!\n        return value.replace(tzinfo=timezone)", "code_tokens": ["def", "make_aware", "(", "value", ",", "timezone", "=", "None", ")", ":", "if", "timezone", "is", "None", ":", "timezone", "=", "TIMEZONE", "# Check that we won't overwrite the timezone of an aware datetime.", "if", "is_localized", "(", "value", ")", ":", "raise", "ValueError", "(", "\"make_aware expects a naive datetime, got %s\"", "%", "value", ")", "if", "hasattr", "(", "value", ",", "'fold'", ")", ":", "# In case of python 3.6 we want to do the same that pendulum does for python3.5", "# i.e in case we move clock back we want to schedule the run at the time of the second", "# instance of the same clock time rather than the first one.", "# Fold parameter has no impact in other cases so we can safely set it to 1 here", "value", "=", "value", ".", "replace", "(", "fold", "=", "1", ")", "if", "hasattr", "(", "timezone", ",", "'localize'", ")", ":", "# This method is available for pytz time zones.", "return", "timezone", ".", "localize", "(", "value", ")", "elif", "hasattr", "(", "timezone", ",", "'convert'", ")", ":", "# For pendulum", "return", "timezone", ".", "convert", "(", "value", ")", "else", ":", "# This may be wrong around DST changes!", "return", "value", ".", "replace", "(", "tzinfo", "=", "timezone", ")"], "docstring": "Make a naive datetime.datetime in a given time zone aware.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: localized datetime in settings.TIMEZONE or timezone", "docstring_tokens": ["Make", "a", "naive", "datetime", ".", "datetime", "in", "a", "given", "time", "zone", "aware", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/timezone.py#L98-L128", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/timezone.py", "func_name": "make_naive", "original_string": "def make_naive(value, timezone=None):\n    \"\"\"\n    Make an aware datetime.datetime naive in a given time zone.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: naive datetime\n    \"\"\"\n    if timezone is None:\n        timezone = TIMEZONE\n\n    # Emulate the behavior of astimezone() on Python < 3.6.\n    if is_naive(value):\n        raise ValueError(\"make_naive() cannot be applied to a naive datetime\")\n\n    o = value.astimezone(timezone)\n\n    # cross library compatibility\n    naive = dt.datetime(o.year,\n                        o.month,\n                        o.day,\n                        o.hour,\n                        o.minute,\n                        o.second,\n                        o.microsecond)\n\n    return naive", "language": "python", "code": "def make_naive(value, timezone=None):\n    \"\"\"\n    Make an aware datetime.datetime naive in a given time zone.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: naive datetime\n    \"\"\"\n    if timezone is None:\n        timezone = TIMEZONE\n\n    # Emulate the behavior of astimezone() on Python < 3.6.\n    if is_naive(value):\n        raise ValueError(\"make_naive() cannot be applied to a naive datetime\")\n\n    o = value.astimezone(timezone)\n\n    # cross library compatibility\n    naive = dt.datetime(o.year,\n                        o.month,\n                        o.day,\n                        o.hour,\n                        o.minute,\n                        o.second,\n                        o.microsecond)\n\n    return naive", "code_tokens": ["def", "make_naive", "(", "value", ",", "timezone", "=", "None", ")", ":", "if", "timezone", "is", "None", ":", "timezone", "=", "TIMEZONE", "# Emulate the behavior of astimezone() on Python < 3.6.", "if", "is_naive", "(", "value", ")", ":", "raise", "ValueError", "(", "\"make_naive() cannot be applied to a naive datetime\"", ")", "o", "=", "value", ".", "astimezone", "(", "timezone", ")", "# cross library compatibility", "naive", "=", "dt", ".", "datetime", "(", "o", ".", "year", ",", "o", ".", "month", ",", "o", ".", "day", ",", "o", ".", "hour", ",", "o", ".", "minute", ",", "o", ".", "second", ",", "o", ".", "microsecond", ")", "return", "naive"], "docstring": "Make an aware datetime.datetime naive in a given time zone.\n\n    :param value: datetime\n    :param timezone: timezone\n    :return: naive datetime", "docstring_tokens": ["Make", "an", "aware", "datetime", ".", "datetime", "naive", "in", "a", "given", "time", "zone", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/timezone.py#L131-L157", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/timezone.py", "func_name": "datetime", "original_string": "def datetime(*args, **kwargs):\n    \"\"\"\n    Wrapper around datetime.datetime that adds settings.TIMEZONE if tzinfo not specified\n\n    :return: datetime.datetime\n    \"\"\"\n    if 'tzinfo' not in kwargs:\n        kwargs['tzinfo'] = TIMEZONE\n\n    return dt.datetime(*args, **kwargs)", "language": "python", "code": "def datetime(*args, **kwargs):\n    \"\"\"\n    Wrapper around datetime.datetime that adds settings.TIMEZONE if tzinfo not specified\n\n    :return: datetime.datetime\n    \"\"\"\n    if 'tzinfo' not in kwargs:\n        kwargs['tzinfo'] = TIMEZONE\n\n    return dt.datetime(*args, **kwargs)", "code_tokens": ["def", "datetime", "(", "*", "args", ",", "*", "*", "kwargs", ")", ":", "if", "'tzinfo'", "not", "in", "kwargs", ":", "kwargs", "[", "'tzinfo'", "]", "=", "TIMEZONE", "return", "dt", ".", "datetime", "(", "*", "args", ",", "*", "*", "kwargs", ")"], "docstring": "Wrapper around datetime.datetime that adds settings.TIMEZONE if tzinfo not specified\n\n    :return: datetime.datetime", "docstring_tokens": ["Wrapper", "around", "datetime", ".", "datetime", "that", "adds", "settings", ".", "TIMEZONE", "if", "tzinfo", "not", "specified"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/timezone.py#L160-L169", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/druid_hook.py", "func_name": "DruidDbApiHook.get_conn", "original_string": "def get_conn(self):\n        \"\"\"\n        Establish a connection to druid broker.\n        \"\"\"\n        conn = self.get_connection(self.druid_broker_conn_id)\n        druid_broker_conn = connect(\n            host=conn.host,\n            port=conn.port,\n            path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),\n            scheme=conn.extra_dejson.get('schema', 'http')\n        )\n        self.log.info('Get the connection to druid broker on %s', conn.host)\n        return druid_broker_conn", "language": "python", "code": "def get_conn(self):\n        \"\"\"\n        Establish a connection to druid broker.\n        \"\"\"\n        conn = self.get_connection(self.druid_broker_conn_id)\n        druid_broker_conn = connect(\n            host=conn.host,\n            port=conn.port,\n            path=conn.extra_dejson.get('endpoint', '/druid/v2/sql'),\n            scheme=conn.extra_dejson.get('schema', 'http')\n        )\n        self.log.info('Get the connection to druid broker on %s', conn.host)\n        return druid_broker_conn", "code_tokens": ["def", "get_conn", "(", "self", ")", ":", "conn", "=", "self", ".", "get_connection", "(", "self", ".", "druid_broker_conn_id", ")", "druid_broker_conn", "=", "connect", "(", "host", "=", "conn", ".", "host", ",", "port", "=", "conn", ".", "port", ",", "path", "=", "conn", ".", "extra_dejson", ".", "get", "(", "'endpoint'", ",", "'/druid/v2/sql'", ")", ",", "scheme", "=", "conn", ".", "extra_dejson", ".", "get", "(", "'schema'", ",", "'http'", ")", ")", "self", ".", "log", ".", "info", "(", "'Get the connection to druid broker on %s'", ",", "conn", ".", "host", ")", "return", "druid_broker_conn"], "docstring": "Establish a connection to druid broker.", "docstring_tokens": ["Establish", "a", "connection", "to", "druid", "broker", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/druid_hook.py#L127-L139", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/http_hook.py", "func_name": "HttpHook.get_conn", "original_string": "def get_conn(self, headers=None):\n        \"\"\"\n        Returns http session for use with requests\n\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict\n        \"\"\"\n        session = requests.Session()\n        if self.http_conn_id:\n            conn = self.get_connection(self.http_conn_id)\n\n            if \"://\" in conn.host:\n                self.base_url = conn.host\n            else:\n                # schema defaults to HTTP\n                schema = conn.schema if conn.schema else \"http\"\n                self.base_url = schema + \"://\" + conn.host\n\n            if conn.port:\n                self.base_url = self.base_url + \":\" + str(conn.port)\n            if conn.login:\n                session.auth = (conn.login, conn.password)\n            if conn.extra:\n                try:\n                    session.headers.update(conn.extra_dejson)\n                except TypeError:\n                    self.log.warn('Connection to %s has invalid extra field.', conn.host)\n        if headers:\n            session.headers.update(headers)\n\n        return session", "language": "python", "code": "def get_conn(self, headers=None):\n        \"\"\"\n        Returns http session for use with requests\n\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict\n        \"\"\"\n        session = requests.Session()\n        if self.http_conn_id:\n            conn = self.get_connection(self.http_conn_id)\n\n            if \"://\" in conn.host:\n                self.base_url = conn.host\n            else:\n                # schema defaults to HTTP\n                schema = conn.schema if conn.schema else \"http\"\n                self.base_url = schema + \"://\" + conn.host\n\n            if conn.port:\n                self.base_url = self.base_url + \":\" + str(conn.port)\n            if conn.login:\n                session.auth = (conn.login, conn.password)\n            if conn.extra:\n                try:\n                    session.headers.update(conn.extra_dejson)\n                except TypeError:\n                    self.log.warn('Connection to %s has invalid extra field.', conn.host)\n        if headers:\n            session.headers.update(headers)\n\n        return session", "code_tokens": ["def", "get_conn", "(", "self", ",", "headers", "=", "None", ")", ":", "session", "=", "requests", ".", "Session", "(", ")", "if", "self", ".", "http_conn_id", ":", "conn", "=", "self", ".", "get_connection", "(", "self", ".", "http_conn_id", ")", "if", "\"://\"", "in", "conn", ".", "host", ":", "self", ".", "base_url", "=", "conn", ".", "host", "else", ":", "# schema defaults to HTTP", "schema", "=", "conn", ".", "schema", "if", "conn", ".", "schema", "else", "\"http\"", "self", ".", "base_url", "=", "schema", "+", "\"://\"", "+", "conn", ".", "host", "if", "conn", ".", "port", ":", "self", ".", "base_url", "=", "self", ".", "base_url", "+", "\":\"", "+", "str", "(", "conn", ".", "port", ")", "if", "conn", ".", "login", ":", "session", ".", "auth", "=", "(", "conn", ".", "login", ",", "conn", ".", "password", ")", "if", "conn", ".", "extra", ":", "try", ":", "session", ".", "headers", ".", "update", "(", "conn", ".", "extra_dejson", ")", "except", "TypeError", ":", "self", ".", "log", ".", "warn", "(", "'Connection to %s has invalid extra field.'", ",", "conn", ".", "host", ")", "if", "headers", ":", "session", ".", "headers", ".", "update", "(", "headers", ")", "return", "session"], "docstring": "Returns http session for use with requests\n\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict", "docstring_tokens": ["Returns", "http", "session", "for", "use", "with", "requests"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/http_hook.py#L53-L83", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/http_hook.py", "func_name": "HttpHook.run", "original_string": "def run(self, endpoint, data=None, headers=None, extra_options=None):\n        \"\"\"\n        Performs the request\n\n        :param endpoint: the endpoint to be called i.e. resource/v1/query?\n        :type endpoint: str\n        :param data: payload to be uploaded or request parameters\n        :type data: dict\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non\n            2XX or 3XX status codes\n        :type extra_options: dict\n        \"\"\"\n        extra_options = extra_options or {}\n\n        session = self.get_conn(headers)\n\n        if self.base_url and not self.base_url.endswith('/') and \\\n           endpoint and not endpoint.startswith('/'):\n            url = self.base_url + '/' + endpoint\n        else:\n            url = (self.base_url or '') + (endpoint or '')\n\n        req = None\n        if self.method == 'GET':\n            # GET uses params\n            req = requests.Request(self.method,\n                                   url,\n                                   params=data,\n                                   headers=headers)\n        elif self.method == 'HEAD':\n            # HEAD doesn't use params\n            req = requests.Request(self.method,\n                                   url,\n                                   headers=headers)\n        else:\n            # Others use data\n            req = requests.Request(self.method,\n                                   url,\n                                   data=data,\n                                   headers=headers)\n\n        prepped_request = session.prepare_request(req)\n        self.log.info(\"Sending '%s' to url: %s\", self.method, url)\n        return self.run_and_check(session, prepped_request, extra_options)", "language": "python", "code": "def run(self, endpoint, data=None, headers=None, extra_options=None):\n        \"\"\"\n        Performs the request\n\n        :param endpoint: the endpoint to be called i.e. resource/v1/query?\n        :type endpoint: str\n        :param data: payload to be uploaded or request parameters\n        :type data: dict\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non\n            2XX or 3XX status codes\n        :type extra_options: dict\n        \"\"\"\n        extra_options = extra_options or {}\n\n        session = self.get_conn(headers)\n\n        if self.base_url and not self.base_url.endswith('/') and \\\n           endpoint and not endpoint.startswith('/'):\n            url = self.base_url + '/' + endpoint\n        else:\n            url = (self.base_url or '') + (endpoint or '')\n\n        req = None\n        if self.method == 'GET':\n            # GET uses params\n            req = requests.Request(self.method,\n                                   url,\n                                   params=data,\n                                   headers=headers)\n        elif self.method == 'HEAD':\n            # HEAD doesn't use params\n            req = requests.Request(self.method,\n                                   url,\n                                   headers=headers)\n        else:\n            # Others use data\n            req = requests.Request(self.method,\n                                   url,\n                                   data=data,\n                                   headers=headers)\n\n        prepped_request = session.prepare_request(req)\n        self.log.info(\"Sending '%s' to url: %s\", self.method, url)\n        return self.run_and_check(session, prepped_request, extra_options)", "code_tokens": ["def", "run", "(", "self", ",", "endpoint", ",", "data", "=", "None", ",", "headers", "=", "None", ",", "extra_options", "=", "None", ")", ":", "extra_options", "=", "extra_options", "or", "{", "}", "session", "=", "self", ".", "get_conn", "(", "headers", ")", "if", "self", ".", "base_url", "and", "not", "self", ".", "base_url", ".", "endswith", "(", "'/'", ")", "and", "endpoint", "and", "not", "endpoint", ".", "startswith", "(", "'/'", ")", ":", "url", "=", "self", ".", "base_url", "+", "'/'", "+", "endpoint", "else", ":", "url", "=", "(", "self", ".", "base_url", "or", "''", ")", "+", "(", "endpoint", "or", "''", ")", "req", "=", "None", "if", "self", ".", "method", "==", "'GET'", ":", "# GET uses params", "req", "=", "requests", ".", "Request", "(", "self", ".", "method", ",", "url", ",", "params", "=", "data", ",", "headers", "=", "headers", ")", "elif", "self", ".", "method", "==", "'HEAD'", ":", "# HEAD doesn't use params", "req", "=", "requests", ".", "Request", "(", "self", ".", "method", ",", "url", ",", "headers", "=", "headers", ")", "else", ":", "# Others use data", "req", "=", "requests", ".", "Request", "(", "self", ".", "method", ",", "url", ",", "data", "=", "data", ",", "headers", "=", "headers", ")", "prepped_request", "=", "session", ".", "prepare_request", "(", "req", ")", "self", ".", "log", ".", "info", "(", "\"Sending '%s' to url: %s\"", ",", "self", ".", "method", ",", "url", ")", "return", "self", ".", "run_and_check", "(", "session", ",", "prepped_request", ",", "extra_options", ")"], "docstring": "Performs the request\n\n        :param endpoint: the endpoint to be called i.e. resource/v1/query?\n        :type endpoint: str\n        :param data: payload to be uploaded or request parameters\n        :type data: dict\n        :param headers: additional headers to be passed through as a dictionary\n        :type headers: dict\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non\n            2XX or 3XX status codes\n        :type extra_options: dict", "docstring_tokens": ["Performs", "the", "request"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/http_hook.py#L85-L131", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/http_hook.py", "func_name": "HttpHook.check_response", "original_string": "def check_response(self, response):\n        \"\"\"\n        Checks the status code and raise an AirflowException exception on non 2XX or 3XX\n        status codes\n\n        :param response: A requests response object\n        :type response: requests.response\n        \"\"\"\n        try:\n            response.raise_for_status()\n        except requests.exceptions.HTTPError:\n            self.log.error(\"HTTP error: %s\", response.reason)\n            if self.method not in ['GET', 'HEAD']:\n                self.log.error(response.text)\n            raise AirflowException(str(response.status_code) + \":\" + response.reason)", "language": "python", "code": "def check_response(self, response):\n        \"\"\"\n        Checks the status code and raise an AirflowException exception on non 2XX or 3XX\n        status codes\n\n        :param response: A requests response object\n        :type response: requests.response\n        \"\"\"\n        try:\n            response.raise_for_status()\n        except requests.exceptions.HTTPError:\n            self.log.error(\"HTTP error: %s\", response.reason)\n            if self.method not in ['GET', 'HEAD']:\n                self.log.error(response.text)\n            raise AirflowException(str(response.status_code) + \":\" + response.reason)", "code_tokens": ["def", "check_response", "(", "self", ",", "response", ")", ":", "try", ":", "response", ".", "raise_for_status", "(", ")", "except", "requests", ".", "exceptions", ".", "HTTPError", ":", "self", ".", "log", ".", "error", "(", "\"HTTP error: %s\"", ",", "response", ".", "reason", ")", "if", "self", ".", "method", "not", "in", "[", "'GET'", ",", "'HEAD'", "]", ":", "self", ".", "log", ".", "error", "(", "response", ".", "text", ")", "raise", "AirflowException", "(", "str", "(", "response", ".", "status_code", ")", "+", "\":\"", "+", "response", ".", "reason", ")"], "docstring": "Checks the status code and raise an AirflowException exception on non 2XX or 3XX\n        status codes\n\n        :param response: A requests response object\n        :type response: requests.response", "docstring_tokens": ["Checks", "the", "status", "code", "and", "raise", "an", "AirflowException", "exception", "on", "non", "2XX", "or", "3XX", "status", "codes"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/http_hook.py#L133-L147", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/http_hook.py", "func_name": "HttpHook.run_and_check", "original_string": "def run_and_check(self, session, prepped_request, extra_options):\n        \"\"\"\n        Grabs extra options like timeout and actually runs the request,\n        checking for the result\n\n        :param session: the session to be used to execute the request\n        :type session: requests.Session\n        :param prepped_request: the prepared request generated in run()\n        :type prepped_request: session.prepare_request\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non 2XX\n            or 3XX status codes\n        :type extra_options: dict\n        \"\"\"\n        extra_options = extra_options or {}\n\n        try:\n            response = session.send(\n                prepped_request,\n                stream=extra_options.get(\"stream\", False),\n                verify=extra_options.get(\"verify\", True),\n                proxies=extra_options.get(\"proxies\", {}),\n                cert=extra_options.get(\"cert\"),\n                timeout=extra_options.get(\"timeout\"),\n                allow_redirects=extra_options.get(\"allow_redirects\", True))\n\n            if extra_options.get('check_response', True):\n                self.check_response(response)\n            return response\n\n        except requests.exceptions.ConnectionError as ex:\n            self.log.warn(str(ex) + ' Tenacity will retry to execute the operation')\n            raise ex", "language": "python", "code": "def run_and_check(self, session, prepped_request, extra_options):\n        \"\"\"\n        Grabs extra options like timeout and actually runs the request,\n        checking for the result\n\n        :param session: the session to be used to execute the request\n        :type session: requests.Session\n        :param prepped_request: the prepared request generated in run()\n        :type prepped_request: session.prepare_request\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non 2XX\n            or 3XX status codes\n        :type extra_options: dict\n        \"\"\"\n        extra_options = extra_options or {}\n\n        try:\n            response = session.send(\n                prepped_request,\n                stream=extra_options.get(\"stream\", False),\n                verify=extra_options.get(\"verify\", True),\n                proxies=extra_options.get(\"proxies\", {}),\n                cert=extra_options.get(\"cert\"),\n                timeout=extra_options.get(\"timeout\"),\n                allow_redirects=extra_options.get(\"allow_redirects\", True))\n\n            if extra_options.get('check_response', True):\n                self.check_response(response)\n            return response\n\n        except requests.exceptions.ConnectionError as ex:\n            self.log.warn(str(ex) + ' Tenacity will retry to execute the operation')\n            raise ex", "code_tokens": ["def", "run_and_check", "(", "self", ",", "session", ",", "prepped_request", ",", "extra_options", ")", ":", "extra_options", "=", "extra_options", "or", "{", "}", "try", ":", "response", "=", "session", ".", "send", "(", "prepped_request", ",", "stream", "=", "extra_options", ".", "get", "(", "\"stream\"", ",", "False", ")", ",", "verify", "=", "extra_options", ".", "get", "(", "\"verify\"", ",", "True", ")", ",", "proxies", "=", "extra_options", ".", "get", "(", "\"proxies\"", ",", "{", "}", ")", ",", "cert", "=", "extra_options", ".", "get", "(", "\"cert\"", ")", ",", "timeout", "=", "extra_options", ".", "get", "(", "\"timeout\"", ")", ",", "allow_redirects", "=", "extra_options", ".", "get", "(", "\"allow_redirects\"", ",", "True", ")", ")", "if", "extra_options", ".", "get", "(", "'check_response'", ",", "True", ")", ":", "self", ".", "check_response", "(", "response", ")", "return", "response", "except", "requests", ".", "exceptions", ".", "ConnectionError", "as", "ex", ":", "self", ".", "log", ".", "warn", "(", "str", "(", "ex", ")", "+", "' Tenacity will retry to execute the operation'", ")", "raise", "ex"], "docstring": "Grabs extra options like timeout and actually runs the request,\n        checking for the result\n\n        :param session: the session to be used to execute the request\n        :type session: requests.Session\n        :param prepped_request: the prepared request generated in run()\n        :type prepped_request: session.prepare_request\n        :param extra_options: additional options to be used when executing the request\n            i.e. {'check_response': False} to avoid checking raising exceptions on non 2XX\n            or 3XX status codes\n        :type extra_options: dict", "docstring_tokens": ["Grabs", "extra", "options", "like", "timeout", "and", "actually", "runs", "the", "request", "checking", "for", "the", "result"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/http_hook.py#L149-L181", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/db.py", "func_name": "create_session", "original_string": "def create_session():\n    \"\"\"\n    Contextmanager that will create and teardown a session.\n    \"\"\"\n    session = settings.Session()\n    try:\n        yield session\n        session.commit()\n    except Exception:\n        session.rollback()\n        raise\n    finally:\n        session.close()", "language": "python", "code": "def create_session():\n    \"\"\"\n    Contextmanager that will create and teardown a session.\n    \"\"\"\n    session = settings.Session()\n    try:\n        yield session\n        session.commit()\n    except Exception:\n        session.rollback()\n        raise\n    finally:\n        session.close()", "code_tokens": ["def", "create_session", "(", ")", ":", "session", "=", "settings", ".", "Session", "(", ")", "try", ":", "yield", "session", "session", ".", "commit", "(", ")", "except", "Exception", ":", "session", ".", "rollback", "(", ")", "raise", "finally", ":", "session", ".", "close", "(", ")"], "docstring": "Contextmanager that will create and teardown a session.", "docstring_tokens": ["Contextmanager", "that", "will", "create", "and", "teardown", "a", "session", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/db.py#L32-L44", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/db.py", "func_name": "provide_session", "original_string": "def provide_session(func):\n    \"\"\"\n    Function decorator that provides a session if it isn't provided.\n    If you want to reuse a session or run the function as part of a\n    database transaction, you pass it to the function, if not this wrapper\n    will create one and close it for you.\n    \"\"\"\n    @wraps(func)\n    def wrapper(*args, **kwargs):\n        arg_session = 'session'\n\n        func_params = func.__code__.co_varnames\n        session_in_args = arg_session in func_params and \\\n            func_params.index(arg_session) < len(args)\n        session_in_kwargs = arg_session in kwargs\n\n        if session_in_kwargs or session_in_args:\n            return func(*args, **kwargs)\n        else:\n            with create_session() as session:\n                kwargs[arg_session] = session\n                return func(*args, **kwargs)\n\n    return wrapper", "language": "python", "code": "def provide_session(func):\n    \"\"\"\n    Function decorator that provides a session if it isn't provided.\n    If you want to reuse a session or run the function as part of a\n    database transaction, you pass it to the function, if not this wrapper\n    will create one and close it for you.\n    \"\"\"\n    @wraps(func)\n    def wrapper(*args, **kwargs):\n        arg_session = 'session'\n\n        func_params = func.__code__.co_varnames\n        session_in_args = arg_session in func_params and \\\n            func_params.index(arg_session) < len(args)\n        session_in_kwargs = arg_session in kwargs\n\n        if session_in_kwargs or session_in_args:\n            return func(*args, **kwargs)\n        else:\n            with create_session() as session:\n                kwargs[arg_session] = session\n                return func(*args, **kwargs)\n\n    return wrapper", "code_tokens": ["def", "provide_session", "(", "func", ")", ":", "@", "wraps", "(", "func", ")", "def", "wrapper", "(", "*", "args", ",", "*", "*", "kwargs", ")", ":", "arg_session", "=", "'session'", "func_params", "=", "func", ".", "__code__", ".", "co_varnames", "session_in_args", "=", "arg_session", "in", "func_params", "and", "func_params", ".", "index", "(", "arg_session", ")", "<", "len", "(", "args", ")", "session_in_kwargs", "=", "arg_session", "in", "kwargs", "if", "session_in_kwargs", "or", "session_in_args", ":", "return", "func", "(", "*", "args", ",", "*", "*", "kwargs", ")", "else", ":", "with", "create_session", "(", ")", "as", "session", ":", "kwargs", "[", "arg_session", "]", "=", "session", "return", "func", "(", "*", "args", ",", "*", "*", "kwargs", ")", "return", "wrapper"], "docstring": "Function decorator that provides a session if it isn't provided.\n    If you want to reuse a session or run the function as part of a\n    database transaction, you pass it to the function, if not this wrapper\n    will create one and close it for you.", "docstring_tokens": ["Function", "decorator", "that", "provides", "a", "session", "if", "it", "isn", "t", "provided", ".", "If", "you", "want", "to", "reuse", "a", "session", "or", "run", "the", "function", "as", "part", "of", "a", "database", "transaction", "you", "pass", "it", "to", "the", "function", "if", "not", "this", "wrapper", "will", "create", "one", "and", "close", "it", "for", "you", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/db.py#L47-L70", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/utils/db.py", "func_name": "resetdb", "original_string": "def resetdb():\n    \"\"\"\n    Clear out the database\n    \"\"\"\n    from airflow import models\n\n    # alembic adds significant import time, so we import it lazily\n    from alembic.migration import MigrationContext\n\n    log.info(\"Dropping tables that exist\")\n\n    models.base.Base.metadata.drop_all(settings.engine)\n    mc = MigrationContext.configure(settings.engine)\n    if mc._version.exists(settings.engine):\n        mc._version.drop(settings.engine)\n\n    from flask_appbuilder.models.sqla import Base\n    Base.metadata.drop_all(settings.engine)\n\n    initdb()", "language": "python", "code": "def resetdb():\n    \"\"\"\n    Clear out the database\n    \"\"\"\n    from airflow import models\n\n    # alembic adds significant import time, so we import it lazily\n    from alembic.migration import MigrationContext\n\n    log.info(\"Dropping tables that exist\")\n\n    models.base.Base.metadata.drop_all(settings.engine)\n    mc = MigrationContext.configure(settings.engine)\n    if mc._version.exists(settings.engine):\n        mc._version.drop(settings.engine)\n\n    from flask_appbuilder.models.sqla import Base\n    Base.metadata.drop_all(settings.engine)\n\n    initdb()", "code_tokens": ["def", "resetdb", "(", ")", ":", "from", "airflow", "import", "models", "# alembic adds significant import time, so we import it lazily", "from", "alembic", ".", "migration", "import", "MigrationContext", "log", ".", "info", "(", "\"Dropping tables that exist\"", ")", "models", ".", "base", ".", "Base", ".", "metadata", ".", "drop_all", "(", "settings", ".", "engine", ")", "mc", "=", "MigrationContext", ".", "configure", "(", "settings", ".", "engine", ")", "if", "mc", ".", "_version", ".", "exists", "(", "settings", ".", "engine", ")", ":", "mc", ".", "_version", ".", "drop", "(", "settings", ".", "engine", ")", "from", "flask_appbuilder", ".", "models", ".", "sqla", "import", "Base", "Base", ".", "metadata", ".", "drop_all", "(", "settings", ".", "engine", ")", "initdb", "(", ")"], "docstring": "Clear out the database", "docstring_tokens": ["Clear", "out", "the", "database"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/utils/db.py#L312-L331", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/presto_hook.py", "func_name": "PrestoHook._get_pretty_exception_message", "original_string": "def _get_pretty_exception_message(e):\n        \"\"\"\n        Parses some DatabaseError to provide a better error message\n        \"\"\"\n        if (hasattr(e, 'message') and\n            'errorName' in e.message and\n                'message' in e.message):\n            return ('{name}: {message}'.format(\n                    name=e.message['errorName'],\n                    message=e.message['message']))\n        else:\n            return str(e)", "language": "python", "code": "def _get_pretty_exception_message(e):\n        \"\"\"\n        Parses some DatabaseError to provide a better error message\n        \"\"\"\n        if (hasattr(e, 'message') and\n            'errorName' in e.message and\n                'message' in e.message):\n            return ('{name}: {message}'.format(\n                    name=e.message['errorName'],\n                    message=e.message['message']))\n        else:\n            return str(e)", "code_tokens": ["def", "_get_pretty_exception_message", "(", "e", ")", ":", "if", "(", "hasattr", "(", "e", ",", "'message'", ")", "and", "'errorName'", "in", "e", ".", "message", "and", "'message'", "in", "e", ".", "message", ")", ":", "return", "(", "'{name}: {message}'", ".", "format", "(", "name", "=", "e", ".", "message", "[", "'errorName'", "]", ",", "message", "=", "e", ".", "message", "[", "'message'", "]", ")", ")", "else", ":", "return", "str", "(", "e", ")"], "docstring": "Parses some DatabaseError to provide a better error message", "docstring_tokens": ["Parses", "some", "DatabaseError", "to", "provide", "a", "better", "error", "message"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/presto_hook.py#L67-L78", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/presto_hook.py", "func_name": "PrestoHook.get_records", "original_string": "def get_records(self, hql, parameters=None):\n        \"\"\"\n        Get a set of records from Presto\n        \"\"\"\n        try:\n            return super().get_records(\n                self._strip_sql(hql), parameters)\n        except DatabaseError as e:\n            raise PrestoException(self._get_pretty_exception_message(e))", "language": "python", "code": "def get_records(self, hql, parameters=None):\n        \"\"\"\n        Get a set of records from Presto\n        \"\"\"\n        try:\n            return super().get_records(\n                self._strip_sql(hql), parameters)\n        except DatabaseError as e:\n            raise PrestoException(self._get_pretty_exception_message(e))", "code_tokens": ["def", "get_records", "(", "self", ",", "hql", ",", "parameters", "=", "None", ")", ":", "try", ":", "return", "super", "(", ")", ".", "get_records", "(", "self", ".", "_strip_sql", "(", "hql", ")", ",", "parameters", ")", "except", "DatabaseError", "as", "e", ":", "raise", "PrestoException", "(", "self", ".", "_get_pretty_exception_message", "(", "e", ")", ")"], "docstring": "Get a set of records from Presto", "docstring_tokens": ["Get", "a", "set", "of", "records", "from", "Presto"], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/presto_hook.py#L80-L88", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/presto_hook.py", "func_name": "PrestoHook.get_pandas_df", "original_string": "def get_pandas_df(self, hql, parameters=None):\n        \"\"\"\n        Get a pandas dataframe from a sql query.\n        \"\"\"\n        import pandas\n        cursor = self.get_cursor()\n        try:\n            cursor.execute(self._strip_sql(hql), parameters)\n            data = cursor.fetchall()\n        except DatabaseError as e:\n            raise PrestoException(self._get_pretty_exception_message(e))\n        column_descriptions = cursor.description\n        if data:\n            df = pandas.DataFrame(data)\n            df.columns = [c[0] for c in column_descriptions]\n        else:\n            df = pandas.DataFrame()\n        return df", "language": "python", "code": "def get_pandas_df(self, hql, parameters=None):\n        \"\"\"\n        Get a pandas dataframe from a sql query.\n        \"\"\"\n        import pandas\n        cursor = self.get_cursor()\n        try:\n            cursor.execute(self._strip_sql(hql), parameters)\n            data = cursor.fetchall()\n        except DatabaseError as e:\n            raise PrestoException(self._get_pretty_exception_message(e))\n        column_descriptions = cursor.description\n        if data:\n            df = pandas.DataFrame(data)\n            df.columns = [c[0] for c in column_descriptions]\n        else:\n            df = pandas.DataFrame()\n        return df", "code_tokens": ["def", "get_pandas_df", "(", "self", ",", "hql", ",", "parameters", "=", "None", ")", ":", "import", "pandas", "cursor", "=", "self", ".", "get_cursor", "(", ")", "try", ":", "cursor", ".", "execute", "(", "self", ".", "_strip_sql", "(", "hql", ")", ",", "parameters", ")", "data", "=", "cursor", ".", "fetchall", "(", ")", "except", "DatabaseError", "as", "e", ":", "raise", "PrestoException", "(", "self", ".", "_get_pretty_exception_message", "(", "e", ")", ")", "column_descriptions", "=", "cursor", ".", "description", "if", "data", ":", "df", "=", "pandas", ".", "DataFrame", "(", "data", ")", "df", ".", "columns", "=", "[", "c", "[", "0", "]", "for", "c", "in", "column_descriptions", "]", "else", ":", "df", "=", "pandas", ".", "DataFrame", "(", ")", "return", "df"], "docstring": "Get a pandas dataframe from a sql query.", "docstring_tokens": ["Get", "a", "pandas", "dataframe", "from", "a", "sql", "query", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/presto_hook.py#L101-L118", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/presto_hook.py", "func_name": "PrestoHook.run", "original_string": "def run(self, hql, parameters=None):\n        \"\"\"\n        Execute the statement against Presto. Can be used to create views.\n        \"\"\"\n        return super().run(self._strip_sql(hql), parameters)", "language": "python", "code": "def run(self, hql, parameters=None):\n        \"\"\"\n        Execute the statement against Presto. Can be used to create views.\n        \"\"\"\n        return super().run(self._strip_sql(hql), parameters)", "code_tokens": ["def", "run", "(", "self", ",", "hql", ",", "parameters", "=", "None", ")", ":", "return", "super", "(", ")", ".", "run", "(", "self", ".", "_strip_sql", "(", "hql", ")", ",", "parameters", ")"], "docstring": "Execute the statement against Presto. Can be used to create views.", "docstring_tokens": ["Execute", "the", "statement", "against", "Presto", ".", "Can", "be", "used", "to", "create", "views", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/presto_hook.py#L120-L124", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/hooks/presto_hook.py", "func_name": "PrestoHook.insert_rows", "original_string": "def insert_rows(self, table, rows, target_fields=None):\n        \"\"\"\n        A generic way to insert a set of tuples into a table.\n\n        :param table: Name of the target table\n        :type table: str\n        :param rows: The rows to insert into the table\n        :type rows: iterable of tuples\n        :param target_fields: The names of the columns to fill in the table\n        :type target_fields: iterable of strings\n        \"\"\"\n        super().insert_rows(table, rows, target_fields, 0)", "language": "python", "code": "def insert_rows(self, table, rows, target_fields=None):\n        \"\"\"\n        A generic way to insert a set of tuples into a table.\n\n        :param table: Name of the target table\n        :type table: str\n        :param rows: The rows to insert into the table\n        :type rows: iterable of tuples\n        :param target_fields: The names of the columns to fill in the table\n        :type target_fields: iterable of strings\n        \"\"\"\n        super().insert_rows(table, rows, target_fields, 0)", "code_tokens": ["def", "insert_rows", "(", "self", ",", "table", ",", "rows", ",", "target_fields", "=", "None", ")", ":", "super", "(", ")", ".", "insert_rows", "(", "table", ",", "rows", ",", "target_fields", ",", "0", ")"], "docstring": "A generic way to insert a set of tuples into a table.\n\n        :param table: Name of the target table\n        :type table: str\n        :param rows: The rows to insert into the table\n        :type rows: iterable of tuples\n        :param target_fields: The names of the columns to fill in the table\n        :type target_fields: iterable of strings", "docstring_tokens": ["A", "generic", "way", "to", "insert", "a", "set", "of", "tuples", "into", "a", "table", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/hooks/presto_hook.py#L129-L140", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.get_conn", "original_string": "def get_conn(self):\n        \"\"\"\n        Return a cosmos db client.\n        \"\"\"\n        if self.cosmos_client is not None:\n            return self.cosmos_client\n\n        # Initialize the Python Azure Cosmos DB client\n        self.cosmos_client = cosmos_client.CosmosClient(self.endpoint_uri, {'masterKey': self.master_key})\n\n        return self.cosmos_client", "language": "python", "code": "def get_conn(self):\n        \"\"\"\n        Return a cosmos db client.\n        \"\"\"\n        if self.cosmos_client is not None:\n            return self.cosmos_client\n\n        # Initialize the Python Azure Cosmos DB client\n        self.cosmos_client = cosmos_client.CosmosClient(self.endpoint_uri, {'masterKey': self.master_key})\n\n        return self.cosmos_client", "code_tokens": ["def", "get_conn", "(", "self", ")", ":", "if", "self", ".", "cosmos_client", "is", "not", "None", ":", "return", "self", ".", "cosmos_client", "# Initialize the Python Azure Cosmos DB client", "self", ".", "cosmos_client", "=", "cosmos_client", ".", "CosmosClient", "(", "self", ".", "endpoint_uri", ",", "{", "'masterKey'", ":", "self", ".", "master_key", "}", ")", "return", "self", ".", "cosmos_client"], "docstring": "Return a cosmos db client.", "docstring_tokens": ["Return", "a", "cosmos", "db", "client", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L50-L60", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.does_collection_exist", "original_string": "def does_collection_exist(self, collection_name, database_name=None):\n        \"\"\"\n        Checks if a collection exists in CosmosDB.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        existing_container = list(self.get_conn().QueryContainers(\n            get_database_link(self.__get_database_name(database_name)), {\n                \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n                \"parameters\": [\n                    {\"name\": \"@id\", \"value\": collection_name}\n                ]\n            }))\n        if len(existing_container) == 0:\n            return False\n\n        return True", "language": "python", "code": "def does_collection_exist(self, collection_name, database_name=None):\n        \"\"\"\n        Checks if a collection exists in CosmosDB.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        existing_container = list(self.get_conn().QueryContainers(\n            get_database_link(self.__get_database_name(database_name)), {\n                \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n                \"parameters\": [\n                    {\"name\": \"@id\", \"value\": collection_name}\n                ]\n            }))\n        if len(existing_container) == 0:\n            return False\n\n        return True", "code_tokens": ["def", "does_collection_exist", "(", "self", ",", "collection_name", ",", "database_name", "=", "None", ")", ":", "if", "collection_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Collection name cannot be None.\"", ")", "existing_container", "=", "list", "(", "self", ".", "get_conn", "(", ")", ".", "QueryContainers", "(", "get_database_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ")", ",", "{", "\"query\"", ":", "\"SELECT * FROM r WHERE r.id=@id\"", ",", "\"parameters\"", ":", "[", "{", "\"name\"", ":", "\"@id\"", ",", "\"value\"", ":", "collection_name", "}", "]", "}", ")", ")", "if", "len", "(", "existing_container", ")", "==", "0", ":", "return", "False", "return", "True"], "docstring": "Checks if a collection exists in CosmosDB.", "docstring_tokens": ["Checks", "if", "a", "collection", "exists", "in", "CosmosDB", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L82-L99", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.create_collection", "original_string": "def create_collection(self, collection_name, database_name=None):\n        \"\"\"\n        Creates a new collection in the CosmosDB database.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        # We need to check to see if this container already exists so we don't try\n        # to create it twice\n        existing_container = list(self.get_conn().QueryContainers(\n            get_database_link(self.__get_database_name(database_name)), {\n                \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n                \"parameters\": [\n                    {\"name\": \"@id\", \"value\": collection_name}\n                ]\n            }))\n\n        # Only create if we did not find it already existing\n        if len(existing_container) == 0:\n            self.get_conn().CreateContainer(\n                get_database_link(self.__get_database_name(database_name)),\n                {\"id\": collection_name})", "language": "python", "code": "def create_collection(self, collection_name, database_name=None):\n        \"\"\"\n        Creates a new collection in the CosmosDB database.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        # We need to check to see if this container already exists so we don't try\n        # to create it twice\n        existing_container = list(self.get_conn().QueryContainers(\n            get_database_link(self.__get_database_name(database_name)), {\n                \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n                \"parameters\": [\n                    {\"name\": \"@id\", \"value\": collection_name}\n                ]\n            }))\n\n        # Only create if we did not find it already existing\n        if len(existing_container) == 0:\n            self.get_conn().CreateContainer(\n                get_database_link(self.__get_database_name(database_name)),\n                {\"id\": collection_name})", "code_tokens": ["def", "create_collection", "(", "self", ",", "collection_name", ",", "database_name", "=", "None", ")", ":", "if", "collection_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Collection name cannot be None.\"", ")", "# We need to check to see if this container already exists so we don't try", "# to create it twice", "existing_container", "=", "list", "(", "self", ".", "get_conn", "(", ")", ".", "QueryContainers", "(", "get_database_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ")", ",", "{", "\"query\"", ":", "\"SELECT * FROM r WHERE r.id=@id\"", ",", "\"parameters\"", ":", "[", "{", "\"name\"", ":", "\"@id\"", ",", "\"value\"", ":", "collection_name", "}", "]", "}", ")", ")", "# Only create if we did not find it already existing", "if", "len", "(", "existing_container", ")", "==", "0", ":", "self", ".", "get_conn", "(", ")", ".", "CreateContainer", "(", "get_database_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ")", ",", "{", "\"id\"", ":", "collection_name", "}", ")"], "docstring": "Creates a new collection in the CosmosDB database.", "docstring_tokens": ["Creates", "a", "new", "collection", "in", "the", "CosmosDB", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L101-L122", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.does_database_exist", "original_string": "def does_database_exist(self, database_name):\n        \"\"\"\n        Checks if a database exists in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        existing_database = list(self.get_conn().QueryDatabases({\n            \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n            \"parameters\": [\n                {\"name\": \"@id\", \"value\": database_name}\n            ]\n        }))\n        if len(existing_database) == 0:\n            return False\n\n        return True", "language": "python", "code": "def does_database_exist(self, database_name):\n        \"\"\"\n        Checks if a database exists in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        existing_database = list(self.get_conn().QueryDatabases({\n            \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n            \"parameters\": [\n                {\"name\": \"@id\", \"value\": database_name}\n            ]\n        }))\n        if len(existing_database) == 0:\n            return False\n\n        return True", "code_tokens": ["def", "does_database_exist", "(", "self", ",", "database_name", ")", ":", "if", "database_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Database name cannot be None.\"", ")", "existing_database", "=", "list", "(", "self", ".", "get_conn", "(", ")", ".", "QueryDatabases", "(", "{", "\"query\"", ":", "\"SELECT * FROM r WHERE r.id=@id\"", ",", "\"parameters\"", ":", "[", "{", "\"name\"", ":", "\"@id\"", ",", "\"value\"", ":", "database_name", "}", "]", "}", ")", ")", "if", "len", "(", "existing_database", ")", "==", "0", ":", "return", "False", "return", "True"], "docstring": "Checks if a database exists in CosmosDB.", "docstring_tokens": ["Checks", "if", "a", "database", "exists", "in", "CosmosDB", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L124-L140", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.create_database", "original_string": "def create_database(self, database_name):\n        \"\"\"\n        Creates a new database in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        # We need to check to see if this database already exists so we don't try\n        # to create it twice\n        existing_database = list(self.get_conn().QueryDatabases({\n            \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n            \"parameters\": [\n                {\"name\": \"@id\", \"value\": database_name}\n            ]\n        }))\n\n        # Only create if we did not find it already existing\n        if len(existing_database) == 0:\n            self.get_conn().CreateDatabase({\"id\": database_name})", "language": "python", "code": "def create_database(self, database_name):\n        \"\"\"\n        Creates a new database in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        # We need to check to see if this database already exists so we don't try\n        # to create it twice\n        existing_database = list(self.get_conn().QueryDatabases({\n            \"query\": \"SELECT * FROM r WHERE r.id=@id\",\n            \"parameters\": [\n                {\"name\": \"@id\", \"value\": database_name}\n            ]\n        }))\n\n        # Only create if we did not find it already existing\n        if len(existing_database) == 0:\n            self.get_conn().CreateDatabase({\"id\": database_name})", "code_tokens": ["def", "create_database", "(", "self", ",", "database_name", ")", ":", "if", "database_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Database name cannot be None.\"", ")", "# We need to check to see if this database already exists so we don't try", "# to create it twice", "existing_database", "=", "list", "(", "self", ".", "get_conn", "(", ")", ".", "QueryDatabases", "(", "{", "\"query\"", ":", "\"SELECT * FROM r WHERE r.id=@id\"", ",", "\"parameters\"", ":", "[", "{", "\"name\"", ":", "\"@id\"", ",", "\"value\"", ":", "database_name", "}", "]", "}", ")", ")", "# Only create if we did not find it already existing", "if", "len", "(", "existing_database", ")", "==", "0", ":", "self", ".", "get_conn", "(", ")", ".", "CreateDatabase", "(", "{", "\"id\"", ":", "database_name", "}", ")"], "docstring": "Creates a new database in CosmosDB.", "docstring_tokens": ["Creates", "a", "new", "database", "in", "CosmosDB", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L142-L160", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.delete_database", "original_string": "def delete_database(self, database_name):\n        \"\"\"\n        Deletes an existing database in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        self.get_conn().DeleteDatabase(get_database_link(database_name))", "language": "python", "code": "def delete_database(self, database_name):\n        \"\"\"\n        Deletes an existing database in CosmosDB.\n        \"\"\"\n        if database_name is None:\n            raise AirflowBadRequest(\"Database name cannot be None.\")\n\n        self.get_conn().DeleteDatabase(get_database_link(database_name))", "code_tokens": ["def", "delete_database", "(", "self", ",", "database_name", ")", ":", "if", "database_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Database name cannot be None.\"", ")", "self", ".", "get_conn", "(", ")", ".", "DeleteDatabase", "(", "get_database_link", "(", "database_name", ")", ")"], "docstring": "Deletes an existing database in CosmosDB.", "docstring_tokens": ["Deletes", "an", "existing", "database", "in", "CosmosDB", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L162-L169", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.delete_collection", "original_string": "def delete_collection(self, collection_name, database_name=None):\n        \"\"\"\n        Deletes an existing collection in the CosmosDB database.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        self.get_conn().DeleteContainer(\n            get_collection_link(self.__get_database_name(database_name), collection_name))", "language": "python", "code": "def delete_collection(self, collection_name, database_name=None):\n        \"\"\"\n        Deletes an existing collection in the CosmosDB database.\n        \"\"\"\n        if collection_name is None:\n            raise AirflowBadRequest(\"Collection name cannot be None.\")\n\n        self.get_conn().DeleteContainer(\n            get_collection_link(self.__get_database_name(database_name), collection_name))", "code_tokens": ["def", "delete_collection", "(", "self", ",", "collection_name", ",", "database_name", "=", "None", ")", ":", "if", "collection_name", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Collection name cannot be None.\"", ")", "self", ".", "get_conn", "(", ")", ".", "DeleteContainer", "(", "get_collection_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ",", "collection_name", ")", ")"], "docstring": "Deletes an existing collection in the CosmosDB database.", "docstring_tokens": ["Deletes", "an", "existing", "collection", "in", "the", "CosmosDB", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L171-L179", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.insert_documents", "original_string": "def insert_documents(self, documents, database_name=None, collection_name=None):\n        \"\"\"\n        Insert a list of new documents into an existing collection in the CosmosDB database.\n        \"\"\"\n        if documents is None:\n            raise AirflowBadRequest(\"You cannot insert empty documents\")\n\n        created_documents = []\n        for single_document in documents:\n            created_documents.append(\n                self.get_conn().CreateItem(\n                    get_collection_link(\n                        self.__get_database_name(database_name),\n                        self.__get_collection_name(collection_name)),\n                    single_document))\n\n        return created_documents", "language": "python", "code": "def insert_documents(self, documents, database_name=None, collection_name=None):\n        \"\"\"\n        Insert a list of new documents into an existing collection in the CosmosDB database.\n        \"\"\"\n        if documents is None:\n            raise AirflowBadRequest(\"You cannot insert empty documents\")\n\n        created_documents = []\n        for single_document in documents:\n            created_documents.append(\n                self.get_conn().CreateItem(\n                    get_collection_link(\n                        self.__get_database_name(database_name),\n                        self.__get_collection_name(collection_name)),\n                    single_document))\n\n        return created_documents", "code_tokens": ["def", "insert_documents", "(", "self", ",", "documents", ",", "database_name", "=", "None", ",", "collection_name", "=", "None", ")", ":", "if", "documents", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"You cannot insert empty documents\"", ")", "created_documents", "=", "[", "]", "for", "single_document", "in", "documents", ":", "created_documents", ".", "append", "(", "self", ".", "get_conn", "(", ")", ".", "CreateItem", "(", "get_collection_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ",", "self", ".", "__get_collection_name", "(", "collection_name", ")", ")", ",", "single_document", ")", ")", "return", "created_documents"], "docstring": "Insert a list of new documents into an existing collection in the CosmosDB database.", "docstring_tokens": ["Insert", "a", "list", "of", "new", "documents", "into", "an", "existing", "collection", "in", "the", "CosmosDB", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L208-L224", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.delete_document", "original_string": "def delete_document(self, document_id, database_name=None, collection_name=None):\n        \"\"\"\n        Delete an existing document out of a collection in the CosmosDB database.\n        \"\"\"\n        if document_id is None:\n            raise AirflowBadRequest(\"Cannot delete a document without an id\")\n\n        self.get_conn().DeleteItem(\n            get_document_link(\n                self.__get_database_name(database_name),\n                self.__get_collection_name(collection_name),\n                document_id))", "language": "python", "code": "def delete_document(self, document_id, database_name=None, collection_name=None):\n        \"\"\"\n        Delete an existing document out of a collection in the CosmosDB database.\n        \"\"\"\n        if document_id is None:\n            raise AirflowBadRequest(\"Cannot delete a document without an id\")\n\n        self.get_conn().DeleteItem(\n            get_document_link(\n                self.__get_database_name(database_name),\n                self.__get_collection_name(collection_name),\n                document_id))", "code_tokens": ["def", "delete_document", "(", "self", ",", "document_id", ",", "database_name", "=", "None", ",", "collection_name", "=", "None", ")", ":", "if", "document_id", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Cannot delete a document without an id\"", ")", "self", ".", "get_conn", "(", ")", ".", "DeleteItem", "(", "get_document_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ",", "self", ".", "__get_collection_name", "(", "collection_name", ")", ",", "document_id", ")", ")"], "docstring": "Delete an existing document out of a collection in the CosmosDB database.", "docstring_tokens": ["Delete", "an", "existing", "document", "out", "of", "a", "collection", "in", "the", "CosmosDB", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L226-L237", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.get_document", "original_string": "def get_document(self, document_id, database_name=None, collection_name=None):\n        \"\"\"\n        Get a document from an existing collection in the CosmosDB database.\n        \"\"\"\n        if document_id is None:\n            raise AirflowBadRequest(\"Cannot get a document without an id\")\n\n        try:\n            return self.get_conn().ReadItem(\n                get_document_link(\n                    self.__get_database_name(database_name),\n                    self.__get_collection_name(collection_name),\n                    document_id))\n        except HTTPFailure:\n            return None", "language": "python", "code": "def get_document(self, document_id, database_name=None, collection_name=None):\n        \"\"\"\n        Get a document from an existing collection in the CosmosDB database.\n        \"\"\"\n        if document_id is None:\n            raise AirflowBadRequest(\"Cannot get a document without an id\")\n\n        try:\n            return self.get_conn().ReadItem(\n                get_document_link(\n                    self.__get_database_name(database_name),\n                    self.__get_collection_name(collection_name),\n                    document_id))\n        except HTTPFailure:\n            return None", "code_tokens": ["def", "get_document", "(", "self", ",", "document_id", ",", "database_name", "=", "None", ",", "collection_name", "=", "None", ")", ":", "if", "document_id", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"Cannot get a document without an id\"", ")", "try", ":", "return", "self", ".", "get_conn", "(", ")", ".", "ReadItem", "(", "get_document_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ",", "self", ".", "__get_collection_name", "(", "collection_name", ")", ",", "document_id", ")", ")", "except", "HTTPFailure", ":", "return", "None"], "docstring": "Get a document from an existing collection in the CosmosDB database.", "docstring_tokens": ["Get", "a", "document", "from", "an", "existing", "collection", "in", "the", "CosmosDB", "database", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L239-L253", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/azure_cosmos_hook.py", "func_name": "AzureCosmosDBHook.get_documents", "original_string": "def get_documents(self, sql_string, database_name=None, collection_name=None, partition_key=None):\n        \"\"\"\n        Get a list of documents from an existing collection in the CosmosDB database via SQL query.\n        \"\"\"\n        if sql_string is None:\n            raise AirflowBadRequest(\"SQL query string cannot be None\")\n\n        # Query them in SQL\n        query = {'query': sql_string}\n\n        try:\n            result_iterable = self.get_conn().QueryItems(\n                get_collection_link(\n                    self.__get_database_name(database_name),\n                    self.__get_collection_name(collection_name)),\n                query,\n                partition_key)\n\n            return list(result_iterable)\n        except HTTPFailure:\n            return None", "language": "python", "code": "def get_documents(self, sql_string, database_name=None, collection_name=None, partition_key=None):\n        \"\"\"\n        Get a list of documents from an existing collection in the CosmosDB database via SQL query.\n        \"\"\"\n        if sql_string is None:\n            raise AirflowBadRequest(\"SQL query string cannot be None\")\n\n        # Query them in SQL\n        query = {'query': sql_string}\n\n        try:\n            result_iterable = self.get_conn().QueryItems(\n                get_collection_link(\n                    self.__get_database_name(database_name),\n                    self.__get_collection_name(collection_name)),\n                query,\n                partition_key)\n\n            return list(result_iterable)\n        except HTTPFailure:\n            return None", "code_tokens": ["def", "get_documents", "(", "self", ",", "sql_string", ",", "database_name", "=", "None", ",", "collection_name", "=", "None", ",", "partition_key", "=", "None", ")", ":", "if", "sql_string", "is", "None", ":", "raise", "AirflowBadRequest", "(", "\"SQL query string cannot be None\"", ")", "# Query them in SQL", "query", "=", "{", "'query'", ":", "sql_string", "}", "try", ":", "result_iterable", "=", "self", ".", "get_conn", "(", ")", ".", "QueryItems", "(", "get_collection_link", "(", "self", ".", "__get_database_name", "(", "database_name", ")", ",", "self", ".", "__get_collection_name", "(", "collection_name", ")", ")", ",", "query", ",", "partition_key", ")", "return", "list", "(", "result_iterable", ")", "except", "HTTPFailure", ":", "return", "None"], "docstring": "Get a list of documents from an existing collection in the CosmosDB database via SQL query.", "docstring_tokens": ["Get", "a", "list", "of", "documents", "from", "an", "existing", "collection", "in", "the", "CosmosDB", "database", "via", "SQL", "query", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/azure_cosmos_hook.py#L255-L275", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_function_hook.py", "func_name": "GcfHook.get_function", "original_string": "def get_function(self, name):\n        \"\"\"\n        Returns the Cloud Function with the given name.\n\n        :param name: Name of the function.\n        :type name: str\n        :return: A Cloud Functions object representing the function.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().projects().locations().functions().get(\n            name=name).execute(num_retries=self.num_retries)", "language": "python", "code": "def get_function(self, name):\n        \"\"\"\n        Returns the Cloud Function with the given name.\n\n        :param name: Name of the function.\n        :type name: str\n        :return: A Cloud Functions object representing the function.\n        :rtype: dict\n        \"\"\"\n        return self.get_conn().projects().locations().functions().get(\n            name=name).execute(num_retries=self.num_retries)", "code_tokens": ["def", "get_function", "(", "self", ",", "name", ")", ":", "return", "self", ".", "get_conn", "(", ")", ".", "projects", "(", ")", ".", "locations", "(", ")", ".", "functions", "(", ")", ".", "get", "(", "name", "=", "name", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")"], "docstring": "Returns the Cloud Function with the given name.\n\n        :param name: Name of the function.\n        :type name: str\n        :return: A Cloud Functions object representing the function.\n        :rtype: dict", "docstring_tokens": ["Returns", "the", "Cloud", "Function", "with", "the", "given", "name", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_function_hook.py#L76-L86", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_function_hook.py", "func_name": "GcfHook.create_new_function", "original_string": "def create_new_function(self, location, body, project_id=None):\n        \"\"\"\n        Creates a new function in Cloud Function in the location specified in the body.\n\n        :param location: The location of the function.\n        :type location: str\n        :param body: The body required by the Cloud Functions insert API.\n        :type body: dict\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().create(\n            location=self._full_location(project_id, location),\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(operation_name=operation_name)", "language": "python", "code": "def create_new_function(self, location, body, project_id=None):\n        \"\"\"\n        Creates a new function in Cloud Function in the location specified in the body.\n\n        :param location: The location of the function.\n        :type location: str\n        :param body: The body required by the Cloud Functions insert API.\n        :type body: dict\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().create(\n            location=self._full_location(project_id, location),\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(operation_name=operation_name)", "code_tokens": ["def", "create_new_function", "(", "self", ",", "location", ",", "body", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "projects", "(", ")", ".", "locations", "(", ")", ".", "functions", "(", ")", ".", "create", "(", "location", "=", "self", ".", "_full_location", "(", "project_id", ",", "location", ")", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "operation_name", "=", "operation_name", ")"], "docstring": "Creates a new function in Cloud Function in the location specified in the body.\n\n        :param location: The location of the function.\n        :type location: str\n        :param body: The body required by the Cloud Functions insert API.\n        :type body: dict\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: None", "docstring_tokens": ["Creates", "a", "new", "function", "in", "Cloud", "Function", "in", "the", "location", "specified", "in", "the", "body", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_function_hook.py#L89-L107", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_function_hook.py", "func_name": "GcfHook.update_function", "original_string": "def update_function(self, name, body, update_mask):\n        \"\"\"\n        Updates Cloud Functions according to the specified update mask.\n\n        :param name: The name of the function.\n        :type name: str\n        :param body: The body required by the cloud function patch API.\n        :type body: dict\n        :param update_mask: The update mask - array of fields that should be patched.\n        :type update_mask: [str]\n        :return: None\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().patch(\n            updateMask=\",\".join(update_mask),\n            name=name,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(operation_name=operation_name)", "language": "python", "code": "def update_function(self, name, body, update_mask):\n        \"\"\"\n        Updates Cloud Functions according to the specified update mask.\n\n        :param name: The name of the function.\n        :type name: str\n        :param body: The body required by the cloud function patch API.\n        :type body: dict\n        :param update_mask: The update mask - array of fields that should be patched.\n        :type update_mask: [str]\n        :return: None\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().patch(\n            updateMask=\",\".join(update_mask),\n            name=name,\n            body=body\n        ).execute(num_retries=self.num_retries)\n        operation_name = response[\"name\"]\n        self._wait_for_operation_to_complete(operation_name=operation_name)", "code_tokens": ["def", "update_function", "(", "self", ",", "name", ",", "body", ",", "update_mask", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "projects", "(", ")", ".", "locations", "(", ")", ".", "functions", "(", ")", ".", "patch", "(", "updateMask", "=", "\",\"", ".", "join", "(", "update_mask", ")", ",", "name", "=", "name", ",", "body", "=", "body", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "operation_name", "=", "response", "[", "\"name\"", "]", "self", ".", "_wait_for_operation_to_complete", "(", "operation_name", "=", "operation_name", ")"], "docstring": "Updates Cloud Functions according to the specified update mask.\n\n        :param name: The name of the function.\n        :type name: str\n        :param body: The body required by the cloud function patch API.\n        :type body: dict\n        :param update_mask: The update mask - array of fields that should be patched.\n        :type update_mask: [str]\n        :return: None", "docstring_tokens": ["Updates", "Cloud", "Functions", "according", "to", "the", "specified", "update", "mask", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_function_hook.py#L109-L127", "partition": "test"}
{"repo": "apache/airflow", "path": "airflow/contrib/hooks/gcp_function_hook.py", "func_name": "GcfHook.upload_function_zip", "original_string": "def upload_function_zip(self, location, zip_path, project_id=None):\n        \"\"\"\n        Uploads zip file with sources.\n\n        :param location: The location where the function is created.\n        :type location: str\n        :param zip_path: The path of the valid .zip file to upload.\n        :type zip_path: str\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: The upload URL that was returned by generateUploadUrl method.\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().generateUploadUrl(\n            parent=self._full_location(project_id, location)\n        ).execute(num_retries=self.num_retries)\n        upload_url = response.get('uploadUrl')\n        with open(zip_path, 'rb') as fp:\n            requests.put(\n                url=upload_url,\n                data=fp,\n                # Those two headers needs to be specified according to:\n                # https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions/generateUploadUrl\n                # nopep8\n                headers={\n                    'Content-type': 'application/zip',\n                    'x-goog-content-length-range': '0,104857600',\n                }\n            )\n        return upload_url", "language": "python", "code": "def upload_function_zip(self, location, zip_path, project_id=None):\n        \"\"\"\n        Uploads zip file with sources.\n\n        :param location: The location where the function is created.\n        :type location: str\n        :param zip_path: The path of the valid .zip file to upload.\n        :type zip_path: str\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: The upload URL that was returned by generateUploadUrl method.\n        \"\"\"\n        response = self.get_conn().projects().locations().functions().generateUploadUrl(\n            parent=self._full_location(project_id, location)\n        ).execute(num_retries=self.num_retries)\n        upload_url = response.get('uploadUrl')\n        with open(zip_path, 'rb') as fp:\n            requests.put(\n                url=upload_url,\n                data=fp,\n                # Those two headers needs to be specified according to:\n                # https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions/generateUploadUrl\n                # nopep8\n                headers={\n                    'Content-type': 'application/zip',\n                    'x-goog-content-length-range': '0,104857600',\n                }\n            )\n        return upload_url", "code_tokens": ["def", "upload_function_zip", "(", "self", ",", "location", ",", "zip_path", ",", "project_id", "=", "None", ")", ":", "response", "=", "self", ".", "get_conn", "(", ")", ".", "projects", "(", ")", ".", "locations", "(", ")", ".", "functions", "(", ")", ".", "generateUploadUrl", "(", "parent", "=", "self", ".", "_full_location", "(", "project_id", ",", "location", ")", ")", ".", "execute", "(", "num_retries", "=", "self", ".", "num_retries", ")", "upload_url", "=", "response", ".", "get", "(", "'uploadUrl'", ")", "with", "open", "(", "zip_path", ",", "'rb'", ")", "as", "fp", ":", "requests", ".", "put", "(", "url", "=", "upload_url", ",", "data", "=", "fp", ",", "# Those two headers needs to be specified according to:", "# https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions/generateUploadUrl", "# nopep8", "headers", "=", "{", "'Content-type'", ":", "'application/zip'", ",", "'x-goog-content-length-range'", ":", "'0,104857600'", ",", "}", ")", "return", "upload_url"], "docstring": "Uploads zip file with sources.\n\n        :param location: The location where the function is created.\n        :type location: str\n        :param zip_path: The path of the valid .zip file to upload.\n        :type zip_path: str\n        :param project_id: Optional, Google Cloud Project project_id where the function belongs.\n            If set to None or missing, the default project_id from the GCP connection is used.\n        :type project_id: str\n        :return: The upload URL that was returned by generateUploadUrl method.", "docstring_tokens": ["Uploads", "zip", "file", "with", "sources", "."], "sha": "b69c686ad8a0c89b9136bb4b31767257eb7b2597", "url": "https://github.com/apache/airflow/blob/b69c686ad8a0c89b9136bb4b31767257eb7b2597/airflow/contrib/hooks/gcp_function_hook.py#L130-L159", "partition": "test"}