wip part 2

This commit is contained in:
Claudio Maggioni 2023-10-11 13:59:07 +02:00
parent ec511c169a
commit e4cca6d81e
3 changed files with 34380 additions and 24819 deletions

59121
data.csv

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,11 @@ def find_py_files(dir):
yield os.path.join(cwd, file)
def keep_name(name):
    """Return True when *name* should be kept in the extracted dataset.

    A name is rejected when it starts with an underscore, or when its
    lower-cased form contains ``"main"`` or ``"test"``.
    """
    # Normalise once instead of converting and lower-casing per check.
    lowered = str(name).lower()
    return (
        not lowered.startswith("_")
        and "main" not in lowered
        and "test" not in lowered
    )
class FeatureVisitor(ast.NodeVisitor):
def __init__(self, filename):
@ -21,31 +26,35 @@ class FeatureVisitor(ast.NodeVisitor):
self.rows = []
def visit_FunctionDef(self, node):
    """Record a "function" row for *node* when its name passes ``keep_name``.

    The row holds the function name, the file being visited, the line of
    the definition and its docstring (``None`` when there is none), and is
    appended to ``self.rows`` exactly once.

    ``generic_visit`` is not called, so definitions nested inside this
    function are not traversed.
    """
    # Filtered names must not produce any row at all; appending before the
    # check would defeat the filter and duplicate every kept name.
    if not keep_name(node.name):
        return

    self.rows.append({
        "name": node.name,
        "file": self.filename,
        "line": node.lineno,
        "type": "function",
        "comment": ast.get_docstring(node),
    })
def visit_MethodDef(self, node):
    """Record a "method" row for *node* when its name passes ``keep_name``.

    NOTE(review): the standard ``ast`` module defines no ``MethodDef`` node
    class, so ``ast.NodeVisitor.visit`` never dispatches to this handler;
    methods are collected by ``visit_ClassDef`` instead. Kept only so that
    explicit callers keep working - consider removing it.
    """
    # Apply the same name filter as the function and class handlers.
    if not keep_name(node.name):
        return

    self.rows.append({
        "name": node.name,
        "file": self.filename,
        "line": node.lineno,
        "type": "method",
        "comment": ast.get_docstring(node),
    })
def visit_ClassDef(self, node):
    """Record a "class" row for *node* and "method" rows for its functions.

    The class row is appended to ``self.rows`` once, and only when the
    class name passes ``keep_name``. Every ``ast.FunctionDef`` found by
    ``ast.walk`` below the class (at any nesting depth) whose name passes
    ``keep_name`` is then recorded with type ``"method"``.

    ``generic_visit`` is not called, so the ``visit_*`` handlers are not
    invoked for the class body.
    """
    # Append the class row only behind the filter; an unconditional append
    # would bypass it and duplicate every kept class.
    if keep_name(node.name):
        self.rows.append({
            "name": node.name,
            "file": self.filename,
            "line": node.lineno,
            "type": "class",
            "comment": ast.get_docstring(node),
        })

    # NOTE(review): methods are collected even when the class itself was
    # filtered out - confirm this is the intended behaviour.
    for child in ast.walk(node):
        if isinstance(child, ast.FunctionDef) and keep_name(child.name):
            self.rows.append({
                "name": child.name,
                "file": self.filename,
                "line": child.lineno,
                "type": "method",
                "comment": ast.get_docstring(child),
            })
def main():

23
search-data.py Normal file
View File

@ -0,0 +1,23 @@
import re
import argparse
import os
import pandas as pd
# Absolute directory containing this script; the dataset path is resolved
# against it rather than against the current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Location of the CSV dataset, expected next to this script.
IN_DATASET = os.path.join(SCRIPT_DIR, "data.csv")
def search(query):
    """Load the CSV at ``IN_DATASET`` into a pandas DataFrame.

    Work in progress: ``query`` is accepted but not used yet, the loaded
    frame is discarded and nothing is returned.
    """
    dataset = pd.read_csv(IN_DATASET)
def main():
    """Parse the command line and run ``search`` with the given query.

    Expects a single positional argument, ``query``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("query", help="the query to search the corpus with", type=str)
    args = parser.parse_args()
    # The parsed value lives on the namespace; a bare ``query`` is not
    # defined in this scope and raised NameError.
    search(args.query)


if __name__ == "__main__":
    main()