Skip to content

Commit

Permalink
Merge pull request #73 from codefuse-ai/cfamily-opensource
Browse files Browse the repository at this point in the history
feat: Add COREF for C family extractor source code
  • Loading branch information
FunJim authored Aug 29, 2024
2 parents efcb9e0 + 493bc5b commit 7c32be0
Show file tree
Hide file tree
Showing 128 changed files with 54,640 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Regarding the openness of languages, you can refer to the table below:
| JavaScript | Y | Y | RELEASE |
| Go | Y | Y | RELEASE |
| XML | Y | Y | RELEASE |
| Cfamily | N | N | BETA |
| Cfamily | Y | Y | BETA |
| SQL | Y | Y | BETA |
| Swift | N | N | BETA |
| Properties | Y | Y | BETA |
Expand Down
2 changes: 1 addition & 1 deletion README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ CodeFuse-Query 包括**Sparrow CLI **和CodeFuse-Query**在线服务Query中心*
| JavaScript | Y | Y | RELEASE |
| Go | Y | Y | RELEASE |
| XML | Y | Y | RELEASE |
| Cfamily | N | N | BETA |
| Cfamily | Y | Y | BETA |
| SQL | Y | Y | BETA |
| Swift | N | N | BETA |
| Properties | Y | Y | BETA |
Expand Down
6 changes: 6 additions & 0 deletions language/cfamily/extractor/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Use the defaults from the LLVM style, but with 4-column indentation.
BasedOnStyle: LLVM
IndentWidth: 4
## The column limit.
## A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input’s line breaking decisions within statements.
ColumnLimit: 100
28 changes: 28 additions & 0 deletions language/cfamily/extractor/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Specify a comma-separated list of positive and negative globs: positive globs add subsets of checks, while negative globs (prefixed with "-") remove them.
# The current header guards do not follow the preferred LLVM style, so llvm-header-guard is disabled.
# Everything is switched off first ('-*'); then clang-diagnostic-*, llvm-*, misc-* and
# readability-identifier-naming are enabled, minus the individually excluded checks.
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,readability-identifier-naming'
CheckOptions:
# Naming rules applied by readability-identifier-naming.
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: camelBack
- key: readability-identifier-naming.MemberCase
value: camelBack
# Private and protected members carry a leading underscore.
- key: readability-identifier-naming.PrivateMemberPrefix
value: '_'
- key: readability-identifier-naming.ProtectedMemberPrefix
value: '_'
- key: readability-identifier-naming.ParameterCase
value: camelBack
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.VariableCase
value: camelBack
- key: readability-identifier-naming.IgnoreMainLikeFunctions
value: 1
# NOTE(review): the two options below belong to readability-redundant-member-init and
# modernize-use-default-member-init, neither of which is enabled by the Checks list above;
# presumably they only take effect when those checks are enabled elsewhere - confirm.
- key: readability-redundant-member-init.IgnoreBaseInCopyConstructors
value: 1
- key: modernize-use-default-member-init.UseAssignment
value: 1
17 changes: 17 additions & 0 deletions language/cfamily/extractor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# macOS-specific files (Finder metadata is named ".DS_Store"; patterns are
# case-sensitive on case-sensitive checkouts).
.DS_Store

# Nested build directory
/cmake-build-*
/Tests/cmake-build-*

# VS2017 and VSCode config files.
.vscode
.vs

# CLion project configuration
/.idea

# Ignore all bazel-* symlinks. There is no full list since this can change
# based on the name of the directory bazel is cloned into.
/bazel-*
21 changes: 21 additions & 0 deletions language/cfamily/extractor/AST/ASTUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@


#include "ASTUtil.hpp"

using namespace llvm;

/// Get the root-relative path of the given file.
///
/// The "root" is the parent directory of the current working directory, so a
/// successful result keeps the leading separator and the name of the working
/// directory itself (e.g. with a working directory of /home/u/proj, the path
/// /home/u/proj/src/a.cpp becomes /proj/src/a.cpp).
///
/// \param absolutePath absolute path of the file to convert.
/// \param relativePath receives a view into \p absolutePath: the root-relative
///        suffix on success, or the whole of \p absolutePath on failure.
/// \return true if the root-relative path was solved, false otherwise (the
///         working directory could not be determined, or \p absolutePath does
///         not lie under the root).
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath) {
    // Every failure path hands the input back unchanged.
    relativePath = absolutePath;

    SmallString<256> cwd;
    if (sys::fs::current_path(cwd)) {
        // The working directory is unknown, so there is no root to strip.
        return false;
    }

    const StringRef separator = sys::path::get_separator();
    const StringRef cwdRef = cwd.str();
    const size_t rootLen = cwdRef.rfind(separator);
    if (rootLen == StringRef::npos) {
        return false;
    }

    // The root must be a real prefix of the path AND end on a path-component
    // boundary; a plain substring test would also accept matches in the middle
    // of the path or siblings such as "/home/user2" for a root of "/home/u".
    const StringRef rootDir = cwdRef.substr(0, rootLen);
    const bool startsWithRoot = absolutePath.substr(0, rootLen) == rootDir;
    const bool endsOnBoundary = absolutePath.substr(rootLen, separator.size()) == separator;
    if (!startsWithRoot || !endsOnBoundary) {
        return false;
    }

    relativePath = absolutePath.substr(rootLen);
    return true;
}
12 changes: 12 additions & 0 deletions language/cfamily/extractor/AST/ASTUtil.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@


#ifndef COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP

#include <clang/Tooling/Tooling.h>

// NOTE(review): this using-directive sits at header scope, so every file that
// includes this header also gets the llvm namespace opened. The declaration
// below relies on it for the unqualified StringRef.
using namespace llvm;

/// Derive a root-relative path for a file, based on the current working directory.
/// \param absolutePath absolute path of the file to convert.
/// \param relativePath receives the root-relative path on success; otherwise it is
///        set to \p absolutePath.
/// \return true if a root-relative path was derived, false otherwise.
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath);

#endif // COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP
120 changes: 120 additions & 0 deletions language/cfamily/extractor/AST/CorefASTConsumer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP

#include "ASTUtil.hpp"
#include "CorefASTVisitor.hpp"
#include <clang/Tooling/Tooling.h>
#include <llvm/Support/Regex.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

using namespace llvm;

namespace coref {

class CorefASTConsumer : public clang::ASTConsumer {
private:
const CorefUri _corefUri;
std::set<clang::FileID> _visitedFileIds;
std::unique_ptr<llvm::Regex> _blacklistDirFilter;

inline bool isInBlackListDir(StringRef absolutePath) {
return _blacklistDirFilter && _blacklistDirFilter->match(absolutePath);
}

protected:
/// An override HandleTranslationUnit
/// This method is called when the ASTs for entire translation unit have
/// been parsed. \param astContext
void HandleTranslationUnit(clang::ASTContext &astContext) final {
coref::StorageFacade::transaction([&]() mutable {
// insert entry for Program table, entry could be existed already.
auto programOid = CorefUri::generateCorpusOId(_corefUri.getCorpus());
coref::StorageFacade::insertClassObj(Program{programOid, _corefUri.getCorpus()});

std::unordered_map<CorefOid, File> newVisitFileMap{};
coref::CorefASTVisitor visitor(astContext, _corefUri, programOid, newVisitFileMap);

auto decls = astContext.getTranslationUnitDecl()->decls();
auto &sourceMngr = astContext.getSourceManager();
for (auto &decl : decls) {
auto curFileId = sourceMngr.getFileID(decl->getLocation());

if (_visitedFileIds.find(curFileId) != _visitedFileIds.end()) {
// skip visited files
continue;
}

// skip AST nodes having invalid source location
if (!decl->getLocation().isValid()) {
continue;
}

// todo: need to verify the accuracy of function
// "isInSystemHeader" & "isInSystemMacro"
if (sourceMngr.isInSystemHeader(decl->getLocation()) ||
sourceMngr.isInSystemMacro(decl->getLocation())) {
// skip AST nodes in system headers
_visitedFileIds.insert(curFileId);
continue;
}

StringRef absolutePath = sourceMngr.getFilename(decl->getLocation());
if (absolutePath.empty())
continue;
if (isInBlackListDir(absolutePath)) {
_visitedFileIds.insert(curFileId);
continue;
}

StringRef relativePath;
getRootRelativePath(absolutePath, relativePath);
// note: relativePath would be an absolute path when handling a
// framework file.

auto fileOid =
CorefUri::generateFileOId(_corefUri.getCorpus(), std::string(relativePath));
if (coref::StorageFacade::checkFileObjExist(programOid, fileOid)) {
// skip file that have been added in Sqlite DB
_visitedFileIds.insert(curFileId);
continue;
}

if (newVisitFileMap.find(fileOid) == newVisitFileMap.end()) {
File file{fileOid, std::string(relativePath),
std::string(sys::path::extension(relativePath)),
std::string(sys::path::filename(relativePath)), programOid};
newVisitFileMap.insert({fileOid, std::move(file)});
}
visitor.setExtractFileOid(fileOid);
visitor.TraverseDecl(decl);
}

// traverse the newFileStruct and update the File table
for (auto &[fileOid, f] : newVisitFileMap) {
coref::StorageFacade::insertClassObj(std::move(f));
}

return true;
});
}

public:
CorefASTConsumer(const CorefUri &corefUri, std::vector<std::string> &blacklistDir)
: _corefUri(corefUri) {
std::stringstream regexStr;
bool first = true;
for (auto dir : blacklistDir) {
if (first) {
regexStr << "(" << dir << ")";
first = false;
} else {
regexStr << "|(" << dir << ")";
}
}
_blacklistDirFilter = std::make_unique<llvm::Regex>(regexStr.str());
_blacklistDirFilter->isValid();
};
};
} // namespace coref

#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP
60 changes: 60 additions & 0 deletions language/cfamily/extractor/AST/CorefASTFrontendAction.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP

#include "../Coref/CorefUri.hpp"
#include "CorefASTConsumer.hpp"
#include <clang/Tooling/Tooling.h>
#include <filesystem>

namespace coref {

class CorefASTFrontendAction : public clang::ASTFrontendAction {
private:
std::string _corpus;
std::vector<std::string>& _blacklistDir;

public:
/// Constructor of CorefASTFrontendAction Class
/// \param corpusName
explicit CorefASTFrontendAction(const std::string &corpusName, std::vector<std::string>& blacklistDir)
: clang::ASTFrontendAction(), _corpus(corpusName), _blacklistDir(blacklistDir) {}

protected:
/// An override of CreateASTConsumer
/// \param ci
/// \param input_file
/// \return
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &ci,
llvm::StringRef inputFile) override {
// TODO fill in corpus info
auto absoluteInputFile = std::filesystem::absolute(inputFile.str());
auto corefUri = CorefUri(_corpus, absoluteInputFile.string());
llvm::outs() << "Consuming file: " << absoluteInputFile.string()
<< " of corpus: " << _corpus << "\n";
return std::unique_ptr<clang::ASTConsumer>(new coref::CorefASTConsumer(corefUri, _blacklistDir));
}
};

struct CorefFrontendActionFactory : public clang::tooling::FrontendActionFactory {
/// Constructor of CorefFrontendActionFactory Class
/// \param corpusName
explicit CorefFrontendActionFactory(const std::string &corpusName, std::vector<std::string>& blacklistDir)
: clang::tooling::FrontendActionFactory(), _corpusName(corpusName), _blacklistDir(blacklistDir) {}

/// a Factory method to create clang::FrontendAction
/// \return
std::unique_ptr<clang::FrontendAction> create() override {
auto *action =
dynamic_cast<clang::FrontendAction *>(new CorefASTFrontendAction(_corpusName, _blacklistDir));
return std::unique_ptr<clang::FrontendAction>(action);
}

private:
std::string _corpusName;
std::vector<std::string>& _blacklistDir;
};

} // namespace coref

#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP
Loading

0 comments on commit 7c32be0

Please sign in to comment.