-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #73 from codefuse-ai/cfamily-opensource
feat: Add COREF for C family extractor source code
- Loading branch information
Showing
128 changed files
with
54,640 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# We'll use the defaults from the LLVM style, but with a 4-column indentation. | |
BasedOnStyle: LLVM | ||
IndentWidth: 4 | ||
## The column limit. | ||
## A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input’s line breaking decisions within statements. | ||
ColumnLimit: 100 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Specify a comma-separated list of positive and negative globs: positive globs add subsets of checks, while negative globs (prefixed with "-") remove them. | ||
# Current header guard does not follow preferred style [llvm-header-guard] so disable it | ||
Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,readability-identifier-naming' | ||
CheckOptions: | ||
- key: readability-identifier-naming.ClassCase | ||
value: CamelCase | ||
- key: readability-identifier-naming.EnumCase | ||
value: CamelCase | ||
- key: readability-identifier-naming.FunctionCase | ||
value: camelBack | ||
- key: readability-identifier-naming.MemberCase | ||
value: camelBack | ||
- key: readability-identifier-naming.PrivateMemberPrefix | ||
value: '_' | ||
- key: readability-identifier-naming.ProtectedMemberPrefix | ||
value: '_' | ||
- key: readability-identifier-naming.ParameterCase | ||
value: camelBack | ||
- key: readability-identifier-naming.UnionCase | ||
value: CamelCase | ||
- key: readability-identifier-naming.VariableCase | ||
value: camelBack | ||
- key: readability-identifier-naming.IgnoreMainLikeFunctions | ||
value: 1 | ||
- key: readability-redundant-member-init.IgnoreBaseInCopyConstructors | ||
value: 1 | ||
- key: modernize-use-default-member-init.UseAssignment | ||
value: 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#OS X specific files. | ||
.DS_store | ||
|
||
# Nested build directory | ||
/cmake-build-* | ||
/Tests/cmake-build-* | ||
|
||
# VS2017 and VSCode config files. | ||
.vscode | ||
.vs | ||
|
||
# CLion project configuration | ||
/.idea | ||
|
||
# Ignore all bazel-* symlinks. There is no full list since this can change | ||
# based on the name of the directory bazel is cloned into. | ||
/bazel-* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
|
||
|
||
#include "ASTUtil.hpp" | ||
|
||
using namespace llvm; | ||
|
||
/// Compute the path of a file relative to the project root, where the root is
/// taken to be the parent directory of the current working directory.
///
/// \param absolutePath absolute path of the file to convert
/// \param relativePath receives the root-relative path (the part of
///        \p absolutePath that follows the root) on success; otherwise it is
///        set to \p absolutePath unchanged
/// \return true if the root-relative path was resolved, false otherwise
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath) {
    // Fall back to the unmodified path unless resolution succeeds below.
    relativePath = absolutePath;

    SmallString<256> cwd;
    if (sys::fs::current_path(cwd)) {
        // The working directory could not be queried, so there is no root to strip.
        return false;
    }

    const StringRef cwdRef = cwd.str();
    const size_t rootLen = cwdRef.rfind(sys::path::get_separator());
    if (rootLen == StringRef::npos) {
        // No separator in the working directory: it has no parent to use as root.
        return false;
    }
    const StringRef root = cwdRef.substr(0, rootLen);

    // The root has to be a leading part of the path; a match somewhere in the
    // middle would make the offset used below meaningless.
    if (absolutePath.substr(0, rootLen) != root) {
        return false;
    }
    // It also has to end on a component boundary, so that e.g. "/a/bc" is not
    // treated as living under the root "/a/b".
    if (absolutePath.size() > rootLen && !sys::path::is_separator(absolutePath[rootLen])) {
        return false;
    }

    relativePath = absolutePath.substr(rootLen);
    return true;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
|
||
|
||
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP | ||
#define COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP | ||
|
||
#include <clang/Tooling/Tooling.h> | ||
|
||
using namespace llvm; | ||
|
||
/// Resolve the path of a file relative to the parent directory of the current
/// working directory.
/// \param absolutePath absolute path of the file
/// \param relativePath set to the root-relative path on success, otherwise to
///        \p absolutePath unchanged
/// \return true if the root-relative path was resolved, false otherwise
bool getRootRelativePath(StringRef &absolutePath, StringRef &relativePath); | |
|
||
#endif // COREF_CFAMILY_SRC_EXTRACTOR_ASTUTIL_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
|
||
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP | ||
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP | ||
|
||
#include "ASTUtil.hpp" | ||
#include "CorefASTVisitor.hpp" | ||
#include <clang/Tooling/Tooling.h> | ||
#include <llvm/Support/Regex.h> | ||
|
||
using namespace llvm; | ||
|
||
namespace coref { | ||
|
||
class CorefASTConsumer : public clang::ASTConsumer { | ||
private: | ||
const CorefUri _corefUri; | ||
std::set<clang::FileID> _visitedFileIds; | ||
std::unique_ptr<llvm::Regex> _blacklistDirFilter; | ||
|
||
inline bool isInBlackListDir(StringRef absolutePath) { | ||
return _blacklistDirFilter && _blacklistDirFilter->match(absolutePath); | ||
} | ||
|
||
protected: | ||
/// An override HandleTranslationUnit | ||
/// This method is called when the ASTs for entire translation unit have | ||
/// been parsed. \param astContext | ||
void HandleTranslationUnit(clang::ASTContext &astContext) final { | ||
coref::StorageFacade::transaction([&]() mutable { | ||
// insert entry for Program table, entry could be existed already. | ||
auto programOid = CorefUri::generateCorpusOId(_corefUri.getCorpus()); | ||
coref::StorageFacade::insertClassObj(Program{programOid, _corefUri.getCorpus()}); | ||
|
||
std::unordered_map<CorefOid, File> newVisitFileMap{}; | ||
coref::CorefASTVisitor visitor(astContext, _corefUri, programOid, newVisitFileMap); | ||
|
||
auto decls = astContext.getTranslationUnitDecl()->decls(); | ||
auto &sourceMngr = astContext.getSourceManager(); | ||
for (auto &decl : decls) { | ||
auto curFileId = sourceMngr.getFileID(decl->getLocation()); | ||
|
||
if (_visitedFileIds.find(curFileId) != _visitedFileIds.end()) { | ||
// skip visited files | ||
continue; | ||
} | ||
|
||
// skip AST nodes having invalid source location | ||
if (!decl->getLocation().isValid()) { | ||
continue; | ||
} | ||
|
||
// todo: need to verify the accuracy of function | ||
// "isInSystemHeader" & "isInSystemMacro" | ||
if (sourceMngr.isInSystemHeader(decl->getLocation()) || | ||
sourceMngr.isInSystemMacro(decl->getLocation())) { | ||
// skip AST nodes in system headers | ||
_visitedFileIds.insert(curFileId); | ||
continue; | ||
} | ||
|
||
StringRef absolutePath = sourceMngr.getFilename(decl->getLocation()); | ||
if (absolutePath.empty()) | ||
continue; | ||
if (isInBlackListDir(absolutePath)) { | ||
_visitedFileIds.insert(curFileId); | ||
continue; | ||
} | ||
|
||
StringRef relativePath; | ||
getRootRelativePath(absolutePath, relativePath); | ||
// note: relativePath would be an absolute path when handling a | ||
// framework file. | ||
|
||
auto fileOid = | ||
CorefUri::generateFileOId(_corefUri.getCorpus(), std::string(relativePath)); | ||
if (coref::StorageFacade::checkFileObjExist(programOid, fileOid)) { | ||
// skip file that have been added in Sqlite DB | ||
_visitedFileIds.insert(curFileId); | ||
continue; | ||
} | ||
|
||
if (newVisitFileMap.find(fileOid) == newVisitFileMap.end()) { | ||
File file{fileOid, std::string(relativePath), | ||
std::string(sys::path::extension(relativePath)), | ||
std::string(sys::path::filename(relativePath)), programOid}; | ||
newVisitFileMap.insert({fileOid, std::move(file)}); | ||
} | ||
visitor.setExtractFileOid(fileOid); | ||
visitor.TraverseDecl(decl); | ||
} | ||
|
||
// traverse the newFileStruct and update the File table | ||
for (auto &[fileOid, f] : newVisitFileMap) { | ||
coref::StorageFacade::insertClassObj(std::move(f)); | ||
} | ||
|
||
return true; | ||
}); | ||
} | ||
|
||
public: | ||
CorefASTConsumer(const CorefUri &corefUri, std::vector<std::string> &blacklistDir) | ||
: _corefUri(corefUri) { | ||
std::stringstream regexStr; | ||
bool first = true; | ||
for (auto dir : blacklistDir) { | ||
if (first) { | ||
regexStr << "(" << dir << ")"; | ||
first = false; | ||
} else { | ||
regexStr << "|(" << dir << ")"; | ||
} | ||
} | ||
_blacklistDirFilter = std::make_unique<llvm::Regex>(regexStr.str()); | ||
_blacklistDirFilter->isValid(); | ||
}; | ||
}; | ||
} // namespace coref | ||
|
||
#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTCONSUMER_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
#ifndef COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP | ||
#define COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP | ||
|
||
#include "../Coref/CorefUri.hpp" | ||
#include "CorefASTConsumer.hpp" | ||
#include <clang/Tooling/Tooling.h> | ||
#include <filesystem> | ||
|
||
namespace coref { | ||
|
||
class CorefASTFrontendAction : public clang::ASTFrontendAction { | ||
private: | ||
std::string _corpus; | ||
std::vector<std::string>& _blacklistDir; | ||
|
||
public: | ||
/// Constructor of CorefASTFrontendAction Class | ||
/// \param corpusName | ||
explicit CorefASTFrontendAction(const std::string &corpusName, std::vector<std::string>& blacklistDir) | ||
: clang::ASTFrontendAction(), _corpus(corpusName), _blacklistDir(blacklistDir) {} | ||
|
||
protected: | ||
/// An override of CreateASTConsumer | ||
/// \param ci | ||
/// \param input_file | ||
/// \return | ||
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &ci, | ||
llvm::StringRef inputFile) override { | ||
// TODO fill in corpus info | ||
auto absoluteInputFile = std::filesystem::absolute(inputFile.str()); | ||
auto corefUri = CorefUri(_corpus, absoluteInputFile.string()); | ||
llvm::outs() << "Consuming file: " << absoluteInputFile.string() | ||
<< " of corpus: " << _corpus << "\n"; | ||
return std::unique_ptr<clang::ASTConsumer>(new coref::CorefASTConsumer(corefUri, _blacklistDir)); | ||
} | ||
}; | ||
|
||
struct CorefFrontendActionFactory : public clang::tooling::FrontendActionFactory { | ||
/// Constructor of CorefFrontendActionFactory Class | ||
/// \param corpusName | ||
explicit CorefFrontendActionFactory(const std::string &corpusName, std::vector<std::string>& blacklistDir) | ||
: clang::tooling::FrontendActionFactory(), _corpusName(corpusName), _blacklistDir(blacklistDir) {} | ||
|
||
/// a Factory method to create clang::FrontendAction | ||
/// \return | ||
std::unique_ptr<clang::FrontendAction> create() override { | ||
auto *action = | ||
dynamic_cast<clang::FrontendAction *>(new CorefASTFrontendAction(_corpusName, _blacklistDir)); | ||
return std::unique_ptr<clang::FrontendAction>(action); | ||
} | ||
|
||
private: | ||
std::string _corpusName; | ||
std::vector<std::string>& _blacklistDir; | ||
}; | ||
|
||
} // namespace coref | ||
|
||
#endif // COREF_CFAMILY_SRC_EXTRACTOR_COREFASTFRONTENDACTION_HPP |
Oops, something went wrong.